frame-clock: Simplify dynamic max render time calculation

Store only two values per kind of duration: The short term and long term maximum. The short term maximum is updated in each frame clock dispatch. The long term maximum is updated at most once per second: If the short term maximum is higher, the long term maximum is updated to match it. Otherwise, a fraction of the delta between the two maxima is subtracted from the long term maximum. Compared to the previous algorithm: * The calculcations are simpler. * The calculated max render time has a slow exponential drop-off (by at most a few milliseconds every second) instead of potentially abruptly dropping after as few as 16 frames. This should fix https://gitlab.gnome.org/GNOME/gnome-shell/-/issues/4830 since the short term maximum should always include a sample from the clock's second tick. v2: * Use divisor 2 instead of 4. Part-of: <https://gitlab.gnome.org/GNOME/mutter/-/merge_requests/2500>
2025-08-02 14:44:40 +00:00 · 2022-07-04 18:57:45 +02:00
parent 99850f4645
commit ca341ef1ea
1 changed files with 88 additions and 76 deletions
--- a/clutter/clutter/clutter-frame-clock.c
+++ b/clutter/clutter/clutter-frame-clock.c
@@ -34,17 +34,6 @@ enum

 static guint signals[N_SIGNALS];

-/* An estimate queue holds several int64_t values. Adding a new value to the
- * queue overwrites the oldest value.
- */
-#define ESTIMATE_QUEUE_LENGTH 16
-
-typedef struct _EstimateQueue
-{
-  int64_t values[ESTIMATE_QUEUE_LENGTH];
-  int next_index;
-} EstimateQueue;
-
 #define SYNC_DELAY_FALLBACK_FRACTION 0.875

 typedef struct _ClutterFrameListener
@@ -97,14 +86,21 @@ struct _ClutterFrameClock
  /* Last KMS buffer submission time. */
  int64_t last_flip_time_us;

-  /* Last few durations between desired and effective dispatch start. */
-  EstimateQueue dispatch_lateness_us;
-  /* Last few durations between dispatch start and buffer swap. */
-  EstimateQueue dispatch_to_swap_us;
-  /* Last few durations between buffer swap and GPU rendering finish. */
-  EstimateQueue swap_to_rendering_done_us;
-  /* Last few durations between buffer swap and KMS submission. */
-  EstimateQueue swap_to_flip_us;
+  /* Last time we promoted short term durations to long term ones */
+  int64_t longterm_promotion_us;
+
+  /* Short & long term maximum values */
+  struct {
+    /* Duration between desired and effective dispatch start. */
+    int64_t max_dispatch_lateness_us;
+    /* Duration between dispatch start and buffer swap. */
+    int64_t max_dispatch_to_swap_us;
+    /* Duration between buffer swap and GPU rendering finish. */
+    int64_t max_swap_to_rendering_done_us;
+    /* Duration between buffer swap and KMS submission. */
+    int64_t max_swap_to_flip_us;
+  } shortterm, longterm;
+
  /* If we got new measurements last frame. */
  gboolean got_measurements_last_frame;

@@ -119,14 +115,6 @@ struct _ClutterFrameClock
 G_DEFINE_TYPE (ClutterFrameClock, clutter_frame_clock,
               G_TYPE_OBJECT)

-static void
-estimate_queue_add_value (EstimateQueue *queue,
-                          int64_t        value)
-{
-  queue->values[queue->next_index] = value;
-  queue->next_index = (queue->next_index + 1) % ESTIMATE_QUEUE_LENGTH;
-}
-
 float
 clutter_frame_clock_get_refresh_rate (ClutterFrameClock *frame_clock)
 {
@@ -227,6 +215,23 @@ maybe_reschedule_update (ClutterFrameClock *frame_clock)
    }
 }

+static void
+update_longterm_max (int64_t *longterm_max,
+                     int64_t *shortterm_max)
+{
+  /* Do not update long term max if there has been no measurement in over a second. */
+  if (!*shortterm_max)
+    return;
+
+  if (*longterm_max > *shortterm_max)
+    /* Exponential drop-off toward the short term max */
+    *longterm_max -= (*longterm_max - *shortterm_max) / 2;
+  else
+    *longterm_max = *shortterm_max;
+
+  *shortterm_max = 0;
+}
+
 void
 clutter_frame_clock_notify_presented (ClutterFrameClock *frame_clock,
                                      ClutterFrameInfo  *frame_info)
@@ -276,9 +281,6 @@ clutter_frame_clock_notify_presented (ClutterFrameClock *frame_clock,
  if (frame_info->presentation_time > 0)
    frame_clock->last_presentation_time_us = frame_info->presentation_time;

-  estimate_queue_add_value (&frame_clock->dispatch_lateness_us,
-                            frame_clock->last_dispatch_lateness_us);
-
  frame_clock->got_measurements_last_frame = FALSE;

  if (frame_info->cpu_time_before_buffer_swap_us != 0 &&
@@ -302,12 +304,12 @@ clutter_frame_clock_notify_presented (ClutterFrameClock *frame_clock,
                    swap_to_rendering_done_us,
                    swap_to_flip_us);

-      estimate_queue_add_value (&frame_clock->dispatch_to_swap_us,
-                                dispatch_to_swap_us);
-      estimate_queue_add_value (&frame_clock->swap_to_rendering_done_us,
-                                swap_to_rendering_done_us);
-      estimate_queue_add_value (&frame_clock->swap_to_flip_us,
-                                swap_to_flip_us);
+      frame_clock->shortterm.max_dispatch_to_swap_us =
+        MAX (frame_clock->shortterm.max_dispatch_to_swap_us, dispatch_to_swap_us);
+      frame_clock->shortterm.max_swap_to_rendering_done_us =
+        MAX (frame_clock->shortterm.max_swap_to_rendering_done_us, swap_to_rendering_done_us);
+      frame_clock->shortterm.max_swap_to_flip_us =
+        MAX (frame_clock->shortterm.max_swap_to_flip_us, swap_to_flip_us);

      frame_clock->got_measurements_last_frame = TRUE;
    }
@@ -317,6 +319,20 @@ clutter_frame_clock_notify_presented (ClutterFrameClock *frame_clock,
                    frame_clock->last_dispatch_lateness_us);
    }

+  if (frame_info->presentation_time - frame_clock->longterm_promotion_us > G_USEC_PER_SEC)
+    {
+      update_longterm_max (&frame_clock->longterm.max_dispatch_lateness_us,
+                           &frame_clock->shortterm.max_dispatch_lateness_us);
+      update_longterm_max (&frame_clock->longterm.max_dispatch_to_swap_us,
+                           &frame_clock->shortterm.max_dispatch_to_swap_us);
+      update_longterm_max (&frame_clock->longterm.max_swap_to_rendering_done_us,
+                           &frame_clock->shortterm.max_swap_to_rendering_done_us);
+      update_longterm_max (&frame_clock->longterm.max_swap_to_flip_us,
+                           &frame_clock->shortterm.max_swap_to_flip_us);
+
+      frame_clock->longterm_promotion_us = frame_info->presentation_time;
+    }
+
  if (frame_info->refresh_rate > 1.0)
    {
      clutter_frame_clock_set_refresh_rate (frame_clock,
@@ -362,12 +378,11 @@ static int64_t
 clutter_frame_clock_compute_max_render_time_us (ClutterFrameClock *frame_clock)
 {
  int64_t refresh_interval_us;
-  int64_t max_dispatch_lateness_us = 0;
-  int64_t max_dispatch_to_swap_us = 0;
-  int64_t max_swap_to_rendering_done_us = 0;
-  int64_t max_swap_to_flip_us = 0;
+  int64_t max_dispatch_lateness_us;
+  int64_t max_dispatch_to_swap_us;
+  int64_t max_swap_to_rendering_done_us;
+  int64_t max_swap_to_flip_us;
  int64_t max_render_time_us;
-  int i;

  refresh_interval_us = frame_clock->refresh_interval_us;

@@ -376,21 +391,18 @@ clutter_frame_clock_compute_max_render_time_us (ClutterFrameClock *frame_clock)
                  CLUTTER_DEBUG_DISABLE_DYNAMIC_MAX_RENDER_TIME))
    return refresh_interval_us * SYNC_DELAY_FALLBACK_FRACTION;

-  for (i = 0; i < ESTIMATE_QUEUE_LENGTH; ++i)
-    {
  max_dispatch_lateness_us =
-        MAX (max_dispatch_lateness_us,
-             frame_clock->dispatch_lateness_us.values[i]);
+    MAX (frame_clock->longterm.max_dispatch_lateness_us,
+         frame_clock->shortterm.max_dispatch_lateness_us);
  max_dispatch_to_swap_us =
-        MAX (max_dispatch_to_swap_us,
-             frame_clock->dispatch_to_swap_us.values[i]);
+    MAX (frame_clock->longterm.max_dispatch_to_swap_us,
+         frame_clock->shortterm.max_dispatch_to_swap_us);
  max_swap_to_rendering_done_us =
-        MAX (max_swap_to_rendering_done_us,
-             frame_clock->swap_to_rendering_done_us.values[i]);
+    MAX (frame_clock->longterm.max_swap_to_rendering_done_us,
+         frame_clock->shortterm.max_swap_to_rendering_done_us);
  max_swap_to_flip_us =
-        MAX (max_swap_to_flip_us,
-             frame_clock->swap_to_flip_us.values[i]);
-    }
+    MAX (frame_clock->longterm.max_swap_to_flip_us,
+         frame_clock->shortterm.max_swap_to_flip_us);

  /* Max render time shows how early the frame clock needs to be dispatched
   * to make it to the predicted next presentation time. It is composed of:
@@ -706,6 +718,10 @@ clutter_frame_clock_dispatch (ClutterFrameClock *frame_clock,
  else
    frame_clock->last_dispatch_lateness_us = lateness_us;

+  frame_clock->shortterm.max_dispatch_lateness_us =
+    MAX (frame_clock->shortterm.max_dispatch_lateness_us,
+         frame_clock->last_dispatch_lateness_us);
+
  frame_clock->last_dispatch_time_us = time_us;
  g_source_set_ready_time (frame_clock->source, -1);

@@ -794,11 +810,10 @@ clutter_frame_clock_record_flip_time (ClutterFrameClock *frame_clock,
 GString *
 clutter_frame_clock_get_max_render_time_debug_info (ClutterFrameClock *frame_clock)
 {
-  int64_t max_dispatch_lateness_us = 0;
-  int64_t max_dispatch_to_swap_us = 0;
-  int64_t max_swap_to_rendering_done_us = 0;
-  int64_t max_swap_to_flip_us = 0;
-  int i;
+  int64_t max_dispatch_lateness_us;
+  int64_t max_dispatch_to_swap_us;
+  int64_t max_swap_to_rendering_done_us;
+  int64_t max_swap_to_flip_us;
  GString *string;

  string = g_string_new (NULL);
@@ -810,21 +825,18 @@ clutter_frame_clock_get_max_render_time_debug_info (ClutterFrameClock *frame_clo
  else
    g_string_append_printf (string, " (no measurements last frame)");

-  for (i = 0; i < ESTIMATE_QUEUE_LENGTH; ++i)
-    {
  max_dispatch_lateness_us =
-        MAX (max_dispatch_lateness_us,
-             frame_clock->dispatch_lateness_us.values[i]);
+    MAX (frame_clock->longterm.max_dispatch_lateness_us,
+         frame_clock->shortterm.max_dispatch_lateness_us);
  max_dispatch_to_swap_us =
-        MAX (max_dispatch_to_swap_us,
-             frame_clock->dispatch_to_swap_us.values[i]);
+    MAX (frame_clock->longterm.max_dispatch_to_swap_us,
+         frame_clock->shortterm.max_dispatch_to_swap_us);
  max_swap_to_rendering_done_us =
-        MAX (max_swap_to_rendering_done_us,
-             frame_clock->swap_to_rendering_done_us.values[i]);
+    MAX (frame_clock->longterm.max_swap_to_rendering_done_us,
+         frame_clock->shortterm.max_swap_to_rendering_done_us);
  max_swap_to_flip_us =
-        MAX (max_swap_to_flip_us,
-             frame_clock->swap_to_flip_us.values[i]);
-    }
+    MAX (frame_clock->longterm.max_swap_to_flip_us,
+         frame_clock->shortterm.max_swap_to_flip_us);

  g_string_append_printf (string, "\nVblank duration: %ld µs +",
                          frame_clock->vblank_duration_us);