From ca341ef1ea0c5e8ffb1ec3bc7d6762a453f4c9ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 4 Jul 2022 18:57:45 +0200 Subject: [PATCH] frame-clock: Simplify dynamic max render time calculation Store only two values per kind of duration: The short term and long term maximum. The short term maximum is updated in each frame clock dispatch. The long term maximum is updated at most once per second: If the short term maximum is higher, the long term maximum is updated to match it. Otherwise, a fraction of the delta between the two maxima is subtracted from the long term maximum. Compared to the previous algorithm: * The calculcations are simpler. * The calculated max render time has a slow exponential drop-off (by at most a few milliseconds every second) instead of potentially abruptly dropping after as few as 16 frames. This should fix https://gitlab.gnome.org/GNOME/gnome-shell/-/issues/4830 since the short term maximum should always include a sample from the clock's second tick. v2: * Use divisor 2 instead of 4. Part-of: --- clutter/clutter/clutter-frame-clock.c | 164 ++++++++++++++------------ 1 file changed, 88 insertions(+), 76 deletions(-) diff --git a/clutter/clutter/clutter-frame-clock.c b/clutter/clutter/clutter-frame-clock.c index 4361df704..546a1dd32 100644 --- a/clutter/clutter/clutter-frame-clock.c +++ b/clutter/clutter/clutter-frame-clock.c @@ -34,17 +34,6 @@ enum static guint signals[N_SIGNALS]; -/* An estimate queue holds several int64_t values. Adding a new value to the - * queue overwrites the oldest value. - */ -#define ESTIMATE_QUEUE_LENGTH 16 - -typedef struct _EstimateQueue -{ - int64_t values[ESTIMATE_QUEUE_LENGTH]; - int next_index; -} EstimateQueue; - #define SYNC_DELAY_FALLBACK_FRACTION 0.875 typedef struct _ClutterFrameListener @@ -97,14 +86,21 @@ struct _ClutterFrameClock /* Last KMS buffer submission time. */ int64_t last_flip_time_us; - /* Last few durations between desired and effective dispatch start. */ - EstimateQueue dispatch_lateness_us; - /* Last few durations between dispatch start and buffer swap. */ - EstimateQueue dispatch_to_swap_us; - /* Last few durations between buffer swap and GPU rendering finish. */ - EstimateQueue swap_to_rendering_done_us; - /* Last few durations between buffer swap and KMS submission. */ - EstimateQueue swap_to_flip_us; + /* Last time we promoted short term durations to long term ones */ + int64_t longterm_promotion_us; + + /* Short & long term maximum values */ + struct { + /* Duration between desired and effective dispatch start. */ + int64_t max_dispatch_lateness_us; + /* Duration between dispatch start and buffer swap. */ + int64_t max_dispatch_to_swap_us; + /* Duration between buffer swap and GPU rendering finish. */ + int64_t max_swap_to_rendering_done_us; + /* Duration between buffer swap and KMS submission. */ + int64_t max_swap_to_flip_us; + } shortterm, longterm; + /* If we got new measurements last frame. */ gboolean got_measurements_last_frame; @@ -119,14 +115,6 @@ struct _ClutterFrameClock G_DEFINE_TYPE (ClutterFrameClock, clutter_frame_clock, G_TYPE_OBJECT) -static void -estimate_queue_add_value (EstimateQueue *queue, - int64_t value) -{ - queue->values[queue->next_index] = value; - queue->next_index = (queue->next_index + 1) % ESTIMATE_QUEUE_LENGTH; -} - float clutter_frame_clock_get_refresh_rate (ClutterFrameClock *frame_clock) { @@ -227,6 +215,23 @@ maybe_reschedule_update (ClutterFrameClock *frame_clock) } } +static void +update_longterm_max (int64_t *longterm_max, + int64_t *shortterm_max) +{ + /* Do not update long term max if there has been no measurement in over a second. */ + if (!*shortterm_max) + return; + + if (*longterm_max > *shortterm_max) + /* Exponential drop-off toward the short term max */ + *longterm_max -= (*longterm_max - *shortterm_max) / 2; + else + *longterm_max = *shortterm_max; + + *shortterm_max = 0; +} + void clutter_frame_clock_notify_presented (ClutterFrameClock *frame_clock, ClutterFrameInfo *frame_info) @@ -276,9 +281,6 @@ clutter_frame_clock_notify_presented (ClutterFrameClock *frame_clock, if (frame_info->presentation_time > 0) frame_clock->last_presentation_time_us = frame_info->presentation_time; - estimate_queue_add_value (&frame_clock->dispatch_lateness_us, - frame_clock->last_dispatch_lateness_us); - frame_clock->got_measurements_last_frame = FALSE; if (frame_info->cpu_time_before_buffer_swap_us != 0 && @@ -302,12 +304,12 @@ clutter_frame_clock_notify_presented (ClutterFrameClock *frame_clock, swap_to_rendering_done_us, swap_to_flip_us); - estimate_queue_add_value (&frame_clock->dispatch_to_swap_us, - dispatch_to_swap_us); - estimate_queue_add_value (&frame_clock->swap_to_rendering_done_us, - swap_to_rendering_done_us); - estimate_queue_add_value (&frame_clock->swap_to_flip_us, - swap_to_flip_us); + frame_clock->shortterm.max_dispatch_to_swap_us = + MAX (frame_clock->shortterm.max_dispatch_to_swap_us, dispatch_to_swap_us); + frame_clock->shortterm.max_swap_to_rendering_done_us = + MAX (frame_clock->shortterm.max_swap_to_rendering_done_us, swap_to_rendering_done_us); + frame_clock->shortterm.max_swap_to_flip_us = + MAX (frame_clock->shortterm.max_swap_to_flip_us, swap_to_flip_us); frame_clock->got_measurements_last_frame = TRUE; } @@ -317,6 +319,20 @@ clutter_frame_clock_notify_presented (ClutterFrameClock *frame_clock, frame_clock->last_dispatch_lateness_us); } + if (frame_info->presentation_time - frame_clock->longterm_promotion_us > G_USEC_PER_SEC) + { + update_longterm_max (&frame_clock->longterm.max_dispatch_lateness_us, + &frame_clock->shortterm.max_dispatch_lateness_us); + update_longterm_max (&frame_clock->longterm.max_dispatch_to_swap_us, + &frame_clock->shortterm.max_dispatch_to_swap_us); + update_longterm_max (&frame_clock->longterm.max_swap_to_rendering_done_us, + &frame_clock->shortterm.max_swap_to_rendering_done_us); + update_longterm_max (&frame_clock->longterm.max_swap_to_flip_us, + &frame_clock->shortterm.max_swap_to_flip_us); + + frame_clock->longterm_promotion_us = frame_info->presentation_time; + } + if (frame_info->refresh_rate > 1.0) { clutter_frame_clock_set_refresh_rate (frame_clock, @@ -362,12 +378,11 @@ static int64_t clutter_frame_clock_compute_max_render_time_us (ClutterFrameClock *frame_clock) { int64_t refresh_interval_us; - int64_t max_dispatch_lateness_us = 0; - int64_t max_dispatch_to_swap_us = 0; - int64_t max_swap_to_rendering_done_us = 0; - int64_t max_swap_to_flip_us = 0; + int64_t max_dispatch_lateness_us; + int64_t max_dispatch_to_swap_us; + int64_t max_swap_to_rendering_done_us; + int64_t max_swap_to_flip_us; int64_t max_render_time_us; - int i; refresh_interval_us = frame_clock->refresh_interval_us; @@ -376,21 +391,18 @@ clutter_frame_clock_compute_max_render_time_us (ClutterFrameClock *frame_clock) CLUTTER_DEBUG_DISABLE_DYNAMIC_MAX_RENDER_TIME)) return refresh_interval_us * SYNC_DELAY_FALLBACK_FRACTION; - for (i = 0; i < ESTIMATE_QUEUE_LENGTH; ++i) - { - max_dispatch_lateness_us = - MAX (max_dispatch_lateness_us, - frame_clock->dispatch_lateness_us.values[i]); - max_dispatch_to_swap_us = - MAX (max_dispatch_to_swap_us, - frame_clock->dispatch_to_swap_us.values[i]); - max_swap_to_rendering_done_us = - MAX (max_swap_to_rendering_done_us, - frame_clock->swap_to_rendering_done_us.values[i]); - max_swap_to_flip_us = - MAX (max_swap_to_flip_us, - frame_clock->swap_to_flip_us.values[i]); - } + max_dispatch_lateness_us = + MAX (frame_clock->longterm.max_dispatch_lateness_us, + frame_clock->shortterm.max_dispatch_lateness_us); + max_dispatch_to_swap_us = + MAX (frame_clock->longterm.max_dispatch_to_swap_us, + frame_clock->shortterm.max_dispatch_to_swap_us); + max_swap_to_rendering_done_us = + MAX (frame_clock->longterm.max_swap_to_rendering_done_us, + frame_clock->shortterm.max_swap_to_rendering_done_us); + max_swap_to_flip_us = + MAX (frame_clock->longterm.max_swap_to_flip_us, + frame_clock->shortterm.max_swap_to_flip_us); /* Max render time shows how early the frame clock needs to be dispatched * to make it to the predicted next presentation time. It is composed of: @@ -706,6 +718,10 @@ clutter_frame_clock_dispatch (ClutterFrameClock *frame_clock, else frame_clock->last_dispatch_lateness_us = lateness_us; + frame_clock->shortterm.max_dispatch_lateness_us = + MAX (frame_clock->shortterm.max_dispatch_lateness_us, + frame_clock->last_dispatch_lateness_us); + frame_clock->last_dispatch_time_us = time_us; g_source_set_ready_time (frame_clock->source, -1); @@ -794,11 +810,10 @@ clutter_frame_clock_record_flip_time (ClutterFrameClock *frame_clock, GString * clutter_frame_clock_get_max_render_time_debug_info (ClutterFrameClock *frame_clock) { - int64_t max_dispatch_lateness_us = 0; - int64_t max_dispatch_to_swap_us = 0; - int64_t max_swap_to_rendering_done_us = 0; - int64_t max_swap_to_flip_us = 0; - int i; + int64_t max_dispatch_lateness_us; + int64_t max_dispatch_to_swap_us; + int64_t max_swap_to_rendering_done_us; + int64_t max_swap_to_flip_us; GString *string; string = g_string_new (NULL); @@ -810,21 +825,18 @@ clutter_frame_clock_get_max_render_time_debug_info (ClutterFrameClock *frame_clo else g_string_append_printf (string, " (no measurements last frame)"); - for (i = 0; i < ESTIMATE_QUEUE_LENGTH; ++i) - { - max_dispatch_lateness_us = - MAX (max_dispatch_lateness_us, - frame_clock->dispatch_lateness_us.values[i]); - max_dispatch_to_swap_us = - MAX (max_dispatch_to_swap_us, - frame_clock->dispatch_to_swap_us.values[i]); - max_swap_to_rendering_done_us = - MAX (max_swap_to_rendering_done_us, - frame_clock->swap_to_rendering_done_us.values[i]); - max_swap_to_flip_us = - MAX (max_swap_to_flip_us, - frame_clock->swap_to_flip_us.values[i]); - } + max_dispatch_lateness_us = + MAX (frame_clock->longterm.max_dispatch_lateness_us, + frame_clock->shortterm.max_dispatch_lateness_us); + max_dispatch_to_swap_us = + MAX (frame_clock->longterm.max_dispatch_to_swap_us, + frame_clock->shortterm.max_dispatch_to_swap_us); + max_swap_to_rendering_done_us = + MAX (frame_clock->longterm.max_swap_to_rendering_done_us, + frame_clock->shortterm.max_swap_to_rendering_done_us); + max_swap_to_flip_us = + MAX (frame_clock->longterm.max_swap_to_flip_us, + frame_clock->shortterm.max_swap_to_flip_us); g_string_append_printf (string, "\nVblank duration: %ld µs +", frame_clock->vblank_duration_us);