// Copyright 2020-2022, Collabora, Ltd. // SPDX-License-Identifier: BSL-1.0 /*! * @file * @brief Shared frame timing code. * @author Jakob Bornecrantz * @ingroup aux_util */ #include "os/os_time.h" #include "util/u_time.h" #include "util/u_misc.h" #include "util/u_debug.h" #include "util/u_pacing.h" #include "util/u_metrics.h" #include "util/u_logging.h" #include "util/u_trace_marker.h" #include #include #include DEBUG_GET_ONCE_LOG_OPTION(log_level, "U_PACING_APP_LOG", U_LOGGING_WARN) #define UPA_LOG_T(...) U_LOG_IFL_T(debug_get_log_option_log_level(), __VA_ARGS__) #define UPA_LOG_D(...) U_LOG_IFL_D(debug_get_log_option_log_level(), __VA_ARGS__) #define UPA_LOG_I(...) U_LOG_IFL_I(debug_get_log_option_log_level(), __VA_ARGS__) #define UPA_LOG_W(...) U_LOG_IFL_W(debug_get_log_option_log_level(), __VA_ARGS__) #define UPA_LOG_E(...) U_LOG_IFL_E(debug_get_log_option_log_level(), __VA_ARGS__) /*! * Define to validate latched and retired call. Currently disabled due to * simplistic frame allocation code, enable once improved. */ #undef VALIDATE_LATCHED_AND_RETIRED /* * * Structs enums, and defines. * */ #define FRAME_COUNT (8) enum u_pa_state { U_PA_READY, U_RT_WAIT_LEFT, U_RT_PREDICTED, U_RT_BEGUN, U_RT_DELIVERED, U_RT_GPU_DONE, }; struct u_pa_frame { int64_t frame_id; //! How long we thought the frame would take. uint64_t predicted_frame_time_ns; //! When we predicted the app should wake up. uint64_t predicted_wake_up_time_ns; //! When the client's GPU work should have completed. uint64_t predicted_gpu_done_time_ns; //! When we predicted this frame to be shown. uint64_t predicted_display_time_ns; //! The selected display period. uint64_t predicted_display_period_ns; /*! * When the app told us to display this frame, can be different * then the predicted display time so we track that separately. */ uint64_t display_time_ns; //! When something happened. struct { uint64_t predicted_ns; uint64_t wait_woke_ns; uint64_t begin_ns; uint64_t delivered_ns; uint64_t gpu_done_ns; } when; enum u_pa_state state; }; struct pacing_app { struct u_pacing_app base; //! Id for this session. int64_t session_id; struct u_pa_frame frames[FRAME_COUNT]; uint32_t current_frame; uint32_t next_frame; int64_t frame_counter; struct { //! App time between wait returning and begin being called. uint64_t cpu_time_ns; //! Time between begin and frame data being delivered. uint64_t draw_time_ns; //! Time between the frame data being delivered and GPU completing. uint64_t wait_time_ns; //! Extra time between end of draw time and when the compositor wakes up. uint64_t margin_ns; } app; //!< App statistics. struct { //! The last display time that the thing driving this helper got. uint64_t predicted_display_time_ns; //! The last display period the hardware is running at. uint64_t predicted_display_period_ns; //! The extra time needed by the thing driving this helper. uint64_t extra_ns; } last_input; uint64_t last_returned_ns; }; /* * * Helpers * */ static inline struct pacing_app * pacing_app(struct u_pacing_app *upa) { return (struct pacing_app *)upa; } #define DEBUG_PRINT_FRAME_ID() UPA_LOG_T("%" PRIi64, frame_id) #define GET_INDEX_FROM_ID(RT, ID) ((uint64_t)(ID) % FRAME_COUNT) #define IIR_ALPHA_LT 0.8 #define IIR_ALPHA_GT 0.8 static void do_iir_filter(uint64_t *target, double alpha_lt, double alpha_gt, uint64_t sample) { uint64_t t = *target; double alpha = t < sample ? alpha_lt : alpha_gt; double a = time_ns_to_s(t) * alpha; double b = time_ns_to_s(sample) * (1.0 - alpha); *target = time_s_to_ns(a + b); } static uint64_t min_period(const struct pacing_app *pa) { return pa->last_input.predicted_display_period_ns; } static uint64_t last_sample_displayed(const struct pacing_app *pa) { return pa->last_input.predicted_display_time_ns; } static uint64_t last_return_predicted_display(const struct pacing_app *pa) { return pa->last_returned_ns; } static uint64_t total_app_time_ns(const struct pacing_app *pa) { return pa->app.cpu_time_ns + pa->app.draw_time_ns + pa->app.wait_time_ns; } static uint64_t total_compositor_time_ns(const struct pacing_app *pa) { return pa->app.margin_ns + pa->last_input.extra_ns; } static uint64_t total_app_and_compositor_time_ns(const struct pacing_app *pa) { return total_app_time_ns(pa) + total_compositor_time_ns(pa); } static uint64_t calc_period(const struct pacing_app *pa) { // Error checking. uint64_t base_period_ns = min_period(pa); if (base_period_ns == 0) { assert(false && "Have not yet received and samples from timing driver."); base_period_ns = U_TIME_1MS_IN_NS * 16; // Sure } // Calculate the using both values separately. uint64_t period_ns = base_period_ns; while (pa->app.cpu_time_ns > period_ns) { period_ns += base_period_ns; } while (pa->app.draw_time_ns > period_ns) { period_ns += base_period_ns; } while (pa->app.wait_time_ns > period_ns) { period_ns += base_period_ns; } return period_ns; } static uint64_t predict_display_time(const struct pacing_app *pa, uint64_t now_ns, uint64_t period_ns) { // Total app and compositor time to produce a frame uint64_t app_and_compositor_time_ns = total_app_and_compositor_time_ns(pa); // Start from the last time that the driver displayed something. uint64_t val = last_sample_displayed(pa); // Return a time after the last returned display time. Add half the // display period to the comparison for robustness when the last display // time shifts slightly with respect to the last sample. while (val <= last_return_predicted_display(pa) + (period_ns / 2)) { val += period_ns; } // Have to have enough time to perform app work. while ((val - app_and_compositor_time_ns) <= now_ns) { val += period_ns; } return val; } /* * * Metrics and tracing. * */ static void do_metrics(struct pacing_app *pa, struct u_pa_frame *f, bool discarded) { if (!u_metrics_is_active()) { return; } struct u_metrics_session_frame umsf = { .session_id = pa->session_id, .frame_id = f->frame_id, .predicted_frame_time_ns = f->predicted_frame_time_ns, .predicted_wake_up_time_ns = f->predicted_wake_up_time_ns, .predicted_gpu_done_time_ns = f->predicted_gpu_done_time_ns, .predicted_display_time_ns = f->predicted_display_time_ns, .predicted_display_period_ns = f->predicted_display_period_ns, .display_time_ns = f->display_time_ns, .when_predicted_ns = f->when.predicted_ns, .when_wait_woke_ns = f->when.wait_woke_ns, .when_begin_ns = f->when.begin_ns, .when_delivered_ns = f->when.delivered_ns, .when_gpu_done_ns = f->when.gpu_done_ns, .discarded = discarded, }; u_metrics_write_session_frame(&umsf); } static void do_tracing(struct pacing_app *pa, struct u_pa_frame *f) { if (!U_TRACE_CATEGORY_IS_ENABLED(timing)) { return; } #ifdef U_TRACE_TRACY // Uses Tracy specific things. uint64_t cpu_ns = f->when.begin_ns - f->when.wait_woke_ns; TracyCPlot("App CPU(ms)", time_ns_to_ms_f(cpu_ns)); uint64_t draw_ns = f->when.delivered_ns - f->when.begin_ns; TracyCPlot("App Draw(ms)", time_ns_to_ms_f(draw_ns)); uint64_t gpu_ns = f->when.gpu_done_ns - f->when.delivered_ns; TracyCPlot("App GPU(ms)", time_ns_to_ms_f(gpu_ns)); int64_t diff_ns = (int64_t)f->when.gpu_done_ns - (int64_t)f->predicted_gpu_done_time_ns; TracyCPlot("App vs Expected(ms)", time_ns_to_ms_f(diff_ns)); #endif #ifdef U_TRACE_PERCETTO // Uses Percetto specific things. #define TE_BEG(TRACK, TIME, NAME) U_TRACE_EVENT_BEGIN_ON_TRACK_DATA(timing, TRACK, TIME, NAME, PERCETTO_I(f->frame_id)) #define TE_END(TRACK, TIME) U_TRACE_EVENT_END_ON_TRACK(timing, TRACK, TIME) TE_BEG(pa_cpu, f->when.predicted_ns, "sleep"); TE_END(pa_cpu, f->when.wait_woke_ns); uint64_t cpu_start_ns = f->when.wait_woke_ns + 1; TE_BEG(pa_cpu, cpu_start_ns, "cpu"); TE_END(pa_cpu, f->when.begin_ns); TE_BEG(pa_draw, f->when.begin_ns, "draw"); if (f->when.begin_ns > f->predicted_gpu_done_time_ns) { TE_BEG(pa_draw, f->when.begin_ns, "late"); TE_END(pa_draw, f->when.delivered_ns); } else if (f->when.delivered_ns > f->predicted_gpu_done_time_ns) { TE_BEG(pa_draw, f->predicted_gpu_done_time_ns, "late"); TE_END(pa_draw, f->when.delivered_ns); } TE_END(pa_draw, f->when.delivered_ns); TE_BEG(pa_wait, f->when.delivered_ns, "wait"); if (f->when.delivered_ns > f->predicted_gpu_done_time_ns) { TE_BEG(pa_wait, f->when.delivered_ns, "late"); TE_END(pa_wait, f->when.gpu_done_ns); } else if (f->when.delivered_ns > f->predicted_gpu_done_time_ns) { TE_BEG(pa_wait, f->predicted_gpu_done_time_ns, "late"); TE_END(pa_wait, f->when.gpu_done_ns); } TE_END(pa_wait, f->when.gpu_done_ns); #undef TE_BEG #undef TE_END #endif } /* * * Member functions. * */ static void pa_predict(struct u_pacing_app *upa, uint64_t now_ns, int64_t *out_frame_id, uint64_t *out_wake_up_time, uint64_t *out_predicted_display_time, uint64_t *out_predicted_display_period) { struct pacing_app *pa = pacing_app(upa); int64_t frame_id = ++pa->frame_counter; *out_frame_id = frame_id; DEBUG_PRINT_FRAME_ID(); uint64_t period_ns = calc_period(pa); uint64_t predict_ns = predict_display_time(pa, now_ns, period_ns); // How long we think the frame should take. uint64_t frame_time_ns = total_app_time_ns(pa); // When should the client wake up. uint64_t wake_up_time_ns = predict_ns - total_app_and_compositor_time_ns(pa); // When the client's GPU work should have completed. uint64_t gpu_done_time_ns = predict_ns - total_compositor_time_ns(pa); pa->last_returned_ns = predict_ns; *out_wake_up_time = wake_up_time_ns; *out_predicted_display_time = predict_ns; *out_predicted_display_period = period_ns; size_t index = GET_INDEX_FROM_ID(pa, frame_id); struct u_pa_frame *f = &pa->frames[index]; assert(f->frame_id == -1); assert(f->state == U_PA_READY); f->state = U_RT_PREDICTED; f->frame_id = frame_id; f->predicted_frame_time_ns = frame_time_ns; f->predicted_wake_up_time_ns = wake_up_time_ns; f->predicted_gpu_done_time_ns = gpu_done_time_ns; f->predicted_display_time_ns = predict_ns; f->predicted_display_period_ns = period_ns; f->when.predicted_ns = now_ns; } static void pa_mark_point(struct u_pacing_app *upa, int64_t frame_id, enum u_timing_point point, uint64_t when_ns) { struct pacing_app *pa = pacing_app(upa); UPA_LOG_T("%" PRIi64 " (%u)", frame_id, point); size_t index = GET_INDEX_FROM_ID(pa, frame_id); struct u_pa_frame *f = &pa->frames[index]; assert(f->frame_id == frame_id); switch (point) { case U_TIMING_POINT_WAKE_UP: assert(f->state == U_RT_PREDICTED); f->when.wait_woke_ns = when_ns; f->state = U_RT_WAIT_LEFT; break; case U_TIMING_POINT_BEGIN: assert(f->state == U_RT_WAIT_LEFT); f->when.begin_ns = when_ns; f->state = U_RT_BEGUN; break; case U_TIMING_POINT_SUBMIT: default: assert(false); } } static void pa_mark_discarded(struct u_pacing_app *upa, int64_t frame_id, uint64_t when_ns) { struct pacing_app *pa = pacing_app(upa); DEBUG_PRINT_FRAME_ID(); size_t index = GET_INDEX_FROM_ID(pa, frame_id); struct u_pa_frame *f = &pa->frames[index]; assert(f->frame_id == frame_id); assert(f->state == U_RT_WAIT_LEFT || f->state == U_RT_BEGUN); // Update all data. f->when.delivered_ns = when_ns; // Write out metrics data. do_metrics(pa, f, true); // Reset the frame. U_ZERO(f); // Zero for metrics f->state = U_PA_READY; f->frame_id = -1; } static void pa_mark_delivered(struct u_pacing_app *upa, int64_t frame_id, uint64_t when_ns, uint64_t display_time_ns) { struct pacing_app *pa = pacing_app(upa); DEBUG_PRINT_FRAME_ID(); size_t index = GET_INDEX_FROM_ID(pa, frame_id); struct u_pa_frame *f = &pa->frames[index]; assert(f->frame_id == frame_id); assert(f->state == U_RT_BEGUN); // Update all data. f->when.delivered_ns = when_ns; f->display_time_ns = display_time_ns; f->state = U_RT_DELIVERED; } static void pa_mark_gpu_done(struct u_pacing_app *upa, int64_t frame_id, uint64_t when_ns) { struct pacing_app *pa = pacing_app(upa); DEBUG_PRINT_FRAME_ID(); size_t index = GET_INDEX_FROM_ID(pa, frame_id); struct u_pa_frame *f = &pa->frames[index]; assert(f->frame_id == frame_id); assert(f->state == U_RT_DELIVERED); // Update all data. f->when.gpu_done_ns = when_ns; f->state = U_RT_GPU_DONE; /* * Process data. */ int64_t diff_ns = f->predicted_gpu_done_time_ns - when_ns; bool late = false; if (diff_ns < 0) { diff_ns = -diff_ns; late = true; } uint64_t diff_cpu_ns = f->when.begin_ns - f->when.wait_woke_ns; uint64_t diff_draw_ns = f->when.delivered_ns - f->when.begin_ns; uint64_t diff_wait_ns = f->when.gpu_done_ns - f->when.delivered_ns; UPA_LOG_D( "Delivered frame %.2fms %s." // "\n\tperiod: %.2f" // "\n\tcpu o: %.2f, n: %.2f" // "\n\tdraw o: %.2f, n: %.2f" // "\n\twait o: %.2f, n: %.2f", // time_ns_to_ms_f(diff_ns), late ? "late" : "early", // time_ns_to_ms_f(f->predicted_display_period_ns), // time_ns_to_ms_f(pa->app.cpu_time_ns), time_ns_to_ms_f(diff_cpu_ns), // time_ns_to_ms_f(pa->app.draw_time_ns), time_ns_to_ms_f(diff_draw_ns), // time_ns_to_ms_f(pa->app.wait_time_ns), time_ns_to_ms_f(diff_wait_ns)); // do_iir_filter(&pa->app.cpu_time_ns, IIR_ALPHA_LT, IIR_ALPHA_GT, diff_cpu_ns); do_iir_filter(&pa->app.draw_time_ns, IIR_ALPHA_LT, IIR_ALPHA_GT, diff_draw_ns); do_iir_filter(&pa->app.wait_time_ns, IIR_ALPHA_LT, IIR_ALPHA_GT, diff_wait_ns); // Write out metrics and tracing data. do_metrics(pa, f, false); do_tracing(pa, f); #ifndef VALIDATE_LATCHED_AND_RETIRED // Reset the frame. U_ZERO(f); // Zero for metrics f->state = U_PA_READY; f->frame_id = -1; #endif } static void pa_latched(struct u_pacing_app *upa, int64_t frame_id, uint64_t when_ns, int64_t system_frame_id) { struct pacing_app *pa = pacing_app(upa); #ifdef VALIDATE_LATCHED_AND_RETIRED size_t index = GET_INDEX_FROM_ID(pa, frame_id); struct u_pa_frame *f = &pa->frames[index]; assert(f->frame_id == frame_id); assert(f->state == U_RT_GPU_DONE); #else (void)pa; #endif struct u_metrics_used umu = { .session_id = pa->session_id, .session_frame_id = frame_id, .system_frame_id = system_frame_id, .when_ns = when_ns, }; u_metrics_write_used(&umu); } static void pa_retired(struct u_pacing_app *upa, int64_t frame_id, uint64_t when_ns) { struct pacing_app *pa = pacing_app(upa); #ifdef VALIDATE_LATCHED_AND_RETIRED size_t index = GET_INDEX_FROM_ID(pa, frame_id); struct u_pa_frame *f = &pa->frames[index]; assert(f->frame_id == frame_id); assert(f->state == U_RT_GPU_DONE || f->state == U_RT_DELIVERED); // Reset the frame. U_ZERO(f); // Zero for metrics f->state = U_PA_READY; f->frame_id = -1; #else (void)pa; #endif } static void pa_info(struct u_pacing_app *upa, uint64_t predicted_display_time_ns, uint64_t predicted_display_period_ns, uint64_t extra_ns) { struct pacing_app *pa = pacing_app(upa); pa->last_input.predicted_display_time_ns = predicted_display_time_ns; pa->last_input.predicted_display_period_ns = predicted_display_period_ns; pa->last_input.extra_ns = extra_ns; } static void pa_destroy(struct u_pacing_app *upa) { free(upa); } static xrt_result_t pa_create(int64_t session_id, struct u_pacing_app **out_upa) { struct pacing_app *pa = U_TYPED_CALLOC(struct pacing_app); pa->base.predict = pa_predict; pa->base.mark_point = pa_mark_point; pa->base.mark_discarded = pa_mark_discarded; pa->base.mark_delivered = pa_mark_delivered; pa->base.mark_gpu_done = pa_mark_gpu_done; pa->base.latched = pa_latched; pa->base.retired = pa_retired; pa->base.info = pa_info; pa->base.destroy = pa_destroy; pa->session_id = session_id; pa->app.cpu_time_ns = U_TIME_1MS_IN_NS * 2; pa->app.draw_time_ns = U_TIME_1MS_IN_NS * 2; pa->app.margin_ns = U_TIME_1MS_IN_NS * 2; for (size_t i = 0; i < ARRAY_SIZE(pa->frames); i++) { pa->frames[i].state = U_PA_READY; pa->frames[i].frame_id = -1; } *out_upa = &pa->base; return XRT_SUCCESS; } /* * * Factory functions. * */ static xrt_result_t paf_create(struct u_pacing_app_factory *upaf, struct u_pacing_app **out_upa) { static int64_t session_id_gen = 0; // For now until global session id is introduced. return pa_create(session_id_gen++, out_upa); } static void paf_destroy(struct u_pacing_app_factory *upaf) { free(upaf); } /* * * 'Exported' functions. * */ xrt_result_t u_pa_factory_create(struct u_pacing_app_factory **out_upaf) { struct u_pacing_app_factory *upaf = U_TYPED_CALLOC(struct u_pacing_app_factory); upaf->create = paf_create; upaf->destroy = paf_destroy; *out_upaf = upaf; return XRT_SUCCESS; }