// Copyright 2022, Collabora, Ltd. // SPDX-License-Identifier: BSL-1.0 /*! * @file * @brief Frame pacing tests. * @author Rylie Pavlik */ #include #include "catch_amalgamated.hpp" #include "time_utils.hpp" #include #include #include #include #include using namespace std::chrono_literals; using namespace std::chrono; static constexpr unanoseconds frame_interval_ns(16ms); namespace { uint64_t getNextPresentAfterTimestampAndKnownPresent(uint64_t timestamp_ns, uint64_t known_present_ns) { while (known_present_ns < timestamp_ns) { known_present_ns += frame_interval_ns.count(); } return known_present_ns; } uint64_t getPresentBefore(uint64_t timestamp_ns, uint64_t known_present_ns) { while (known_present_ns >= timestamp_ns && known_present_ns > frame_interval_ns.count()) { known_present_ns -= frame_interval_ns.count(); } return known_present_ns; } uint64_t getNextPresentAfterTimestamp(uint64_t timestamp_ns, uint64_t known_present_ns) { auto present_before_ns = getPresentBefore(timestamp_ns, known_present_ns); return getNextPresentAfterTimestampAndKnownPresent(timestamp_ns, present_before_ns); } struct CompositorPredictions { int64_t frame_id{0}; uint64_t wake_up_time_ns{0}; uint64_t desired_present_time_ns{0}; uint64_t present_slop_ns{0}; uint64_t predicted_display_time_ns{0}; uint64_t predicted_display_period_ns{0}; uint64_t min_display_period_ns{0}; }; } // namespace static void basicPredictionConsistencyChecks(uint64_t now_ns, CompositorPredictions const &predictions) { INFO(predictions.frame_id); INFO(now_ns); CHECK(predictions.wake_up_time_ns >= now_ns); CHECK(predictions.desired_present_time_ns > now_ns); CHECK(predictions.desired_present_time_ns > predictions.wake_up_time_ns); CHECK(predictions.predicted_display_time_ns > now_ns); CHECK(predictions.predicted_display_time_ns > predictions.desired_present_time_ns); // display period predicted to be +- 2ms (arbitrary) CHECK(unanoseconds(predictions.predicted_display_period_ns) < (frame_interval_ns + unanoseconds(2ms))); CHECK(unanoseconds(predictions.predicted_display_period_ns) > (frame_interval_ns - unanoseconds(2ms))); } struct SimulatedDisplayTimingData { SimulatedDisplayTimingData(int64_t id, uint64_t desired_present_time, uint64_t gpu_finish, uint64_t now) : frame_id(id), desired_present_time_ns(desired_present_time), actual_present_time_ns(getNextPresentAfterTimestampAndKnownPresent(gpu_finish, desired_present_time)), earliest_present_time_ns(getNextPresentAfterTimestamp(gpu_finish, desired_present_time)), present_margin_ns(earliest_present_time_ns - gpu_finish), now_ns(now) {} int64_t frame_id; uint64_t desired_present_time_ns; uint64_t actual_present_time_ns; uint64_t earliest_present_time_ns; uint64_t present_margin_ns; uint64_t now_ns; void call_u_pc_info(u_pacing_compositor *upc) const { std::cout << "frame_id: " << frame_id << std::endl; std::cout << "desired_present_time_ns: " << desired_present_time_ns << std::endl; std::cout << "actual_present_time_ns: " << actual_present_time_ns << std::endl; std::cout << "earliest_present_time_ns: " << earliest_present_time_ns << std::endl; std::cout << "present_margin_ns: " << present_margin_ns << std::endl; std::cout << "now_ns: " << now_ns << "\n" << std::endl; u_pc_info(upc, frame_id, desired_present_time_ns, actual_present_time_ns, earliest_present_time_ns, present_margin_ns, now_ns); } }; static inline bool operator>(SimulatedDisplayTimingData const &lhs, SimulatedDisplayTimingData const &rhs) { return lhs.now_ns > rhs.now_ns; } using SimulatedDisplayTimingQueue = std::priority_queue, std::greater>; //! Process all simulated timing data in the queue that should be processed by now. static void processDisplayTimingQueue(SimulatedDisplayTimingQueue &display_timing_queue, uint64_t now_ns, u_pacing_compositor *upc) { while (!display_timing_queue.empty() && display_timing_queue.top().now_ns <= now_ns) { display_timing_queue.top().call_u_pc_info(upc); display_timing_queue.pop(); } } //! Process all remaining simulated timing data in the queue and return the timestamp of the last one. static uint64_t drainDisplayTimingQueue(SimulatedDisplayTimingQueue &display_timing_queue, uint64_t now_ns, u_pacing_compositor *upc) { while (!display_timing_queue.empty()) { now_ns = display_timing_queue.top().now_ns; display_timing_queue.top().call_u_pc_info(upc); display_timing_queue.pop(); } return now_ns; } static void doFrame(SimulatedDisplayTimingQueue &display_timing_queue, u_pacing_compositor *upc, MockClock &clock, uint64_t wake_time_ns, uint64_t desired_present_time_ns, int64_t frame_id, unanoseconds wake_delay, unanoseconds begin_delay, unanoseconds draw_delay, unanoseconds submit_delay, unanoseconds gpu_time_after_submit) { REQUIRE(clock.now() <= wake_time_ns); // wake up (after delay) clock.advance_to(wake_time_ns); clock.advance(wake_delay); processDisplayTimingQueue(display_timing_queue, clock.now(), upc); u_pc_mark_point(upc, U_TIMING_POINT_WAKE_UP, frame_id, clock.now()); // begin (after delay) clock.advance(begin_delay); processDisplayTimingQueue(display_timing_queue, clock.now(), upc); u_pc_mark_point(upc, U_TIMING_POINT_BEGIN, frame_id, clock.now()); // spend cpu time drawing clock.advance(draw_delay); u_pc_mark_point(upc, U_TIMING_POINT_SUBMIT_BEGIN, frame_id, clock.now()); // spend cpu time before submit clock.advance(submit_delay); processDisplayTimingQueue(display_timing_queue, clock.now(), upc); u_pc_mark_point(upc, U_TIMING_POINT_SUBMIT_END, frame_id, clock.now()); // spend gpu time before present clock.advance(gpu_time_after_submit); auto gpu_finish = clock.now(); auto next_scanout_timepoint = getNextPresentAfterTimestampAndKnownPresent(gpu_finish, desired_present_time_ns); REQUIRE(next_scanout_timepoint >= gpu_finish); // our wisdom arrives after scanout MockClock infoClock; infoClock.advance_to(next_scanout_timepoint); infoClock.advance(1ms); display_timing_queue.push({frame_id, desired_present_time_ns, gpu_finish, infoClock.now()}); } // u_pc is for the compositor, we should take way less than a frame to do our job. static constexpr auto wakeDelay = microseconds(20); static constexpr auto shortBeginDelay = microseconds(20); static constexpr auto shortDrawDelay = 150us; static constexpr auto shortSubmitDelay = 50us; static constexpr auto shortGpuTime = 1ms; static constexpr auto longBeginDelay = 1ms; static constexpr auto longDrawDelay = 2ms; static constexpr auto longGpuTime = 2ms; TEST_CASE("u_pacing_compositor_display_timing") { u_pacing_compositor *upc = nullptr; MockClock clock; REQUIRE(XRT_SUCCESS == u_pc_display_timing_create(frame_interval_ns.count(), &U_PC_DISPLAY_TIMING_CONFIG_DEFAULT, &upc)); REQUIRE(upc != nullptr); clock.advance(1ms); CompositorPredictions predictions; u_pc_predict(upc, clock.now(), &predictions.frame_id, &predictions.wake_up_time_ns, &predictions.desired_present_time_ns, &predictions.present_slop_ns, &predictions.predicted_display_time_ns, &predictions.predicted_display_period_ns, &predictions.min_display_period_ns); basicPredictionConsistencyChecks(clock.now(), predictions); auto frame_id = predictions.frame_id; SimulatedDisplayTimingQueue queue; SECTION("faster than expected") { // We have a 16ms period // wake promptly clock.advance(wakeDelay); u_pc_mark_point(upc, U_TIMING_POINT_WAKE_UP, frame_id, clock.now()); // start promptly clock.advance(shortBeginDelay); u_pc_mark_point(upc, U_TIMING_POINT_BEGIN, frame_id, clock.now()); // spend cpu time drawing clock.advance(shortDrawDelay); u_pc_mark_point(upc, U_TIMING_POINT_SUBMIT_BEGIN, frame_id, clock.now()); // spend a little cpu time submitting the work to the GPU clock.advance(shortSubmitDelay); u_pc_mark_point(upc, U_TIMING_POINT_SUBMIT_END, frame_id, clock.now()); // spend time in gpu rendering until present clock.advance(shortGpuTime); auto gpu_finish = clock.now(); auto next_scanout_timepoint = getNextPresentAfterTimestampAndKnownPresent(gpu_finish, predictions.desired_present_time_ns); // our wisdom arrives after scanout MockClock infoClock; infoClock.advance_to(next_scanout_timepoint); infoClock.advance(1ms); queue.push({frame_id, predictions.desired_present_time_ns, gpu_finish, infoClock.now()}); // Do basically the same thing a few more frames. for (int i = 0; i < 20; ++i) { CompositorPredictions loopPred; u_pc_predict(upc, clock.now(), &loopPred.frame_id, &loopPred.wake_up_time_ns, &loopPred.desired_present_time_ns, &loopPred.present_slop_ns, &loopPred.predicted_display_time_ns, &loopPred.predicted_display_period_ns, &loopPred.min_display_period_ns); CHECK(loopPred.frame_id > i); INFO("frame id" << loopPred.frame_id); INFO(clock.now()); basicPredictionConsistencyChecks(clock.now(), loopPred); doFrame(queue, upc, clock, loopPred.wake_up_time_ns, loopPred.desired_present_time_ns, loopPred.frame_id, wakeDelay, shortBeginDelay, shortDrawDelay, shortSubmitDelay, shortGpuTime); } // we should now get a shorter time before present to wake up. CompositorPredictions newPred; u_pc_predict(upc, clock.now(), &newPred.frame_id, &newPred.wake_up_time_ns, &newPred.desired_present_time_ns, &newPred.present_slop_ns, &newPred.predicted_display_time_ns, &newPred.predicted_display_period_ns, &newPred.min_display_period_ns); basicPredictionConsistencyChecks(clock.now(), newPred); CHECK(unanoseconds(newPred.desired_present_time_ns - newPred.wake_up_time_ns) < unanoseconds(predictions.desired_present_time_ns - predictions.wake_up_time_ns)); CHECK(unanoseconds(newPred.desired_present_time_ns - newPred.wake_up_time_ns) > unanoseconds(shortDrawDelay + shortSubmitDelay + shortGpuTime)); } SECTION("slower than desired") { // We have a 16ms period // wake promptly clock.advance(wakeDelay); u_pc_mark_point(upc, U_TIMING_POINT_WAKE_UP, frame_id, clock.now()); // waste time before beginframe clock.advance(longBeginDelay); u_pc_mark_point(upc, U_TIMING_POINT_BEGIN, frame_id, clock.now()); // spend cpu time drawing clock.advance(longDrawDelay); u_pc_mark_point(upc, U_TIMING_POINT_SUBMIT_BEGIN, frame_id, clock.now()); // spend a little cpu time submitting the work to the GPU clock.advance(shortSubmitDelay); u_pc_mark_point(upc, U_TIMING_POINT_SUBMIT_END, frame_id, clock.now()); // spend time in gpu rendering until present clock.advance(longGpuTime); auto gpu_finish = clock.now(); auto next_scanout_timepoint = getNextPresentAfterTimestampAndKnownPresent(gpu_finish, predictions.desired_present_time_ns); REQUIRE(next_scanout_timepoint > gpu_finish); // our wisdom arrives after scanout MockClock infoClock; infoClock.advance_to(next_scanout_timepoint); infoClock.advance(1ms); queue.push({frame_id, predictions.desired_present_time_ns, gpu_finish, infoClock.now()}); // Do basically the same thing a few more frames. for (int i = 0; i < 50; ++i) { CompositorPredictions loopPred; u_pc_predict(upc, clock.now(), &loopPred.frame_id, &loopPred.wake_up_time_ns, &loopPred.desired_present_time_ns, &loopPred.present_slop_ns, &loopPred.predicted_display_time_ns, &loopPred.predicted_display_period_ns, &loopPred.min_display_period_ns); INFO(loopPred.frame_id); INFO(clock.now()); basicPredictionConsistencyChecks(clock.now(), loopPred); doFrame(queue, upc, clock, loopPred.wake_up_time_ns, loopPred.desired_present_time_ns, loopPred.frame_id, wakeDelay, longBeginDelay, longDrawDelay, shortSubmitDelay, longGpuTime); } // we should now get a bigger time before present to wake up. CompositorPredictions newPred; u_pc_predict(upc, clock.now(), &newPred.frame_id, &newPred.wake_up_time_ns, &newPred.desired_present_time_ns, &newPred.present_slop_ns, &newPred.predicted_display_time_ns, &newPred.predicted_display_period_ns, &newPred.min_display_period_ns); basicPredictionConsistencyChecks(clock.now(), newPred); CHECK(unanoseconds(newPred.desired_present_time_ns - newPred.wake_up_time_ns) > unanoseconds(longBeginDelay + longDrawDelay + shortSubmitDelay + longGpuTime)); } u_pc_destroy(&upc); } TEST_CASE("u_pacing_compositor_fake") { MockClock clock; u_pacing_compositor *upc = nullptr; REQUIRE(XRT_SUCCESS == u_pc_fake_create(frame_interval_ns.count(), clock.now(), &upc)); REQUIRE(upc != nullptr); clock.advance(1ms); SECTION("Standalone predictions") { CompositorPredictions predictions; u_pc_predict(upc, clock.now(), &predictions.frame_id, &predictions.wake_up_time_ns, &predictions.desired_present_time_ns, &predictions.present_slop_ns, &predictions.predicted_display_time_ns, &predictions.predicted_display_period_ns, &predictions.min_display_period_ns); basicPredictionConsistencyChecks(clock.now(), predictions); } SECTION("Consistency in loop") { SimulatedDisplayTimingQueue queue; SECTION("Fast") { for (int i = 0; i < 10; ++i) { CompositorPredictions predictions; u_pc_predict(upc, clock.now(), &predictions.frame_id, &predictions.wake_up_time_ns, &predictions.desired_present_time_ns, &predictions.present_slop_ns, &predictions.predicted_display_time_ns, &predictions.predicted_display_period_ns, &predictions.min_display_period_ns); INFO(predictions.frame_id); INFO(clock.now()); basicPredictionConsistencyChecks(clock.now(), predictions); doFrame(queue, upc, clock, predictions.wake_up_time_ns, predictions.desired_present_time_ns, predictions.frame_id, wakeDelay, shortBeginDelay, shortDrawDelay, shortSubmitDelay, shortGpuTime); } drainDisplayTimingQueue(queue, clock.now(), upc); } SECTION("Slow") { for (int i = 0; i < 10; ++i) { CompositorPredictions predictions; u_pc_predict(upc, clock.now(), &predictions.frame_id, &predictions.wake_up_time_ns, &predictions.desired_present_time_ns, &predictions.present_slop_ns, &predictions.predicted_display_time_ns, &predictions.predicted_display_period_ns, &predictions.min_display_period_ns); INFO(predictions.frame_id); INFO(clock.now()); basicPredictionConsistencyChecks(clock.now(), predictions); doFrame(queue, upc, clock, predictions.wake_up_time_ns, predictions.desired_present_time_ns, predictions.frame_id, wakeDelay, longBeginDelay, longDrawDelay, shortSubmitDelay, longGpuTime); } drainDisplayTimingQueue(queue, clock.now(), upc); } } u_pc_destroy(&upc); }