diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5059f0321..a7cd921a4 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright 2018-2021, Collabora, Ltd. +# Copyright 2018-2022, Collabora, Ltd. # # SPDX-License-Identifier: BSL-1.0 @@ -35,6 +35,12 @@ target_link_libraries(tests_history_buf PRIVATE tests_main) target_link_libraries(tests_history_buf PRIVATE aux_util aux_math) add_test(NAME tests_history_buf COMMAND tests_history_buf --success) +# pacing +add_executable(tests_pacing tests_pacing.cpp) +target_link_libraries(tests_pacing PRIVATE tests_main) +target_link_libraries(tests_pacing PRIVATE aux_util aux_math) +add_test(NAME tests_pacing COMMAND tests_pacing --success) + # misc wrappers add_executable(tests_cxx_wrappers tests_cxx_wrappers.cpp) target_link_libraries(tests_cxx_wrappers PRIVATE tests_main) diff --git a/tests/tests_pacing.cpp b/tests/tests_pacing.cpp new file mode 100644 index 000000000..2bbc137b4 --- /dev/null +++ b/tests/tests_pacing.cpp @@ -0,0 +1,388 @@ +// Copyright 2022, Collabora, Ltd. +// SPDX-License-Identifier: BSL-1.0 +/*! + * @file + * @brief Frame pacing tests. + * @author Ryan Pavlik + */ + +#include + +#include "catch/catch.hpp" + +#include "time_utils.hpp" + +#include +#include +#include +#include +#include + +using namespace std::chrono_literals; +using namespace std::chrono; + +static constexpr unanoseconds frame_interval_ns(16ms); + +namespace { + +uint64_t +getNextPresentAfterTimestampAndKnownPresent(uint64_t timestamp_ns, uint64_t known_present_ns) +{ + + while (known_present_ns < timestamp_ns) { + known_present_ns += frame_interval_ns.count(); + } + return known_present_ns; +} +uint64_t +getPresentBefore(uint64_t timestamp_ns, uint64_t known_present_ns) +{ + + while (known_present_ns >= timestamp_ns && known_present_ns > frame_interval_ns.count()) { + known_present_ns -= frame_interval_ns.count(); + } + return known_present_ns; +} +uint64_t +getNextPresentAfterTimestamp(uint64_t timestamp_ns, uint64_t known_present_ns) +{ + auto present_before_ns = getPresentBefore(timestamp_ns, known_present_ns); + return getNextPresentAfterTimestampAndKnownPresent(timestamp_ns, present_before_ns); +} + +struct CompositorPredictions +{ + int64_t frame_id{0}; + uint64_t wake_up_time_ns{0}; + uint64_t desired_present_time_ns{0}; + uint64_t present_slop_ns{0}; + uint64_t predicted_display_time_ns{0}; + uint64_t predicted_display_period_ns{0}; + uint64_t min_display_period_ns{0}; +}; +} // namespace + +static void +basicPredictionConsistencyChecks(uint64_t now_ns, CompositorPredictions const &predictions) +{ + INFO(predictions.frame_id); + INFO(now_ns); + CHECK(predictions.wake_up_time_ns >= now_ns); + CHECK(predictions.desired_present_time_ns > now_ns); + CHECK(predictions.desired_present_time_ns > predictions.wake_up_time_ns); + CHECK(predictions.predicted_display_time_ns > now_ns); + CHECK(predictions.predicted_display_time_ns > predictions.desired_present_time_ns); + // display period predicted to be +- 2ms (arbitrary) + CHECK(unanoseconds(predictions.predicted_display_period_ns) < (frame_interval_ns + unanoseconds(2ms))); + CHECK(unanoseconds(predictions.predicted_display_period_ns) > (frame_interval_ns - unanoseconds(2ms))); +} + +struct SimulatedDisplayTimingData +{ + SimulatedDisplayTimingData(int64_t id, uint64_t desired_present_time, uint64_t gpu_finish, uint64_t now) + : frame_id(id), desired_present_time_ns(desired_present_time), + actual_present_time_ns(getNextPresentAfterTimestampAndKnownPresent(gpu_finish, desired_present_time)), + earliest_present_time_ns(getNextPresentAfterTimestamp(gpu_finish, desired_present_time)), + present_margin_ns(earliest_present_time_ns - gpu_finish), now_ns(now) + {} + + int64_t frame_id; + uint64_t desired_present_time_ns; + uint64_t actual_present_time_ns; + uint64_t earliest_present_time_ns; + uint64_t present_margin_ns; + uint64_t now_ns; + void + call_u_pc_info(u_pacing_compositor *upc) const + { + std::cout << "frame_id: " << frame_id << std::endl; + std::cout << "desired_present_time_ns: " << desired_present_time_ns << std::endl; + std::cout << "actual_present_time_ns: " << actual_present_time_ns << std::endl; + std::cout << "earliest_present_time_ns: " << earliest_present_time_ns << std::endl; + std::cout << "present_margin_ns: " << present_margin_ns << std::endl; + std::cout << "now_ns: " << now_ns << "\n" << std::endl; + u_pc_info(upc, frame_id, desired_present_time_ns, actual_present_time_ns, earliest_present_time_ns, + present_margin_ns, now_ns); + } +}; +static inline bool +operator>(SimulatedDisplayTimingData const &lhs, SimulatedDisplayTimingData const &rhs) +{ + return lhs.now_ns > rhs.now_ns; +} +using SimulatedDisplayTimingQueue = std::priority_queue, + std::greater>; + +//! Process all simulated timing data in the queue that should be processed by now. +static void +processDisplayTimingQueue(SimulatedDisplayTimingQueue &display_timing_queue, uint64_t now_ns, u_pacing_compositor *upc) +{ + while (!display_timing_queue.empty() && display_timing_queue.top().now_ns <= now_ns) { + display_timing_queue.top().call_u_pc_info(upc); + display_timing_queue.pop(); + } +} +//! Process all remaining simulated timing data in the queue and return the timestamp of the last one. +static uint64_t +drainDisplayTimingQueue(SimulatedDisplayTimingQueue &display_timing_queue, uint64_t now_ns, u_pacing_compositor *upc) +{ + while (!display_timing_queue.empty()) { + now_ns = display_timing_queue.top().now_ns; + display_timing_queue.top().call_u_pc_info(upc); + display_timing_queue.pop(); + } + return now_ns; +} + +static void +doFrame(SimulatedDisplayTimingQueue &display_timing_queue, + u_pacing_compositor *upc, + MockClock &clock, + uint64_t wake_time_ns, + uint64_t desired_present_time_ns, + int64_t frame_id, + unanoseconds wake_delay, + unanoseconds begin_delay, + unanoseconds submit_delay, + unanoseconds gpu_time_after_submit) +{ + REQUIRE(clock.now() <= wake_time_ns); + // wake up (after delay) + clock.advance_to(wake_time_ns); + clock.advance(wake_delay); + processDisplayTimingQueue(display_timing_queue, clock.now(), upc); + u_pc_mark_point(upc, U_TIMING_POINT_WAKE_UP, frame_id, clock.now()); + + // begin (after delay) + clock.advance(begin_delay); + processDisplayTimingQueue(display_timing_queue, clock.now(), upc); + u_pc_mark_point(upc, U_TIMING_POINT_BEGIN, frame_id, clock.now()); + + // spend cpu time before submit + clock.advance(submit_delay); + processDisplayTimingQueue(display_timing_queue, clock.now(), upc); + u_pc_mark_point(upc, U_TIMING_POINT_SUBMIT, frame_id, clock.now()); + + // spend gpu time before present + clock.advance(gpu_time_after_submit); + auto gpu_finish = clock.now(); + auto next_scanout_timepoint = getNextPresentAfterTimestampAndKnownPresent(gpu_finish, desired_present_time_ns); + + REQUIRE(next_scanout_timepoint >= gpu_finish); + + // our wisdom arrives after scanout + MockClock infoClock; + infoClock.advance_to(next_scanout_timepoint); + infoClock.advance(1ms); + display_timing_queue.push({frame_id, desired_present_time_ns, gpu_finish, infoClock.now()}); +} + +// u_pc is for the compositor, we should take way less than a frame to do our job. +static constexpr auto wakeDelay = microseconds(20); + +static constexpr auto shortBeginDelay = microseconds(20); +static constexpr auto shortSubmitDelay = 200us; +static constexpr auto shortGpuTime = 1ms; + + +static constexpr auto longBeginDelay = 1ms; +static constexpr auto longSubmitDelay = 2ms; +static constexpr auto longGpuTime = 2ms; + +TEST_CASE("u_pacing_compositor_display_timing") +{ + u_pacing_compositor *upc = nullptr; + MockClock clock; + REQUIRE(XRT_SUCCESS == + u_pc_display_timing_create(frame_interval_ns.count(), &U_PC_DISPLAY_TIMING_CONFIG_DEFAULT, &upc)); + REQUIRE(upc != nullptr); + + clock.advance(1ms); + + CompositorPredictions predictions; + u_pc_predict(upc, clock.now(), &predictions.frame_id, &predictions.wake_up_time_ns, + &predictions.desired_present_time_ns, &predictions.present_slop_ns, + &predictions.predicted_display_time_ns, &predictions.predicted_display_period_ns, + &predictions.min_display_period_ns); + basicPredictionConsistencyChecks(clock.now(), predictions); + + auto frame_id = predictions.frame_id; + SimulatedDisplayTimingQueue queue; + + + SECTION("faster than expected") + { + // We have a 16ms period + // wake promptly + clock.advance(wakeDelay); + u_pc_mark_point(upc, U_TIMING_POINT_WAKE_UP, frame_id, clock.now()); + + // start promptly + clock.advance(shortBeginDelay); + u_pc_mark_point(upc, U_TIMING_POINT_BEGIN, frame_id, clock.now()); + + // spend cpu time before submit + clock.advance(shortSubmitDelay); + u_pc_mark_point(upc, U_TIMING_POINT_SUBMIT, frame_id, clock.now()); + + // spend time in gpu rendering until present + clock.advance(shortGpuTime); + auto gpu_finish = clock.now(); + + auto next_scanout_timepoint = + getNextPresentAfterTimestampAndKnownPresent(gpu_finish, predictions.desired_present_time_ns); + + // our wisdom arrives after scanout + MockClock infoClock; + infoClock.advance_to(next_scanout_timepoint); + infoClock.advance(1ms); + queue.push({frame_id, predictions.desired_present_time_ns, gpu_finish, infoClock.now()}); + + // Do basically the same thing a few more frames. + for (int i = 0; i < 20; ++i) { + CompositorPredictions loopPred; + u_pc_predict(upc, clock.now(), &loopPred.frame_id, &loopPred.wake_up_time_ns, + &loopPred.desired_present_time_ns, &loopPred.present_slop_ns, + &loopPred.predicted_display_time_ns, &loopPred.predicted_display_period_ns, + &loopPred.min_display_period_ns); + CHECK(loopPred.frame_id > i); + INFO("frame id" << loopPred.frame_id); + INFO(clock.now()); + basicPredictionConsistencyChecks(clock.now(), loopPred); + doFrame(queue, upc, clock, loopPred.wake_up_time_ns, loopPred.desired_present_time_ns, + loopPred.frame_id, wakeDelay, shortBeginDelay, shortSubmitDelay, shortGpuTime); + } + // we should now get a shorter time before present to wake up. + CompositorPredictions newPred; + u_pc_predict(upc, clock.now(), &newPred.frame_id, &newPred.wake_up_time_ns, + &newPred.desired_present_time_ns, &newPred.present_slop_ns, + &newPred.predicted_display_time_ns, &newPred.predicted_display_period_ns, + &newPred.min_display_period_ns); + basicPredictionConsistencyChecks(clock.now(), newPred); + CHECK(unanoseconds(newPred.desired_present_time_ns - newPred.wake_up_time_ns) < + unanoseconds(predictions.desired_present_time_ns - predictions.wake_up_time_ns)); + CHECK(unanoseconds(newPred.desired_present_time_ns - newPred.wake_up_time_ns) > + unanoseconds(shortSubmitDelay + shortGpuTime)); + } + + SECTION("slower than desired") + { + // We have a 16ms period + // wake promptly + clock.advance(wakeDelay); + u_pc_mark_point(upc, U_TIMING_POINT_WAKE_UP, frame_id, clock.now()); + + // waste time before beginframe + clock.advance(longBeginDelay); + u_pc_mark_point(upc, U_TIMING_POINT_BEGIN, frame_id, clock.now()); + + // spend cpu time before submit + clock.advance(longSubmitDelay); + u_pc_mark_point(upc, U_TIMING_POINT_SUBMIT, frame_id, clock.now()); + + // spend time in gpu rendering until present + clock.advance(longGpuTime); + auto gpu_finish = clock.now(); + + auto next_scanout_timepoint = + getNextPresentAfterTimestampAndKnownPresent(gpu_finish, predictions.desired_present_time_ns); + + REQUIRE(next_scanout_timepoint > gpu_finish); + + + // our wisdom arrives after scanout + MockClock infoClock; + infoClock.advance_to(next_scanout_timepoint); + infoClock.advance(1ms); + queue.push({frame_id, predictions.desired_present_time_ns, gpu_finish, infoClock.now()}); + + // Do basically the same thing a few more frames. + for (int i = 0; i < 50; ++i) { + CompositorPredictions loopPred; + u_pc_predict(upc, clock.now(), &loopPred.frame_id, &loopPred.wake_up_time_ns, + &loopPred.desired_present_time_ns, &loopPred.present_slop_ns, + &loopPred.predicted_display_time_ns, &loopPred.predicted_display_period_ns, + &loopPred.min_display_period_ns); + INFO(loopPred.frame_id); + INFO(clock.now()); + basicPredictionConsistencyChecks(clock.now(), loopPred); + doFrame(queue, upc, clock, loopPred.wake_up_time_ns, loopPred.desired_present_time_ns, + loopPred.frame_id, wakeDelay, longBeginDelay, longSubmitDelay, longGpuTime); + } + + // we should now get a bigger time before present to wake up. + CompositorPredictions newPred; + u_pc_predict(upc, clock.now(), &newPred.frame_id, &newPred.wake_up_time_ns, + &newPred.desired_present_time_ns, &newPred.present_slop_ns, + &newPred.predicted_display_time_ns, &newPred.predicted_display_period_ns, + &newPred.min_display_period_ns); + basicPredictionConsistencyChecks(clock.now(), newPred); + CHECK(unanoseconds(newPred.desired_present_time_ns - newPred.wake_up_time_ns) > + unanoseconds(longBeginDelay + longSubmitDelay + longGpuTime)); + } + + u_pc_destroy(&upc); +} + +TEST_CASE("u_pacing_compositor_fake") +{ + MockClock clock; + u_pacing_compositor *upc = nullptr; + REQUIRE(XRT_SUCCESS == u_pc_fake_create(frame_interval_ns.count(), clock.now(), &upc)); + REQUIRE(upc != nullptr); + + clock.advance(1ms); + + SECTION("Standalone predictions") + { + CompositorPredictions predictions; + u_pc_predict(upc, clock.now(), &predictions.frame_id, &predictions.wake_up_time_ns, + &predictions.desired_present_time_ns, &predictions.present_slop_ns, + &predictions.predicted_display_time_ns, &predictions.predicted_display_period_ns, + &predictions.min_display_period_ns); + basicPredictionConsistencyChecks(clock.now(), predictions); + } + SECTION("Consistency in loop") + { + SimulatedDisplayTimingQueue queue; + SECTION("Fast") + { + + for (int i = 0; i < 10; ++i) { + CompositorPredictions predictions; + u_pc_predict(upc, clock.now(), &predictions.frame_id, &predictions.wake_up_time_ns, + &predictions.desired_present_time_ns, &predictions.present_slop_ns, + &predictions.predicted_display_time_ns, + &predictions.predicted_display_period_ns, + &predictions.min_display_period_ns); + INFO(predictions.frame_id); + INFO(clock.now()); + basicPredictionConsistencyChecks(clock.now(), predictions); + doFrame(queue, upc, clock, predictions.wake_up_time_ns, + predictions.desired_present_time_ns, predictions.frame_id, wakeDelay, + shortBeginDelay, shortSubmitDelay, shortGpuTime); + } + drainDisplayTimingQueue(queue, clock.now(), upc); + } + SECTION("Slow") + { + for (int i = 0; i < 10; ++i) { + CompositorPredictions predictions; + u_pc_predict(upc, clock.now(), &predictions.frame_id, &predictions.wake_up_time_ns, + &predictions.desired_present_time_ns, &predictions.present_slop_ns, + &predictions.predicted_display_time_ns, + &predictions.predicted_display_period_ns, + &predictions.min_display_period_ns); + INFO(predictions.frame_id); + INFO(clock.now()); + basicPredictionConsistencyChecks(clock.now(), predictions); + doFrame(queue, upc, clock, predictions.wake_up_time_ns, + predictions.desired_present_time_ns, predictions.frame_id, wakeDelay, + longBeginDelay, longSubmitDelay, longGpuTime); + } + drainDisplayTimingQueue(queue, clock.now(), upc); + } + } + u_pc_destroy(&upc); +} diff --git a/tests/time_utils.hpp b/tests/time_utils.hpp new file mode 100644 index 000000000..9ef5a26d6 --- /dev/null +++ b/tests/time_utils.hpp @@ -0,0 +1,135 @@ +// Copyright 2022, Collabora, Ltd. +// SPDX-License-Identifier: BSL-1.0 +/*! + * @file + * @brief Utilities for tests involving time points and durations + * @author Ryan Pavlik + */ + +#pragma once + +#include +#include +#include +#include +#include + +using unanoseconds = std::chrono::duration; + + +template +static inline std::string +stringifyNanos(std::chrono::duration value) +{ + using namespace std::chrono; + std::ostringstream oss; + auto sec = duration_cast>(value); + if (duration(sec) == value) { + oss << sec.count() << "s"; + return oss.str(); + } + + auto millis = duration_cast>(value); + if (duration(millis) == value) { + oss << millis.count() << "ms"; + return oss.str(); + } + + auto micros = duration_cast>(value); + if (duration(micros) == value) { + oss << micros.count() << "us"; + return oss.str(); + } + + oss << value.count() << "ns"; + return oss.str(); +} + +static inline std::string +stringifyTimePoint(std::chrono::steady_clock::time_point tp) +{ + auto dur = tp.time_since_epoch(); + auto hr = std::chrono::duration_cast(dur); + dur -= hr; + auto sec = std::chrono::duration_cast(dur); + dur -= sec; + std::ostringstream oss; + if (hr.count() > 0) { + oss << hr.count() << ":"; + } + oss << sec.count() << "."; + using three_commas = + std::ratio_multiply, std::ratio_multiply, std::ratio<1, 1000>>>; + static_assert(std::ratio_equal::value); + // 9 because of the static assert above: there's no compile-time rational log10? :-O + oss << std::setfill('0') << std::setw(9); + oss << dur.count(); + return oss.str(); +} + +namespace Catch { +template <> struct StringMaker +{ + static std::string + convert(unanoseconds const &value) + { + return stringifyNanos(value); + } +}; +template <> struct StringMaker +{ + static std::string + convert(std::chrono::nanoseconds const &value) + { + return stringifyNanos(value); + } +}; +template <> struct StringMaker +{ + static std::string + convert(std::chrono::steady_clock::time_point const &value) + { + return stringifyTimePoint(value); + } +}; +} // namespace Catch + + +class MockClock +{ +public: + uint64_t + now() const noexcept + { + return std::chrono::duration_cast(now_.time_since_epoch()).count(); + } + + std::chrono::steady_clock::time_point + now_typed() const noexcept + { + return now_; + } + + void + advance(unanoseconds ns) + { + now_ += ns; + } + + void + advance_to(uint64_t timestamp_ns) + { + CHECK(now() <= timestamp_ns); + now_ = std::chrono::steady_clock::time_point( + std::chrono::steady_clock::duration(unanoseconds(timestamp_ns))); + } + +private: + std::chrono::steady_clock::time_point now_{std::chrono::steady_clock::duration(std::chrono::seconds(1000000))}; +}; + +struct FutureEvent +{ + std::chrono::steady_clock::time_point time_point; + std::function action; +};