From 65a7155cd55c4e6d025c9df4c62795748730fbb6 Mon Sep 17 00:00:00 2001 From: psucien Date: Tue, 11 Jun 2024 21:52:48 +0200 Subject: [PATCH] tracy: added Vulkan GPU profiling --- src/common/debug.h | 5 +- .../renderer_vulkan/renderer_vulkan.cpp | 109 +++++++++--------- .../renderer_vulkan/vk_instance.cpp | 23 ++++ src/video_core/renderer_vulkan/vk_instance.h | 8 ++ .../renderer_vulkan/vk_scheduler.cpp | 15 ++- src/video_core/renderer_vulkan/vk_scheduler.h | 2 + 6 files changed, 107 insertions(+), 55 deletions(-) diff --git a/src/common/debug.h b/src/common/debug.h index 98f6d3eb2..ea1dff7d6 100644 --- a/src/common/debug.h +++ b/src/common/debug.h @@ -33,7 +33,7 @@ enum MarkersPallete : int { EmulatorMarkerColor = 0x264653, RendererMarkerColor = 0x2a9d8f, HleMarkerColor = 0xe9c46a, - Reserved0 = 0xf4a261, + GpuMarkerColor = 0xf4a261, Reserved1 = 0xe76f51, }; @@ -48,4 +48,7 @@ enum MarkersPallete : int { #define TRACE_CRIT(msg) \ [](const auto& msg) { TracyMessageC(msg.c_str(), msg.size(), tracy::Color::HotPink); }(msg) +#define GPU_SCOPE_LOCATION(name, color) \ + tracy::SourceLocationData{name, TracyFunction, TracyFile, (uint32_t)TracyLine, color}; + #define FRAME_END FrameMark diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 572316af6..ecce9bb54 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/config.h" +#include "common/debug.h" #include "common/singleton.h" #include "core/file_format/splash.h" #include "core/libraries/system/systemservice.h" @@ -270,14 +271,50 @@ void RendererVulkan::Present(Frame* frame) { }; const vk::CommandBuffer cmdbuf = frame->cmdbuf; cmdbuf.begin(begin_info); + { + TracyVkZoneC(instance.GetProfilerContext(), cmdbuf, "Host frame", + MarkersPallete::GpuMarkerColor); - const vk::Extent2D extent = swapchain.GetExtent(); - 
const std::array pre_barriers{ - vk::ImageMemoryBarrier{ - .srcAccessMask = vk::AccessFlagBits::eNone, - .dstAccessMask = vk::AccessFlagBits::eTransferWrite, - .oldLayout = vk::ImageLayout::eUndefined, - .newLayout = vk::ImageLayout::eTransferDstOptimal, + const vk::Extent2D extent = swapchain.GetExtent(); + const std::array pre_barriers{ + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = swapchain_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = frame->image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + const vk::ImageMemoryBarrier post_barrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::ePresentSrcKHR, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = swapchain_image, @@ -288,54 +325,22 @@ void RendererVulkan::Present(Frame* frame) { .baseArrayLayer = 0, .layerCount = VK_REMAINING_ARRAY_LAYERS, }, - }, - vk::ImageMemoryBarrier{ - .srcAccessMask = 
vk::AccessFlagBits::eColorAttachmentWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead, - .oldLayout = vk::ImageLayout::eGeneral, - .newLayout = vk::ImageLayout::eTransferSrcOptimal, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = frame->image, - .subresourceRange{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }, - }; - const vk::ImageMemoryBarrier post_barrier{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead, - .oldLayout = vk::ImageLayout::eTransferDstOptimal, - .newLayout = vk::ImageLayout::ePresentSrcKHR, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = swapchain_image, - .subresourceRange{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; + }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - {}, {}, pre_barriers); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); - cmdbuf.blitImage(frame->image, vk::ImageLayout::eTransferSrcOptimal, swapchain_image, - vk::ImageLayout::eTransferDstOptimal, - MakeImageBlit(frame->width, frame->height, extent.width, extent.height), - vk::Filter::eLinear); - - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + cmdbuf.blitImage(frame->image, vk::ImageLayout::eTransferSrcOptimal, swapchain_image, + vk::ImageLayout::eTransferDstOptimal, + MakeImageBlit(frame->width, 
frame->height, extent.width, extent.height), + vk::Filter::eLinear); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + } + TracyVkCollect(instance.GetProfilerContext(), cmdbuf); cmdbuf.end(); static constexpr std::array wait_stage_masks = { diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 6d19452da..06a47675c 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -160,6 +160,7 @@ bool Instance::CreateDevice() { // The next two extensions are required to be available together in order to support write masks color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); + const auto calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME); const auto family_properties = physical_device.getQueueFamilyProperties(); if (family_properties.empty()) { @@ -212,6 +213,7 @@ bool Instance::CreateDevice() { }, vk::PhysicalDeviceVulkan12Features{ .scalarBlockLayout = true, + .hostQueryReset = true, .timelineSemaphore = true, }, vk::PhysicalDeviceVulkan13Features{ @@ -251,6 +253,27 @@ bool Instance::CreateDevice() { graphics_queue = device->getQueue(queue_family_index, 0); present_queue = device->getQueue(queue_family_index, 0); + if (calibrated_timestamps) { + const auto& time_domains = physical_device.getCalibrateableTimeDomainsEXT(); +#if _WIN64 + const bool has_host_time_domain = + std::find(time_domains.cbegin(), time_domains.cend(), + vk::TimeDomainEXT::eQueryPerformanceCounter) != time_domains.cend(); +#else + const bool has_host_time_domain = + std::find(time_domains.cbegin(), time_domains.cend(), + vk::TimeDomainEXT::eClockMonotonicRaw) != time_domains.cend(); +#endif + if (has_host_time_domain) { + static 
constexpr std::string_view context_name{"vk_rasterizer"}; profiler_context = TracyVkContextHostCalibrated(*instance, physical_device, *device, VULKAN_HPP_DEFAULT_DISPATCHER.vkGetInstanceProcAddr, VULKAN_HPP_DEFAULT_DISPATCHER.vkGetDeviceProcAddr); TracyVkContextName(profiler_context, context_name.data(), context_name.size()); + } + } + CreateAllocator(); return true; } diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 797eb8869..f8e3c2e9d 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -7,6 +7,9 @@ #include "video_core/renderer_vulkan/vk_platform.h" +#define TRACY_VK_USE_SYMBOL_TABLE +#include <tracy/TracyVulkan.hpp> + namespace Frontend { class WindowSDL; } @@ -67,6 +70,10 @@ public: return present_queue; } + TracyVkCtx GetProfilerContext() const { + return profiler_context; + } + /// Returns true when a known debugging tool is attached. bool HasDebuggingToolAttached() const { return has_renderdoc || has_nsight_graphics; @@ -208,6 +215,7 @@ private: vk::Queue graphics_queue; std::vector physical_devices; std::vector available_extensions; + TracyVkCtx profiler_context{}; u32 queue_family_index{0}; bool image_view_reinterpretation{true}; bool timeline_semaphores{}; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 8e265f728..54cd69742 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -2,17 +2,21 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include "common/debug.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" namespace Vulkan { Scheduler::Scheduler(const Instance& instance) - : master_semaphore{instance}, command_pool{instance, &master_semaphore} { + : instance{instance}, master_semaphore{instance}, command_pool{instance, &master_semaphore} { +
profiler_scope = reinterpret_cast<tracy::VkCtxScope*>(std::malloc(sizeof(tracy::VkCtxScope))); AllocateWorkerCommandBuffers(); } -Scheduler::~Scheduler() = default; +Scheduler::~Scheduler() { + std::free(profiler_scope); +} void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) { // When flushing, we only send data to the worker thread; no waiting is necessary. @@ -41,11 +45,18 @@ void Scheduler::AllocateWorkerCommandBuffers() { current_cmdbuf = command_pool.Commit(); current_cmdbuf.begin(begin_info); + + static const auto scope_loc = GPU_SCOPE_LOCATION("Guest Frame", MarkersPallete::GpuMarkerColor); + new (profiler_scope) + tracy::VkCtxScope{instance.GetProfilerContext(), &scope_loc, current_cmdbuf, true}; } void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) { const u64 signal_value = master_semaphore.NextTick(); + profiler_scope->~VkCtxScope(); + TracyVkCollect(instance.GetProfilerContext(), current_cmdbuf); + std::scoped_lock lk{submit_mutex}; master_semaphore.SubmitWork(current_cmdbuf, wait_semaphore, signal_semaphore, signal_value); master_semaphore.Refresh(); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index fde48824d..284c288a7 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -54,10 +54,12 @@ private: void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore); private: + const Instance& instance; MasterSemaphore master_semaphore; CommandPool command_pool; vk::CommandBuffer current_cmdbuf; std::condition_variable_any event_cv; + tracy::VkCtxScope* profiler_scope{}; }; } // namespace Vulkan