tracy: added Vulkan GPU profiling

This commit is contained in:
psucien 2024-06-11 21:52:48 +02:00
parent ce62ae31e5
commit 65a7155cd5
6 changed files with 107 additions and 55 deletions

View file

@ -33,7 +33,7 @@ enum MarkersPallete : int {
EmulatorMarkerColor = 0x264653, EmulatorMarkerColor = 0x264653,
RendererMarkerColor = 0x2a9d8f, RendererMarkerColor = 0x2a9d8f,
HleMarkerColor = 0xe9c46a, HleMarkerColor = 0xe9c46a,
Reserved0 = 0xf4a261, GpuMarkerColor = 0xf4a261,
Reserved1 = 0xe76f51, Reserved1 = 0xe76f51,
}; };
@ -48,4 +48,7 @@ enum MarkersPallete : int {
#define TRACE_CRIT(msg) \ #define TRACE_CRIT(msg) \
[](const auto& msg) { TracyMessageC(msg.c_str(), msg.size(), tracy::Color::HotPink); }(msg) [](const auto& msg) { TracyMessageC(msg.c_str(), msg.size(), tracy::Color::HotPink); }(msg)
// Builds the tracy::SourceLocationData (zone name, enclosing function, file, line, color) that
// describes a profiling zone at the point of expansion.
// Expands to a plain expression with no trailing semicolon, so it can be used as an initializer
// or inside a larger expression; the caller supplies the terminating ';'.
#define GPU_SCOPE_LOCATION(name, color) \
    tracy::SourceLocationData{name, TracyFunction, TracyFile, static_cast<uint32_t>(TracyLine), color}
#define FRAME_END FrameMark #define FRAME_END FrameMark

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h" #include "common/config.h"
#include "common/debug.h"
#include "common/singleton.h" #include "common/singleton.h"
#include "core/file_format/splash.h" #include "core/file_format/splash.h"
#include "core/libraries/system/systemservice.h" #include "core/libraries/system/systemservice.h"
@ -270,14 +271,50 @@ void RendererVulkan::Present(Frame* frame) {
}; };
const vk::CommandBuffer cmdbuf = frame->cmdbuf; const vk::CommandBuffer cmdbuf = frame->cmdbuf;
cmdbuf.begin(begin_info); cmdbuf.begin(begin_info);
{
TracyVkZoneC(instance.GetProfilerContext(), cmdbuf, "Host frame",
MarkersPallete::GpuMarkerColor);
const vk::Extent2D extent = swapchain.GetExtent(); const vk::Extent2D extent = swapchain.GetExtent();
const std::array pre_barriers{ const std::array pre_barriers{
vk::ImageMemoryBarrier{ vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eNone, .srcAccessMask = vk::AccessFlagBits::eNone,
.dstAccessMask = vk::AccessFlagBits::eTransferWrite, .dstAccessMask = vk::AccessFlagBits::eTransferWrite,
.oldLayout = vk::ImageLayout::eUndefined, .oldLayout = vk::ImageLayout::eUndefined,
.newLayout = vk::ImageLayout::eTransferDstOptimal, .newLayout = vk::ImageLayout::eTransferDstOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = swapchain_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eTransferSrcOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = frame->image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
};
const vk::ImageMemoryBarrier post_barrier{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead,
.oldLayout = vk::ImageLayout::eTransferDstOptimal,
.newLayout = vk::ImageLayout::ePresentSrcKHR,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = swapchain_image, .image = swapchain_image,
@ -288,54 +325,22 @@ void RendererVulkan::Present(Frame* frame) {
.baseArrayLayer = 0, .baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS, .layerCount = VK_REMAINING_ARRAY_LAYERS,
}, },
}, };
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eTransferSrcOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = frame->image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
};
const vk::ImageMemoryBarrier post_barrier{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead,
.oldLayout = vk::ImageLayout::eTransferDstOptimal,
.newLayout = vk::ImageLayout::ePresentSrcKHR,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = swapchain_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, vk::PipelineStageFlagBits::eTransfer,
{}, {}, pre_barriers); vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
cmdbuf.blitImage(frame->image, vk::ImageLayout::eTransferSrcOptimal, swapchain_image, cmdbuf.blitImage(frame->image, vk::ImageLayout::eTransferSrcOptimal, swapchain_image,
vk::ImageLayout::eTransferDstOptimal, vk::ImageLayout::eTransferDstOptimal,
MakeImageBlit(frame->width, frame->height, extent.width, extent.height), MakeImageBlit(frame->width, frame->height, extent.width, extent.height),
vk::Filter::eLinear); vk::Filter::eLinear);
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier);
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier);
}
TracyVkCollect(instance.GetProfilerContext(), cmdbuf);
cmdbuf.end(); cmdbuf.end();
static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = { static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {

View file

@ -160,6 +160,7 @@ bool Instance::CreateDevice() {
// The next two extensions are required to be available together in order to support write masks // The next two extensions are required to be available together in order to support write masks
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
const auto calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME);
const auto family_properties = physical_device.getQueueFamilyProperties(); const auto family_properties = physical_device.getQueueFamilyProperties();
if (family_properties.empty()) { if (family_properties.empty()) {
@ -212,6 +213,7 @@ bool Instance::CreateDevice() {
}, },
vk::PhysicalDeviceVulkan12Features{ vk::PhysicalDeviceVulkan12Features{
.scalarBlockLayout = true, .scalarBlockLayout = true,
.hostQueryReset = true,
.timelineSemaphore = true, .timelineSemaphore = true,
}, },
vk::PhysicalDeviceVulkan13Features{ vk::PhysicalDeviceVulkan13Features{
@ -251,6 +253,27 @@ bool Instance::CreateDevice() {
graphics_queue = device->getQueue(queue_family_index, 0); graphics_queue = device->getQueue(queue_family_index, 0);
present_queue = device->getQueue(queue_family_index, 0); present_queue = device->getQueue(queue_family_index, 0);
if (calibrated_timestamps) {
const auto& time_domains = physical_device.getCalibrateableTimeDomainsEXT();
#if _WIN64
const bool has_host_time_domain =
std::find(time_domains.cbegin(), time_domains.cend(),
vk::TimeDomainEXT::eQueryPerformanceCounter) != time_domains.cend();
#else
const bool has_host_time_domain =
std::find(time_domains.cbegin(), time_domains.cend(),
vk::TimeDomainEXT::eClockMonotonicRaw) != time_domains.cend();
#endif
if (has_host_time_domain) {
static constexpr std::string_view context_name{"vk_rasterizer"};
profiler_context =
TracyVkContextHostCalibrated(*instance, physical_device, *device,
VULKAN_HPP_DEFAULT_DISPATCHER.vkGetInstanceProcAddr,
VULKAN_HPP_DEFAULT_DISPATCHER.vkGetDeviceProcAddr);
TracyVkContextName(profiler_context, context_name.data(), context_name.size());
}
}
CreateAllocator(); CreateAllocator();
return true; return true;
} }

View file

@ -7,6 +7,9 @@
#include "video_core/renderer_vulkan/vk_platform.h" #include "video_core/renderer_vulkan/vk_platform.h"
#define TRACY_VK_USE_SYMBOL_TABLE
#include <tracy/TracyVulkan.hpp>
namespace Frontend { namespace Frontend {
class WindowSDL; class WindowSDL;
} }
@ -67,6 +70,10 @@ public:
return present_queue; return present_queue;
} }
/// Returns the Tracy Vulkan profiling context held in profiler_context.
/// NOTE(review): the member is value-initialized and, in the code visible here, only assigned in
/// CreateDevice when a calibrated host time domain is found — callers presumably need to tolerate
/// an empty context; confirm.
TracyVkCtx GetProfilerContext() const {
return profiler_context;
}
/// Returns true when a known debugging tool is attached. /// Returns true when a known debugging tool is attached.
bool HasDebuggingToolAttached() const { bool HasDebuggingToolAttached() const {
return has_renderdoc || has_nsight_graphics; return has_renderdoc || has_nsight_graphics;
@ -208,6 +215,7 @@ private:
vk::Queue graphics_queue; vk::Queue graphics_queue;
std::vector<vk::PhysicalDevice> physical_devices; std::vector<vk::PhysicalDevice> physical_devices;
std::vector<std::string> available_extensions; std::vector<std::string> available_extensions;
TracyVkCtx profiler_context{};
u32 queue_family_index{0}; u32 queue_family_index{0};
bool image_view_reinterpretation{true}; bool image_view_reinterpretation{true};
bool timeline_semaphores{}; bool timeline_semaphores{};

View file

@ -2,17 +2,21 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <mutex> #include <mutex>
#include "common/debug.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan { namespace Vulkan {
Scheduler::Scheduler(const Instance& instance) Scheduler::Scheduler(const Instance& instance)
: master_semaphore{instance}, command_pool{instance, &master_semaphore} { : instance{instance}, master_semaphore{instance}, command_pool{instance, &master_semaphore} {
profiler_scope = reinterpret_cast<tracy::VkCtxScope*>(std::malloc(sizeof(tracy::VkCtxScope)));
AllocateWorkerCommandBuffers(); AllocateWorkerCommandBuffers();
} }
Scheduler::~Scheduler() = default; Scheduler::~Scheduler() {
std::free(profiler_scope);
}
void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) { void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) {
// When flushing, we only send data to the worker thread; no waiting is necessary. // When flushing, we only send data to the worker thread; no waiting is necessary.
@ -41,11 +45,18 @@ void Scheduler::AllocateWorkerCommandBuffers() {
current_cmdbuf = command_pool.Commit(); current_cmdbuf = command_pool.Commit();
current_cmdbuf.begin(begin_info); current_cmdbuf.begin(begin_info);
static const auto scope_loc = GPU_SCOPE_LOCATION("Guest Frame", MarkersPallete::GpuMarkerColor);
new (profiler_scope)
tracy::VkCtxScope{instance.GetProfilerContext(), &scope_loc, current_cmdbuf, true};
} }
void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) { void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
const u64 signal_value = master_semaphore.NextTick(); const u64 signal_value = master_semaphore.NextTick();
profiler_scope->~VkCtxScope();
TracyVkCollect(instance.GetProfilerContext(), current_cmdbuf);
std::scoped_lock lk{submit_mutex}; std::scoped_lock lk{submit_mutex};
master_semaphore.SubmitWork(current_cmdbuf, wait_semaphore, signal_semaphore, signal_value); master_semaphore.SubmitWork(current_cmdbuf, wait_semaphore, signal_semaphore, signal_value);
master_semaphore.Refresh(); master_semaphore.Refresh();

View file

@ -54,10 +54,12 @@ private:
void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore); void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore);
private: private:
const Instance& instance;
MasterSemaphore master_semaphore; MasterSemaphore master_semaphore;
CommandPool command_pool; CommandPool command_pool;
vk::CommandBuffer current_cmdbuf; vk::CommandBuffer current_cmdbuf;
std::condition_variable_any event_cv; std::condition_variable_any event_cv;
tracy::VkCtxScope* profiler_scope{};
}; };
} // namespace Vulkan } // namespace Vulkan