mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2024-12-29 11:06:07 +00:00
Merge pull request #184 from shadps4-emu/externals/tracy
Tracy profiler
This commit is contained in:
commit
400d910743
3
.gitmodules
vendored
3
.gitmodules
vendored
|
@ -55,3 +55,6 @@
|
||||||
[submodule "externals/xxhash"]
|
[submodule "externals/xxhash"]
|
||||||
path = externals/xxhash
|
path = externals/xxhash
|
||||||
url = https://github.com/Cyan4973/xxHash.git
|
url = https://github.com/Cyan4973/xxHash.git
|
||||||
|
[submodule "externals/tracy"]
|
||||||
|
path = externals/tracy
|
||||||
|
url = https://github.com/shadps4-emu/tracy
|
||||||
|
|
|
@ -34,5 +34,6 @@ Files: CMakeSettings.json
|
||||||
src/shadps4.rc
|
src/shadps4.rc
|
||||||
src/shadps4.qrc
|
src/shadps4.qrc
|
||||||
externals/stb_image.h
|
externals/stb_image.h
|
||||||
|
externals/tracy/*
|
||||||
Copyright: shadPS4 Emulator Project
|
Copyright: shadPS4 Emulator Project
|
||||||
License: GPL-2.0-or-later
|
License: GPL-2.0-or-later
|
||||||
|
|
|
@ -500,7 +500,7 @@ endif()
|
||||||
|
|
||||||
create_target_directory_groups(shadps4)
|
create_target_directory_groups(shadps4)
|
||||||
|
|
||||||
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak)
|
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak Tracy::TracyClient)
|
||||||
target_link_libraries(shadps4 PRIVATE discord-rpc boost vma sirit vulkan-headers xxhash Zydis SPIRV glslang SDL3-shared)
|
target_link_libraries(shadps4 PRIVATE discord-rpc boost vma sirit vulkan-headers xxhash Zydis SPIRV glslang SDL3-shared)
|
||||||
|
|
||||||
if (NOT ENABLE_QT_GUI)
|
if (NOT ENABLE_QT_GUI)
|
||||||
|
|
8
externals/CMakeLists.txt
vendored
8
externals/CMakeLists.txt
vendored
|
@ -93,3 +93,11 @@ add_subdirectory(sirit EXCLUDE_FROM_ALL)
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument")
|
target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# Tracy
|
||||||
|
option(TRACY_ENABLE "" ON)
|
||||||
|
option(TRACY_NO_CRASH_HANDLER "" ON) # Otherwise texture cache exceptions will be treated as a crash
|
||||||
|
option(TRACY_ON_DEMAND "" ON)
|
||||||
|
option(TRACY_NO_FRAME_IMAGE "" ON)
|
||||||
|
option(TRACY_FIBERS "" ON) # For AmdGpu frontend profiling
|
||||||
|
add_subdirectory(tracy EXCLUDE_FROM_ALL)
|
||||||
|
|
1
externals/tracy
vendored
Submodule
1
externals/tracy
vendored
Submodule
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit c6d779d78508514102fbe1b8eb28bda10d95bb2a
|
|
@ -10,3 +10,45 @@
|
||||||
#else
|
#else
|
||||||
#error What the fuck is this compiler
|
#error What the fuck is this compiler
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <tracy/Tracy.hpp>
|
||||||
|
|
||||||
|
/// Returns the value of tracy::GetProfiler().IsConnected(), i.e. whether a Tracy
/// profiler is currently connected to this process. Callers can use it to skip work
/// (such as formatting messages) that is only useful while a profiler is attached.
static inline bool IsProfilerConnected() {
|
||||||
|
return tracy::GetProfiler().IsConnected();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Declares a variable named `varname` of type tracy::LockableCtx.
// It is constructed from a pointer to a function-local static constexpr
// tracy::SourceLocationData produced by an immediately-invoked lambda, so each
// expansion gets its own record. The record is filled with nullptr, the string
// "<type> <varname>" (both stringified), TracyFile, TracyLine and 0.
// `type` is used only for that label; it does not affect the declared type.
#define CUSTOM_LOCK(type, varname) \
|
||||||
|
tracy::LockableCtx varname { \
|
||||||
|
[]() -> const tracy::SourceLocationData* { \
|
||||||
|
static constexpr tracy::SourceLocationData srcloc{nullptr, #type " " #varname, \
|
||||||
|
TracyFile, TracyLine, 0}; \
|
||||||
|
return &srcloc; \
|
||||||
|
}() \
|
||||||
|
}
|
||||||
|
|
||||||
|
// Memory-tracking shorthands: forward to Tracy's TracyAllocN / TracyFreeN with
// `pool` as the pool name. `ptr` is converted with std::bit_cast<void*>, so it
// must have the same size as a pointer; it may be a real pointer or an integer
// address.
#define TRACK_ALLOC(ptr, size, pool) TracyAllocN(std::bit_cast<void*>(ptr), (size), (pool))
|
||||||
|
#define TRACK_FREE(ptr, pool) TracyFreeN(std::bit_cast<void*>(ptr), (pool))
|
||||||
|
|
||||||
|
// Colors used to tag profiler zones by subsystem.
// NOTE(review): the values look like 0xRRGGBB colors -- confirm against Tracy's
// color format. The enum is unscoped, so enumerators convert implicitly to the
// integer color argument of the Tracy macros they are passed to.
// The spelling "Pallete" is part of the public name and is referenced elsewhere.
enum MarkersPallete : int {
|
||||||
|
EmulatorMarkerColor = 0x264653,
|
||||||
|
RendererMarkerColor = 0x2a9d8f,
|
||||||
|
HleMarkerColor = 0xe9c46a,
|
||||||
|
GpuMarkerColor = 0xf4a261,
|
||||||
|
Reserved1 = 0xe76f51,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Zone shorthands: each expands to Tracy's ZoneScopedC with the palette color of
// the corresponding subsystem. Intended to be written as a statement at the top
// of the scope to be profiled, e.g. `RENDERER_TRACE;`.
#define EMULATOR_TRACE ZoneScopedC(EmulatorMarkerColor)
|
||||||
|
#define RENDERER_TRACE ZoneScopedC(RendererMarkerColor)
|
||||||
|
#define HLE_TRACE ZoneScopedC(HleMarkerColor)
|
||||||
|
|
||||||
|
// Profiler message shorthands: send `msg` to Tracy through TracyMessageC, colored
// by severity (warning = DarkOrange, error = Red, critical = HotPink).
//
// `msg` may be any expression yielding an object with c_str() and size()
// (e.g. std::string). It is bound once to the lambda parameter, so it is
// evaluated exactly once.
//
// The lambda parameter is deliberately NOT named `msg`: the preprocessor would
// substitute the macro argument into the parameter declaration, which only
// compiles when the argument is a bare identifier.
//
// Each macro expands to a single expression without a trailing semicolon, so it
// behaves like a function call in `if`/`else` and other statement contexts.
#define TRACE_WARN(msg) \
    [](const auto& trace_text) { \
        TracyMessageC(trace_text.c_str(), trace_text.size(), tracy::Color::DarkOrange); \
    }(msg)

#define TRACE_ERROR(msg) \
    [](const auto& trace_text) { \
        TracyMessageC(trace_text.c_str(), trace_text.size(), tracy::Color::Red); \
    }(msg)

#define TRACE_CRIT(msg) \
    [](const auto& trace_text) { \
        TracyMessageC(trace_text.c_str(), trace_text.size(), tracy::Color::HotPink); \
    }(msg)
|
||||||
|
|
||||||
|
// Builds a tracy::SourceLocationData value from `name`, the enclosing function
// (TracyFunction), the current file and line (TracyFile / TracyLine) and `color`.
//
// Expands to a plain expression with no trailing semicolon, so it can be used as
// an initializer: `static const auto loc = GPU_SCOPE_LOCATION("Name", color);`.
#define GPU_SCOPE_LOCATION(name, color) \
    tracy::SourceLocationData { \
        (name), TracyFunction, TracyFile, static_cast<uint32_t>(TracyLine), (color) \
    }
|
||||||
|
|
||||||
|
// Frame boundary marker: expands to Tracy's FrameMark. Meant to be issued once
// per emulated frame, at the end of the main loop iteration.
#define FRAME_END FrameMark
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
|
|
||||||
#include "common/bounded_threadsafe_queue.h"
|
#include "common/bounded_threadsafe_queue.h"
|
||||||
#include "common/config.h"
|
#include "common/config.h"
|
||||||
|
#include "common/debug.h"
|
||||||
#include "common/io_file.h"
|
#include "common/io_file.h"
|
||||||
#include "common/logging/backend.h"
|
#include "common/logging/backend.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
|
@ -167,6 +168,24 @@ public:
|
||||||
|
|
||||||
void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
|
void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
|
||||||
const char* function, std::string message) {
|
const char* function, std::string message) {
|
||||||
|
// Propagate important log messages to the profiler
|
||||||
|
if (IsProfilerConnected()) {
|
||||||
|
const auto& msg_str = std::format("[{}] {}", GetLogClassName(log_class), message);
|
||||||
|
switch (log_level) {
|
||||||
|
case Level::Warning:
|
||||||
|
TRACE_WARN(msg_str);
|
||||||
|
break;
|
||||||
|
case Level::Error:
|
||||||
|
TRACE_ERROR(msg_str);
|
||||||
|
break;
|
||||||
|
case Level::Critical:
|
||||||
|
TRACE_CRIT(msg_str);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!filter.CheckMessage(log_class, log_level)) {
|
if (!filter.CheckMessage(log_class, log_level)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
|
#include "common/debug.h"
|
||||||
#include "common/scope_exit.h"
|
#include "common/scope_exit.h"
|
||||||
#include "core/libraries/error_codes.h"
|
#include "core/libraries/error_codes.h"
|
||||||
#include "core/libraries/kernel/memory_management.h"
|
#include "core/libraries/kernel/memory_management.h"
|
||||||
|
@ -123,6 +124,7 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
|
||||||
|
|
||||||
// Perform the mapping.
|
// Perform the mapping.
|
||||||
*out_addr = impl.Map(mapped_addr, size, alignment, phys_addr, is_exec);
|
*out_addr = impl.Map(mapped_addr, size, alignment, phys_addr, is_exec);
|
||||||
|
TRACK_ALLOC(*out_addr, size, "VMEM");
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -149,6 +151,7 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
|
||||||
|
|
||||||
// Unmap the memory region.
|
// Unmap the memory region.
|
||||||
impl.Unmap(virtual_addr, size, phys_addr);
|
impl.Unmap(virtual_addr, size, phys_addr);
|
||||||
|
TRACK_FREE(virtual_addr, "VMEM");
|
||||||
}
|
}
|
||||||
|
|
||||||
int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* prot) {
|
int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* prot) {
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include <core/libraries/videoout/video_out.h>
|
#include <core/libraries/videoout/video_out.h>
|
||||||
#include <fmt/core.h>
|
#include <fmt/core.h>
|
||||||
#include "common/config.h"
|
#include "common/config.h"
|
||||||
|
#include "common/debug.h"
|
||||||
#include "common/logging/backend.h"
|
#include "common/logging/backend.h"
|
||||||
#include "common/path_util.h"
|
#include "common/path_util.h"
|
||||||
#include "common/singleton.h"
|
#include "common/singleton.h"
|
||||||
|
@ -121,6 +122,7 @@ void Emulator::Run(const std::filesystem::path& file) {
|
||||||
window.waitEvent();
|
window.waitEvent();
|
||||||
Libraries::VideoOut::Flip(FlipPeriod);
|
Libraries::VideoOut::Flip(FlipPeriod);
|
||||||
Libraries::VideoOut::Vblank();
|
Libraries::VideoOut::Vblank();
|
||||||
|
FRAME_END;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::exit(0);
|
std::exit(0);
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
|
#include "common/debug.h"
|
||||||
#include "common/thread.h"
|
#include "common/thread.h"
|
||||||
#include "video_core/amdgpu/liverpool.h"
|
#include "video_core/amdgpu/liverpool.h"
|
||||||
#include "video_core/amdgpu/pm4_cmds.h"
|
#include "video_core/amdgpu/pm4_cmds.h"
|
||||||
|
@ -9,6 +10,10 @@
|
||||||
|
|
||||||
namespace AmdGpu {
|
namespace AmdGpu {
|
||||||
|
|
||||||
|
static const char* dcb_task_name{"DCB_TASK"};
|
||||||
|
static const char* ccb_task_name{"CCB_TASK"};
|
||||||
|
static const char* asc_task_name{"ACB_TASK"};
|
||||||
|
|
||||||
std::array<u8, 48_KB> Liverpool::ConstantEngine::constants_heap;
|
std::array<u8, 48_KB> Liverpool::ConstantEngine::constants_heap;
|
||||||
|
|
||||||
Liverpool::Liverpool() {
|
Liverpool::Liverpool() {
|
||||||
|
@ -69,12 +74,16 @@ void Liverpool::Process(std::stop_token stoken) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Liverpool::WaitGpuIdle() {
|
void Liverpool::WaitGpuIdle() {
|
||||||
|
RENDERER_TRACE;
|
||||||
|
|
||||||
while (const auto old = num_submits.load()) {
|
while (const auto old = num_submits.load()) {
|
||||||
num_submits.wait(old);
|
num_submits.wait(old);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
|
Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
|
||||||
|
TracyFiberEnter(ccb_task_name);
|
||||||
|
|
||||||
while (!ccb.empty()) {
|
while (!ccb.empty()) {
|
||||||
const auto* header = reinterpret_cast<const PM4Header*>(ccb.data());
|
const auto* header = reinterpret_cast<const PM4Header*>(ccb.data());
|
||||||
const u32 type = header->type;
|
const u32 type = header->type;
|
||||||
|
@ -109,7 +118,9 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
|
||||||
case PM4ItOpcode::WaitOnDeCounterDiff: {
|
case PM4ItOpcode::WaitOnDeCounterDiff: {
|
||||||
const auto diff = it_body[0];
|
const auto diff = it_body[0];
|
||||||
while ((cblock.de_count - cblock.ce_count) >= diff) {
|
while ((cblock.de_count - cblock.ce_count) >= diff) {
|
||||||
|
TracyFiberLeave;
|
||||||
co_yield {};
|
co_yield {};
|
||||||
|
TracyFiberEnter(ccb_task_name);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -120,9 +131,13 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
|
||||||
}
|
}
|
||||||
ccb = ccb.subspan(header->type3.NumWords() + 1);
|
ccb = ccb.subspan(header->type3.NumWords() + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TracyFiberLeave;
|
||||||
}
|
}
|
||||||
|
|
||||||
Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb) {
|
Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb) {
|
||||||
|
TracyFiberEnter(dcb_task_name);
|
||||||
|
|
||||||
cblock.Reset();
|
cblock.Reset();
|
||||||
|
|
||||||
// TODO: potentially, ASCs also can depend on CE and in this case the
|
// TODO: potentially, ASCs also can depend on CE and in this case the
|
||||||
|
@ -132,7 +147,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
if (!ccb.empty()) {
|
if (!ccb.empty()) {
|
||||||
// In case of CCB provided kick off CE asap to have the constant heap ready to use
|
// In case of CCB provided kick off CE asap to have the constant heap ready to use
|
||||||
ce_task = ProcessCeUpdate(ccb);
|
ce_task = ProcessCeUpdate(ccb);
|
||||||
|
TracyFiberLeave;
|
||||||
ce_task.handle.resume();
|
ce_task.handle.resume();
|
||||||
|
TracyFiberEnter(dcb_task_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
while (!dcb.empty()) {
|
while (!dcb.empty()) {
|
||||||
|
@ -330,7 +347,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
|
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
|
||||||
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
||||||
while (!wait_reg_mem->Test()) {
|
while (!wait_reg_mem->Test()) {
|
||||||
|
TracyFiberLeave;
|
||||||
co_yield {};
|
co_yield {};
|
||||||
|
TracyFiberEnter(dcb_task_name);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -340,7 +359,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::WaitOnCeCounter: {
|
case PM4ItOpcode::WaitOnCeCounter: {
|
||||||
while (cblock.ce_count <= cblock.de_count) {
|
while (cblock.ce_count <= cblock.de_count) {
|
||||||
|
TracyFiberLeave;
|
||||||
ce_task.handle.resume();
|
ce_task.handle.resume();
|
||||||
|
TracyFiberEnter(dcb_task_name);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -356,6 +377,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||||
ASSERT_MSG(ce_task.handle.done(), "Partially processed CCB");
|
ASSERT_MSG(ce_task.handle.done(), "Partially processed CCB");
|
||||||
ce_task.handle.destroy();
|
ce_task.handle.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TracyFiberLeave;
|
||||||
}
|
}
|
||||||
|
|
||||||
Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb) {
|
Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb) {
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include "common/config.h"
|
#include "common/config.h"
|
||||||
|
#include "common/debug.h"
|
||||||
#include "common/singleton.h"
|
#include "common/singleton.h"
|
||||||
#include "core/file_format/splash.h"
|
#include "core/file_format/splash.h"
|
||||||
#include "core/libraries/system/systemservice.h"
|
#include "core/libraries/system/systemservice.h"
|
||||||
|
@ -270,14 +271,51 @@ void RendererVulkan::Present(Frame* frame) {
|
||||||
};
|
};
|
||||||
const vk::CommandBuffer cmdbuf = frame->cmdbuf;
|
const vk::CommandBuffer cmdbuf = frame->cmdbuf;
|
||||||
cmdbuf.begin(begin_info);
|
cmdbuf.begin(begin_info);
|
||||||
|
{
|
||||||
|
auto* profiler_ctx = instance.GetProfilerContext();
|
||||||
|
TracyVkNamedZoneC(profiler_ctx, renderer_gpu_zone, cmdbuf, "Host frame",
|
||||||
|
MarkersPallete::GpuMarkerColor, profiler_ctx != nullptr);
|
||||||
|
|
||||||
const vk::Extent2D extent = swapchain.GetExtent();
|
const vk::Extent2D extent = swapchain.GetExtent();
|
||||||
const std::array pre_barriers{
|
const std::array pre_barriers{
|
||||||
vk::ImageMemoryBarrier{
|
vk::ImageMemoryBarrier{
|
||||||
.srcAccessMask = vk::AccessFlagBits::eNone,
|
.srcAccessMask = vk::AccessFlagBits::eNone,
|
||||||
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
|
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
|
||||||
.oldLayout = vk::ImageLayout::eUndefined,
|
.oldLayout = vk::ImageLayout::eUndefined,
|
||||||
.newLayout = vk::ImageLayout::eTransferDstOptimal,
|
.newLayout = vk::ImageLayout::eTransferDstOptimal,
|
||||||
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.image = swapchain_image,
|
||||||
|
.subresourceRange{
|
||||||
|
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||||
|
.baseMipLevel = 0,
|
||||||
|
.levelCount = 1,
|
||||||
|
.baseArrayLayer = 0,
|
||||||
|
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
vk::ImageMemoryBarrier{
|
||||||
|
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
|
||||||
|
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
|
||||||
|
.oldLayout = vk::ImageLayout::eGeneral,
|
||||||
|
.newLayout = vk::ImageLayout::eTransferSrcOptimal,
|
||||||
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.image = frame->image,
|
||||||
|
.subresourceRange{
|
||||||
|
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||||
|
.baseMipLevel = 0,
|
||||||
|
.levelCount = 1,
|
||||||
|
.baseArrayLayer = 0,
|
||||||
|
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
const vk::ImageMemoryBarrier post_barrier{
|
||||||
|
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
|
||||||
|
.dstAccessMask = vk::AccessFlagBits::eMemoryRead,
|
||||||
|
.oldLayout = vk::ImageLayout::eTransferDstOptimal,
|
||||||
|
.newLayout = vk::ImageLayout::ePresentSrcKHR,
|
||||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
.image = swapchain_image,
|
.image = swapchain_image,
|
||||||
|
@ -288,54 +326,25 @@ void RendererVulkan::Present(Frame* frame) {
|
||||||
.baseArrayLayer = 0,
|
.baseArrayLayer = 0,
|
||||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||||
},
|
},
|
||||||
},
|
};
|
||||||
vk::ImageMemoryBarrier{
|
|
||||||
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
|
|
||||||
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
|
|
||||||
.oldLayout = vk::ImageLayout::eGeneral,
|
|
||||||
.newLayout = vk::ImageLayout::eTransferSrcOptimal,
|
|
||||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
||||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
||||||
.image = frame->image,
|
|
||||||
.subresourceRange{
|
|
||||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
|
||||||
.baseMipLevel = 0,
|
|
||||||
.levelCount = 1,
|
|
||||||
.baseArrayLayer = 0,
|
|
||||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
};
|
|
||||||
const vk::ImageMemoryBarrier post_barrier{
|
|
||||||
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
|
|
||||||
.dstAccessMask = vk::AccessFlagBits::eMemoryRead,
|
|
||||||
.oldLayout = vk::ImageLayout::eTransferDstOptimal,
|
|
||||||
.newLayout = vk::ImageLayout::ePresentSrcKHR,
|
|
||||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
||||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
||||||
.image = swapchain_image,
|
|
||||||
.subresourceRange{
|
|
||||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
|
||||||
.baseMipLevel = 0,
|
|
||||||
.levelCount = 1,
|
|
||||||
.baseArrayLayer = 0,
|
|
||||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
|
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
|
||||||
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
|
vk::PipelineStageFlagBits::eTransfer,
|
||||||
{}, {}, pre_barriers);
|
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
|
||||||
|
|
||||||
cmdbuf.blitImage(frame->image, vk::ImageLayout::eTransferSrcOptimal, swapchain_image,
|
cmdbuf.blitImage(frame->image, vk::ImageLayout::eTransferSrcOptimal, swapchain_image,
|
||||||
vk::ImageLayout::eTransferDstOptimal,
|
vk::ImageLayout::eTransferDstOptimal,
|
||||||
MakeImageBlit(frame->width, frame->height, extent.width, extent.height),
|
MakeImageBlit(frame->width, frame->height, extent.width, extent.height),
|
||||||
vk::Filter::eLinear);
|
vk::Filter::eLinear);
|
||||||
|
|
||||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
|
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
|
||||||
vk::PipelineStageFlagBits::eAllCommands,
|
vk::PipelineStageFlagBits::eAllCommands,
|
||||||
vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier);
|
vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier);
|
||||||
|
|
||||||
|
if (profiler_ctx) {
|
||||||
|
TracyVkCollect(profiler_ctx, cmdbuf);
|
||||||
|
}
|
||||||
|
}
|
||||||
cmdbuf.end();
|
cmdbuf.end();
|
||||||
|
|
||||||
static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
|
static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
|
||||||
|
|
|
@ -160,6 +160,7 @@ bool Instance::CreateDevice() {
|
||||||
// The next two extensions are required to be available together in order to support write masks
|
// The next two extensions are required to be available together in order to support write masks
|
||||||
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
|
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
|
||||||
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
||||||
|
const auto calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME);
|
||||||
|
|
||||||
const auto family_properties = physical_device.getQueueFamilyProperties();
|
const auto family_properties = physical_device.getQueueFamilyProperties();
|
||||||
if (family_properties.empty()) {
|
if (family_properties.empty()) {
|
||||||
|
@ -212,6 +213,7 @@ bool Instance::CreateDevice() {
|
||||||
},
|
},
|
||||||
vk::PhysicalDeviceVulkan12Features{
|
vk::PhysicalDeviceVulkan12Features{
|
||||||
.scalarBlockLayout = true,
|
.scalarBlockLayout = true,
|
||||||
|
.hostQueryReset = true,
|
||||||
.timelineSemaphore = true,
|
.timelineSemaphore = true,
|
||||||
},
|
},
|
||||||
vk::PhysicalDeviceVulkan13Features{
|
vk::PhysicalDeviceVulkan13Features{
|
||||||
|
@ -251,6 +253,27 @@ bool Instance::CreateDevice() {
|
||||||
graphics_queue = device->getQueue(queue_family_index, 0);
|
graphics_queue = device->getQueue(queue_family_index, 0);
|
||||||
present_queue = device->getQueue(queue_family_index, 0);
|
present_queue = device->getQueue(queue_family_index, 0);
|
||||||
|
|
||||||
|
if (calibrated_timestamps) {
|
||||||
|
const auto& time_domains = physical_device.getCalibrateableTimeDomainsEXT();
|
||||||
|
#if _WIN64
|
||||||
|
const bool has_host_time_domain =
|
||||||
|
std::find(time_domains.cbegin(), time_domains.cend(),
|
||||||
|
vk::TimeDomainEXT::eQueryPerformanceCounter) != time_domains.cend();
|
||||||
|
#else
|
||||||
|
const bool has_host_time_domain =
|
||||||
|
std::find(time_domains.cbegin(), time_domains.cend(),
|
||||||
|
vk::TimeDomainEXT::eClockMonotonicRaw) != time_domains.cend();
|
||||||
|
#endif
|
||||||
|
if (has_host_time_domain) {
|
||||||
|
static constexpr std::string_view context_name{"vk_rasterizer"};
|
||||||
|
profiler_context =
|
||||||
|
TracyVkContextHostCalibrated(*instance, physical_device, *device,
|
||||||
|
VULKAN_HPP_DEFAULT_DISPATCHER.vkGetInstanceProcAddr,
|
||||||
|
VULKAN_HPP_DEFAULT_DISPATCHER.vkGetDeviceProcAddr);
|
||||||
|
TracyVkContextName(profiler_context, context_name.data(), context_name.size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
CreateAllocator();
|
CreateAllocator();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,6 +7,9 @@
|
||||||
|
|
||||||
#include "video_core/renderer_vulkan/vk_platform.h"
|
#include "video_core/renderer_vulkan/vk_platform.h"
|
||||||
|
|
||||||
|
#define TRACY_VK_USE_SYMBOL_TABLE
|
||||||
|
#include <tracy/TracyVulkan.hpp>
|
||||||
|
|
||||||
namespace Frontend {
|
namespace Frontend {
|
||||||
class WindowSDL;
|
class WindowSDL;
|
||||||
}
|
}
|
||||||
|
@ -67,6 +70,10 @@ public:
|
||||||
return present_queue;
|
return present_queue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the Tracy Vulkan profiling context, or a null handle when no context
/// has been created. Callers must check for null before using it.
TracyVkCtx GetProfilerContext() const {
|
||||||
|
return profiler_context;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true when a known debugging tool is attached.
|
/// Returns true when a known debugging tool is attached.
|
||||||
bool HasDebuggingToolAttached() const {
|
bool HasDebuggingToolAttached() const {
|
||||||
return has_renderdoc || has_nsight_graphics;
|
return has_renderdoc || has_nsight_graphics;
|
||||||
|
@ -208,6 +215,7 @@ private:
|
||||||
vk::Queue graphics_queue;
|
vk::Queue graphics_queue;
|
||||||
std::vector<vk::PhysicalDevice> physical_devices;
|
std::vector<vk::PhysicalDevice> physical_devices;
|
||||||
std::vector<std::string> available_extensions;
|
std::vector<std::string> available_extensions;
|
||||||
|
TracyVkCtx profiler_context{};
|
||||||
u32 queue_family_index{0};
|
u32 queue_family_index{0};
|
||||||
bool image_view_reinterpretation{true};
|
bool image_view_reinterpretation{true};
|
||||||
bool timeline_semaphores{};
|
bool timeline_semaphores{};
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include "common/config.h"
|
#include "common/config.h"
|
||||||
|
#include "common/debug.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
#include "video_core/amdgpu/liverpool.h"
|
#include "video_core/amdgpu/liverpool.h"
|
||||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
|
@ -33,6 +34,8 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
|
||||||
Rasterizer::~Rasterizer() = default;
|
Rasterizer::~Rasterizer() = default;
|
||||||
|
|
||||||
void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
||||||
|
RENDERER_TRACE;
|
||||||
|
|
||||||
const auto cmdbuf = scheduler.CommandBuffer();
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
const auto& regs = liverpool->regs;
|
const auto& regs = liverpool->regs;
|
||||||
const u32 num_indices = SetupIndexBuffer(is_indexed, index_offset);
|
const u32 num_indices = SetupIndexBuffer(is_indexed, index_offset);
|
||||||
|
@ -104,6 +107,8 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Rasterizer::DispatchDirect() {
|
void Rasterizer::DispatchDirect() {
|
||||||
|
RENDERER_TRACE;
|
||||||
|
|
||||||
const auto cmdbuf = scheduler.CommandBuffer();
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
const auto& cs_program = liverpool->regs.cs_program;
|
const auto& cs_program = liverpool->regs.cs_program;
|
||||||
const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
|
const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
|
||||||
|
|
|
@ -2,17 +2,21 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
#include "common/debug.h"
|
||||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
Scheduler::Scheduler(const Instance& instance)
|
Scheduler::Scheduler(const Instance& instance)
|
||||||
: master_semaphore{instance}, command_pool{instance, &master_semaphore} {
|
: instance{instance}, master_semaphore{instance}, command_pool{instance, &master_semaphore} {
|
||||||
|
profiler_scope = reinterpret_cast<tracy::VkCtxScope*>(std::malloc(sizeof(tracy::VkCtxScope)));
|
||||||
AllocateWorkerCommandBuffers();
|
AllocateWorkerCommandBuffers();
|
||||||
}
|
}
|
||||||
|
|
||||||
Scheduler::~Scheduler() = default;
|
// Releases the raw storage for profiler_scope that the constructor obtained with
// std::malloc. Only the memory is freed; no tracy::VkCtxScope destructor runs here.
// The scope object is placement-constructed in AllocateWorkerCommandBuffers and
// destroyed explicitly in SubmitExecution.
// NOTE(review): if a scope is still live when the scheduler is destroyed, its
// destructor is skipped -- confirm that this is intended.
Scheduler::~Scheduler() {
|
||||||
|
std::free(profiler_scope);
|
||||||
|
}
|
||||||
|
|
||||||
void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) {
|
void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) {
|
||||||
// When flushing, we only send data to the worker thread; no waiting is necessary.
|
// When flushing, we only send data to the worker thread; no waiting is necessary.
|
||||||
|
@ -41,11 +45,24 @@ void Scheduler::AllocateWorkerCommandBuffers() {
|
||||||
|
|
||||||
current_cmdbuf = command_pool.Commit();
|
current_cmdbuf = command_pool.Commit();
|
||||||
current_cmdbuf.begin(begin_info);
|
current_cmdbuf.begin(begin_info);
|
||||||
|
|
||||||
|
auto* profiler_ctx = instance.GetProfilerContext();
|
||||||
|
if (profiler_ctx) {
|
||||||
|
static const auto scope_loc =
|
||||||
|
GPU_SCOPE_LOCATION("Guest Frame", MarkersPallete::GpuMarkerColor);
|
||||||
|
new (profiler_scope) tracy::VkCtxScope{profiler_ctx, &scope_loc, current_cmdbuf, true};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
|
void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
|
||||||
const u64 signal_value = master_semaphore.NextTick();
|
const u64 signal_value = master_semaphore.NextTick();
|
||||||
|
|
||||||
|
auto* profiler_ctx = instance.GetProfilerContext();
|
||||||
|
if (profiler_ctx) {
|
||||||
|
profiler_scope->~VkCtxScope();
|
||||||
|
TracyVkCollect(profiler_ctx, current_cmdbuf);
|
||||||
|
}
|
||||||
|
|
||||||
std::scoped_lock lk{submit_mutex};
|
std::scoped_lock lk{submit_mutex};
|
||||||
master_semaphore.SubmitWork(current_cmdbuf, wait_semaphore, signal_semaphore, signal_value);
|
master_semaphore.SubmitWork(current_cmdbuf, wait_semaphore, signal_semaphore, signal_value);
|
||||||
master_semaphore.Refresh();
|
master_semaphore.Refresh();
|
||||||
|
|
|
@ -54,10 +54,12 @@ private:
|
||||||
void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore);
|
void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
const Instance& instance;
|
||||||
MasterSemaphore master_semaphore;
|
MasterSemaphore master_semaphore;
|
||||||
CommandPool command_pool;
|
CommandPool command_pool;
|
||||||
vk::CommandBuffer current_cmdbuf;
|
vk::CommandBuffer current_cmdbuf;
|
||||||
std::condition_variable_any event_cv;
|
std::condition_variable_any event_cv;
|
||||||
|
tracy::VkCtxScope* profiler_scope{};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
Loading…
Reference in a new issue