From 9c304b9af84d0c55f125df4a8bb7c227fbf6cb93 Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 14 Dec 2024 20:44:13 +0100 Subject: [PATCH] final touches and review notes --- src/core/debug_state.cpp | 69 ++++++++++++------- src/core/debug_state.h | 8 ++- src/video_core/amdgpu/liverpool.cpp | 39 +++++++---- src/video_core/amdgpu/liverpool.h | 12 ++-- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 2 +- .../renderer_vulkan/vk_shader_hle.cpp | 11 +-- .../renderer_vulkan/vk_shader_hle.h | 5 +- 8 files changed, 95 insertions(+), 53 deletions(-) diff --git a/src/core/debug_state.cpp b/src/core/debug_state.cpp index 13e0049d..daf614bd 100644 --- a/src/core/debug_state.cpp +++ b/src/core/debug_state.cpp @@ -15,7 +15,6 @@ using namespace DebugStateType; DebugStateImpl& DebugState = *Common::Singleton::Instance(); -extern std::unique_ptr liverpool; static ThreadID ThisThreadID() { #ifdef _WIN32 @@ -143,41 +142,61 @@ void DebugStateImpl::PushQueueDump(QueueDump dump) { frame.queues.push_back(std::move(dump)); } -void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, bool is_compute) { - std::scoped_lock lock{frame_dump_list_mutex}; +std::optional DebugStateImpl::GetRegDump(uintptr_t base_addr, uintptr_t header_addr) { const auto it = waiting_reg_dumps.find(header_addr); if (it == waiting_reg_dumps.end()) { - return; + return std::nullopt; } auto& frame = *it->second; waiting_reg_dumps.erase(it); waiting_reg_dumps_dbg.erase(waiting_reg_dumps_dbg.find(header_addr)); - auto& dump = frame.regs[header_addr - base_addr]; - dump.regs = liverpool->regs; - if (is_compute) { - dump.is_compute = true; - auto& cs = dump.regs.cs_program; - cs = liverpool->GetCsRegs(); - dump.cs_data = PipelineComputerProgramDump{ - .cs_program = cs, - .code = std::vector{cs.Code().begin(), cs.Code().end()}, - }; - } else { - for (int i = 0; i < RegDump::MaxShaderStages; i++) { - if (dump.regs.stage_enable.IsStageEnabled(i)) { - auto stage = dump.regs.ProgramForStage(i); - if (stage->address_lo != 0) { - auto code = stage->Code(); - dump.stages[i] = PipelineShaderProgramDump{ - .user_data = *stage, - .code = std::vector{code.begin(), code.end()}, - }; - } + return &frame.regs[header_addr - base_addr]; +} + +void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, + const AmdGpu::Liverpool::Regs& regs) { + std::scoped_lock lock{frame_dump_list_mutex}; + + auto dump = GetRegDump(base_addr, header_addr); + if (!dump) { + return; + } + + (*dump)->regs = regs; + + for (int i = 0; i < RegDump::MaxShaderStages; i++) { + if ((*dump)->regs.stage_enable.IsStageEnabled(i)) { + auto stage = (*dump)->regs.ProgramForStage(i); + if (stage->address_lo != 0) { + auto code = stage->Code(); + (*dump)->stages[i] = PipelineShaderProgramDump{ + .user_data = *stage, + .code = std::vector{code.begin(), code.end()}, + }; } } } } +void DebugStateImpl::PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, + const CsState& cs_state) { + std::scoped_lock lock{frame_dump_list_mutex}; + + auto dump = GetRegDump(base_addr, header_addr); + if (!dump) { + return; + } + + (*dump)->is_compute = true; + auto& cs = (*dump)->regs.cs_program; + cs = cs_state; + + (*dump)->cs_data = PipelineComputerProgramDump{ + .cs_program = cs, + .code = std::vector{cs.Code().begin(), cs.Code().end()}, + }; +} + void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage, vk::ShaderModule module, std::span spv, std::span raw_code, std::span patch_spv, diff --git a/src/core/debug_state.h b/src/core/debug_state.h index f8370ab2..a0e428b6 100644 --- a/src/core/debug_state.h +++ b/src/core/debug_state.h @@ -202,12 +202,18 @@ public: void PushQueueDump(QueueDump dump); - void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, bool is_compute = false); + void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, + const AmdGpu::Liverpool::Regs& regs); + using CsState = AmdGpu::Liverpool::ComputeProgram; + void PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, const CsState& cs_state); void CollectShader(const std::string& name, Shader::LogicalStage l_stage, vk::ShaderModule module, std::span spv, std::span raw_code, std::span patch_spv, bool is_patched); + +private: + std::optional GetRegDump(uintptr_t base_addr, uintptr_t header_addr); }; } // namespace DebugStateType diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 3a917da1..7e99fcb2 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -375,8 +375,18 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, - (count - 1) * sizeof(u32)); + const auto set_size = (count - 1) * sizeof(u32); + + if (set_data->reg_offset >= 0x200 && + set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) { + ASSERT(set_size <= sizeof(ComputeProgram)); + auto* addr = reinterpret_cast(&mapped_queues[GfxQueueId].cs_state) + + (set_data->reg_offset - 0x200); + std::memcpy(addr, header + 2, set_size); + } else { + std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, + set_size); + } break; } case PM4ItOpcode::SetUconfigReg: { @@ -398,7 +408,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index->draw_initiator; if (DebugState.DumpingCurrentReg()) { - DebugState.PushRegsDump(base_addr, reinterpret_cast(header)); + DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); @@ -415,7 +425,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index_off->draw_initiator; if (DebugState.DumpingCurrentReg()) { - DebugState.PushRegsDump(base_addr, reinterpret_cast(header)); + DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); @@ -431,7 +441,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index->draw_initiator; if (DebugState.DumpingCurrentReg()) { - DebugState.PushRegsDump(base_addr, reinterpret_cast(header)); + DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); @@ -447,7 +457,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header)); + DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); @@ -464,7 +474,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header)); + DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); @@ -481,7 +491,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spandata_offset; const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; if (DebugState.DumpingCurrentReg()) { - DebugState.PushRegsDump(base_addr, reinterpret_cast(header)); + DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); @@ -503,7 +513,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spandim_z; cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; if (DebugState.DumpingCurrentReg()) { - DebugState.PushRegsDump(base_addr, reinterpret_cast(header), true); + DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast(header), + cs_program); } if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); @@ -522,7 +533,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header), true); + DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast(header), + cs_program); } if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); @@ -782,8 +794,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { if (set_data->reg_offset >= 0x200 && set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) { ASSERT(set_size <= sizeof(ComputeProgram)); - auto* addr = - reinterpret_cast(&asc_sh_regs[vqid]) + (set_data->reg_offset - 0x200); + auto* addr = reinterpret_cast(&mapped_queues[vqid + 1].cs_state) + + (set_data->reg_offset - 0x200); std::memcpy(addr, header + 2, set_size); } else { std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, @@ -800,7 +812,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { cs_program.dim_z = dispatch_direct->dim_z; cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; if (DebugState.DumpingCurrentReg()) { - DebugState.PushRegsDump(base_addr, reinterpret_cast(header), true); + DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast(header), + cs_program); } if (rasterizer && (cs_program.dispatch_initiator & 1)) { const auto cmd_address = reinterpret_cast(header); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 9f677391..ffda28db 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -1145,7 +1145,7 @@ struct Liverpool { INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20); ShaderProgram ls_program; INSERT_PADDING_WORDS(0xA4); - ComputeProgram cs_program; + ComputeProgram cs_program; // shadowed by `cs_state` in `mapped_queues` INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5); DepthRenderControl depth_render_control; INSERT_PADDING_WORDS(1); @@ -1279,7 +1279,6 @@ struct Liverpool { }; Regs regs{}; - std::array asc_sh_regs{}; // See for a comment in context reg parsing code union CbDbExtent { @@ -1345,7 +1344,7 @@ public: } inline ComputeProgram& GetCsRegs() { - return *curr_cs_regs; + return mapped_queues[curr_gnm_queue_id].cs_state; } struct AscQueueInfo { @@ -1399,11 +1398,11 @@ private: void Process(std::stop_token stoken); inline void SaveDispatchContext() { - curr_cs_regs = ®s.cs_program; + curr_gnm_queue_id = GfxQueueId; } inline void SaveDispatchContext(u32 vqid) { - curr_cs_regs = &asc_sh_regs[vqid]; + curr_gnm_queue_id = vqid + 1; } struct GpuQueue { @@ -1413,6 +1412,7 @@ private: std::vector dcb_buffer; std::vector ccb_buffer; std::queue submits{}; + ComputeProgram cs_state{}; VAddr indirect_args_addr{}; }; std::array mapped_queues{}; @@ -1445,7 +1445,7 @@ private: std::mutex submit_mutex; std::condition_variable_any submit_cv; std::queue> command_queue{}; - ComputeProgram* curr_cs_regs{®s.cs_program}; + u32 curr_gnm_queue_id{GfxQueueId}; // Gnm queue processing dispatch }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ddf34465..50396287 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -172,7 +172,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_ } break; } - case Shader::Stage::Compute: { + case Stage::Compute: { const auto& cs_pgm = liverpool->GetCsRegs(); info.num_user_data = cs_pgm.settings.num_user_regs; info.num_allocated_vgprs = cs_pgm.settings.num_vgprs * 4; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d0a23446..bd8906f8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -324,7 +324,7 @@ void Rasterizer::DispatchDirect() { } const auto& cs = pipeline->GetStage(Shader::LogicalStage::Compute); - if (ExecuteShaderHLE(cs, *this)) { + if (ExecuteShaderHLE(cs, liverpool->regs, cs_program, *this)) { return; } diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.cpp b/src/video_core/renderer_vulkan/vk_shader_hle.cpp index 0b5d3227..ff78f5d2 100644 --- a/src/video_core/renderer_vulkan/vk_shader_hle.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_hle.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/info.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_hle.h" @@ -11,10 +12,11 @@ namespace Vulkan { static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f; -bool ExecuteCopyShaderHLE(const Shader::Info& info, Rasterizer& rasterizer) { +static bool ExecuteCopyShaderHLE(const Shader::Info& info, + const AmdGpu::Liverpool::ComputeProgram& cs_program, + Rasterizer& rasterizer) { auto& scheduler = rasterizer.GetScheduler(); auto& buffer_cache = rasterizer.GetBufferCache(); - const auto& cs_program = liverpool->GetCsRegs(); // Copy shader defines three formatted buffers as inputs: control, source, and destination. const auto ctl_buf_sharp = info.texture_buffers[0].GetSharp(info); @@ -121,10 +123,11 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, Rasterizer& rasterizer) { return true; } -bool ExecuteShaderHLE(const Shader::Info& info, Rasterizer& rasterizer) { +bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, + const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer) { switch (info.pgm_hash) { case COPY_SHADER_HASH: - return ExecuteCopyShaderHLE(info, rasterizer); + return ExecuteCopyShaderHLE(info, cs_program, rasterizer); default: return false; } diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.h b/src/video_core/renderer_vulkan/vk_shader_hle.h index 975c2a12..008de800 100644 --- a/src/video_core/renderer_vulkan/vk_shader_hle.h +++ b/src/video_core/renderer_vulkan/vk_shader_hle.h @@ -3,7 +3,7 @@ #pragma once -#include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/amdgpu/liverpool.h" namespace Shader { struct Info; @@ -14,6 +14,7 @@ namespace Vulkan { class Rasterizer; /// Attempts to execute a shader using HLE if possible. -bool ExecuteShaderHLE(const Shader::Info& info, Rasterizer& rasterizer); +bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, + const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer); } // namespace Vulkan