final touches and review notes

This commit is contained in:
psucien 2024-12-14 20:44:13 +01:00
parent 472cfebc39
commit 9c304b9af8
8 changed files with 95 additions and 53 deletions

View file

@ -15,7 +15,6 @@
using namespace DebugStateType;
DebugStateImpl& DebugState = *Common::Singleton<DebugStateImpl>::Instance();
extern std::unique_ptr<AmdGpu::Liverpool> liverpool;
static ThreadID ThisThreadID() {
#ifdef _WIN32
@ -143,41 +142,61 @@ void DebugStateImpl::PushQueueDump(QueueDump dump) {
frame.queues.push_back(std::move(dump));
}
void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, bool is_compute) {
std::scoped_lock lock{frame_dump_list_mutex};
std::optional<RegDump*> DebugStateImpl::GetRegDump(uintptr_t base_addr, uintptr_t header_addr) {
const auto it = waiting_reg_dumps.find(header_addr);
if (it == waiting_reg_dumps.end()) {
return;
return std::nullopt;
}
auto& frame = *it->second;
waiting_reg_dumps.erase(it);
waiting_reg_dumps_dbg.erase(waiting_reg_dumps_dbg.find(header_addr));
auto& dump = frame.regs[header_addr - base_addr];
dump.regs = liverpool->regs;
if (is_compute) {
dump.is_compute = true;
auto& cs = dump.regs.cs_program;
cs = liverpool->GetCsRegs();
dump.cs_data = PipelineComputerProgramDump{
.cs_program = cs,
.code = std::vector<u32>{cs.Code().begin(), cs.Code().end()},
};
} else {
for (int i = 0; i < RegDump::MaxShaderStages; i++) {
if (dump.regs.stage_enable.IsStageEnabled(i)) {
auto stage = dump.regs.ProgramForStage(i);
if (stage->address_lo != 0) {
auto code = stage->Code();
dump.stages[i] = PipelineShaderProgramDump{
.user_data = *stage,
.code = std::vector<u32>{code.begin(), code.end()},
};
}
return &frame.regs[header_addr - base_addr];
}
/// Captures the graphics register state for the command located at `header_addr`.
///
/// Takes `frame_dump_list_mutex`, looks up the pending register dump via
/// `GetRegDump(base_addr, header_addr)` and, if one exists, stores a copy of `regs`
/// in it together with a snapshot of the code of every enabled shader stage.
///
/// @param base_addr   Forwarded to `GetRegDump` together with `header_addr` to locate the dump.
/// @param header_addr Address of the command header the dump was requested for.
/// @param regs        Register state to copy into the dump.
void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
                                  const AmdGpu::Liverpool::Regs& regs) {
    std::scoped_lock lock{frame_dump_list_mutex};

    const auto pending = GetRegDump(base_addr, header_addr);
    if (!pending.has_value()) {
        // Nothing is waiting for this header; there is no dump to fill in.
        return;
    }

    RegDump& reg_dump = **pending;
    reg_dump.regs = regs;

    for (int stage_idx = 0; stage_idx < RegDump::MaxShaderStages; ++stage_idx) {
        if (!reg_dump.regs.stage_enable.IsStageEnabled(stage_idx)) {
            continue;
        }
        auto program = reg_dump.regs.ProgramForStage(stage_idx);
        // A zero low address word is treated as "no program bound" and is skipped.
        if (program->address_lo == 0) {
            continue;
        }
        auto binary = program->Code();
        reg_dump.stages[stage_idx] = PipelineShaderProgramDump{
            .user_data = *program,
            .code = std::vector<u32>{binary.begin(), binary.end()},
        };
    }
}
void DebugStateImpl::PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr,
const CsState& cs_state) {
std::scoped_lock lock{frame_dump_list_mutex};
auto dump = GetRegDump(base_addr, header_addr);
if (!dump) {
return;
}
(*dump)->is_compute = true;
auto& cs = (*dump)->regs.cs_program;
cs = cs_state;
(*dump)->cs_data = PipelineComputerProgramDump{
.cs_program = cs,
.code = std::vector<u32>{cs.Code().begin(), cs.Code().end()},
};
}
void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage,
vk::ShaderModule module, std::span<const u32> spv,
std::span<const u32> raw_code, std::span<const u32> patch_spv,

View file

@ -202,12 +202,18 @@ public:
void PushQueueDump(QueueDump dump);
void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, bool is_compute = false);
void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
const AmdGpu::Liverpool::Regs& regs);
using CsState = AmdGpu::Liverpool::ComputeProgram;
void PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, const CsState& cs_state);
void CollectShader(const std::string& name, Shader::LogicalStage l_stage,
vk::ShaderModule module, std::span<const u32> spv,
std::span<const u32> raw_code, std::span<const u32> patch_spv,
bool is_patched);
private:
std::optional<RegDump*> GetRegDump(uintptr_t base_addr, uintptr_t header_addr);
};
} // namespace DebugStateType

View file

@ -375,8 +375,18 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
}
case PM4ItOpcode::SetShReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
(count - 1) * sizeof(u32));
const auto set_size = (count - 1) * sizeof(u32);
if (set_data->reg_offset >= 0x200 &&
set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) {
ASSERT(set_size <= sizeof(ComputeProgram));
auto* addr = reinterpret_cast<u32*>(&mapped_queues[GfxQueueId].cs_state) +
(set_data->reg_offset - 0x200);
std::memcpy(addr, header + 2, set_size);
} else {
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
set_size);
}
break;
}
case PM4ItOpcode::SetUconfigReg: {
@ -398,7 +408,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator;
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header));
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
}
if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header);
@ -415,7 +425,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
regs.num_indices = draw_index_off->index_count;
regs.draw_initiator = draw_index_off->draw_initiator;
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header));
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
}
if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header);
@ -431,7 +441,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator;
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header));
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
}
if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header);
@ -447,7 +457,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
const auto size = sizeof(DrawIndirectArgs);
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header));
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
}
if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header);
@ -464,7 +474,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
const auto size = sizeof(DrawIndexedIndirectArgs);
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header));
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
}
if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header);
@ -481,7 +491,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto offset = draw_index_indirect->data_offset;
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header));
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
}
if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header);
@ -503,7 +513,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
cs_program.dim_z = dispatch_direct->dim_z;
cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), true);
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
cs_program);
}
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
const auto cmd_address = reinterpret_cast<const void*>(header);
@ -522,7 +533,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), true);
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
cs_program);
}
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
const auto cmd_address = reinterpret_cast<const void*>(header);
@ -782,8 +794,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
if (set_data->reg_offset >= 0x200 &&
set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) {
ASSERT(set_size <= sizeof(ComputeProgram));
auto* addr =
reinterpret_cast<u32*>(&asc_sh_regs[vqid]) + (set_data->reg_offset - 0x200);
auto* addr = reinterpret_cast<u32*>(&mapped_queues[vqid + 1].cs_state) +
(set_data->reg_offset - 0x200);
std::memcpy(addr, header + 2, set_size);
} else {
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
@ -800,7 +812,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
cs_program.dim_z = dispatch_direct->dim_z;
cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), true);
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
cs_program);
}
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
const auto cmd_address = reinterpret_cast<const void*>(header);

View file

@ -1145,7 +1145,7 @@ struct Liverpool {
INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20);
ShaderProgram ls_program;
INSERT_PADDING_WORDS(0xA4);
ComputeProgram cs_program;
ComputeProgram cs_program; // shadowed by `cs_state` in `mapped_queues`
INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5);
DepthRenderControl depth_render_control;
INSERT_PADDING_WORDS(1);
@ -1279,7 +1279,6 @@ struct Liverpool {
};
Regs regs{};
std::array<ComputeProgram, NumComputeRings> asc_sh_regs{};
// See the comment in the context reg parsing code for details
union CbDbExtent {
@ -1345,7 +1344,7 @@ public:
}
inline ComputeProgram& GetCsRegs() {
return *curr_cs_regs;
return mapped_queues[curr_gnm_queue_id].cs_state;
}
struct AscQueueInfo {
@ -1399,11 +1398,11 @@ private:
void Process(std::stop_token stoken);
inline void SaveDispatchContext() {
curr_cs_regs = &regs.cs_program;
curr_gnm_queue_id = GfxQueueId;
}
inline void SaveDispatchContext(u32 vqid) {
curr_cs_regs = &asc_sh_regs[vqid];
curr_gnm_queue_id = vqid + 1;
}
struct GpuQueue {
@ -1413,6 +1412,7 @@ private:
std::vector<u32> dcb_buffer;
std::vector<u32> ccb_buffer;
std::queue<Task::Handle> submits{};
ComputeProgram cs_state{};
VAddr indirect_args_addr{};
};
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
@ -1445,7 +1445,7 @@ private:
std::mutex submit_mutex;
std::condition_variable_any submit_cv;
std::queue<Common::UniqueFunction<void>> command_queue{};
ComputeProgram* curr_cs_regs{&regs.cs_program};
u32 curr_gnm_queue_id{GfxQueueId}; // Gnm queue processing dispatch
};
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);

View file

@ -172,7 +172,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_
}
break;
}
case Shader::Stage::Compute: {
case Stage::Compute: {
const auto& cs_pgm = liverpool->GetCsRegs();
info.num_user_data = cs_pgm.settings.num_user_regs;
info.num_allocated_vgprs = cs_pgm.settings.num_vgprs * 4;

View file

@ -324,7 +324,7 @@ void Rasterizer::DispatchDirect() {
}
const auto& cs = pipeline->GetStage(Shader::LogicalStage::Compute);
if (ExecuteShaderHLE(cs, *this)) {
if (ExecuteShaderHLE(cs, liverpool->regs, cs_program, *this)) {
return;
}

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/info.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_hle.h"
@ -11,10 +12,11 @@ namespace Vulkan {
static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f;
bool ExecuteCopyShaderHLE(const Shader::Info& info, Rasterizer& rasterizer) {
static bool ExecuteCopyShaderHLE(const Shader::Info& info,
const AmdGpu::Liverpool::ComputeProgram& cs_program,
Rasterizer& rasterizer) {
auto& scheduler = rasterizer.GetScheduler();
auto& buffer_cache = rasterizer.GetBufferCache();
const auto& cs_program = liverpool->GetCsRegs();
// Copy shader defines three formatted buffers as inputs: control, source, and destination.
const auto ctl_buf_sharp = info.texture_buffers[0].GetSharp(info);
@ -121,10 +123,11 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, Rasterizer& rasterizer) {
return true;
}
bool ExecuteShaderHLE(const Shader::Info& info, Rasterizer& rasterizer) {
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer) {
switch (info.pgm_hash) {
case COPY_SHADER_HASH:
return ExecuteCopyShaderHLE(info, rasterizer);
return ExecuteCopyShaderHLE(info, cs_program, rasterizer);
default:
return false;
}

View file

@ -3,7 +3,7 @@
#pragma once
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/amdgpu/liverpool.h"
namespace Shader {
struct Info;
@ -14,6 +14,7 @@ namespace Vulkan {
class Rasterizer;
/// Attempts to execute a shader using HLE if possible.
bool ExecuteShaderHLE(const Shader::Info& info, Rasterizer& rasterizer);
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer);
} // namespace Vulkan