mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2024-12-28 02:26:07 +00:00
final touches and review notes
This commit is contained in:
parent
472cfebc39
commit
9c304b9af8
|
@ -15,7 +15,6 @@
|
|||
using namespace DebugStateType;
|
||||
|
||||
DebugStateImpl& DebugState = *Common::Singleton<DebugStateImpl>::Instance();
|
||||
extern std::unique_ptr<AmdGpu::Liverpool> liverpool;
|
||||
|
||||
static ThreadID ThisThreadID() {
|
||||
#ifdef _WIN32
|
||||
|
@ -143,41 +142,61 @@ void DebugStateImpl::PushQueueDump(QueueDump dump) {
|
|||
frame.queues.push_back(std::move(dump));
|
||||
}
|
||||
|
||||
void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, bool is_compute) {
|
||||
std::scoped_lock lock{frame_dump_list_mutex};
|
||||
std::optional<RegDump*> DebugStateImpl::GetRegDump(uintptr_t base_addr, uintptr_t header_addr) {
|
||||
const auto it = waiting_reg_dumps.find(header_addr);
|
||||
if (it == waiting_reg_dumps.end()) {
|
||||
return;
|
||||
return std::nullopt;
|
||||
}
|
||||
auto& frame = *it->second;
|
||||
waiting_reg_dumps.erase(it);
|
||||
waiting_reg_dumps_dbg.erase(waiting_reg_dumps_dbg.find(header_addr));
|
||||
auto& dump = frame.regs[header_addr - base_addr];
|
||||
dump.regs = liverpool->regs;
|
||||
if (is_compute) {
|
||||
dump.is_compute = true;
|
||||
auto& cs = dump.regs.cs_program;
|
||||
cs = liverpool->GetCsRegs();
|
||||
dump.cs_data = PipelineComputerProgramDump{
|
||||
.cs_program = cs,
|
||||
.code = std::vector<u32>{cs.Code().begin(), cs.Code().end()},
|
||||
};
|
||||
} else {
|
||||
for (int i = 0; i < RegDump::MaxShaderStages; i++) {
|
||||
if (dump.regs.stage_enable.IsStageEnabled(i)) {
|
||||
auto stage = dump.regs.ProgramForStage(i);
|
||||
if (stage->address_lo != 0) {
|
||||
auto code = stage->Code();
|
||||
dump.stages[i] = PipelineShaderProgramDump{
|
||||
.user_data = *stage,
|
||||
.code = std::vector<u32>{code.begin(), code.end()},
|
||||
};
|
||||
}
|
||||
return &frame.regs[header_addr - base_addr];
|
||||
}
|
||||
|
||||
void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
|
||||
const AmdGpu::Liverpool::Regs& regs) {
|
||||
std::scoped_lock lock{frame_dump_list_mutex};
|
||||
|
||||
auto dump = GetRegDump(base_addr, header_addr);
|
||||
if (!dump) {
|
||||
return;
|
||||
}
|
||||
|
||||
(*dump)->regs = regs;
|
||||
|
||||
for (int i = 0; i < RegDump::MaxShaderStages; i++) {
|
||||
if ((*dump)->regs.stage_enable.IsStageEnabled(i)) {
|
||||
auto stage = (*dump)->regs.ProgramForStage(i);
|
||||
if (stage->address_lo != 0) {
|
||||
auto code = stage->Code();
|
||||
(*dump)->stages[i] = PipelineShaderProgramDump{
|
||||
.user_data = *stage,
|
||||
.code = std::vector<u32>{code.begin(), code.end()},
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DebugStateImpl::PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr,
|
||||
const CsState& cs_state) {
|
||||
std::scoped_lock lock{frame_dump_list_mutex};
|
||||
|
||||
auto dump = GetRegDump(base_addr, header_addr);
|
||||
if (!dump) {
|
||||
return;
|
||||
}
|
||||
|
||||
(*dump)->is_compute = true;
|
||||
auto& cs = (*dump)->regs.cs_program;
|
||||
cs = cs_state;
|
||||
|
||||
(*dump)->cs_data = PipelineComputerProgramDump{
|
||||
.cs_program = cs,
|
||||
.code = std::vector<u32>{cs.Code().begin(), cs.Code().end()},
|
||||
};
|
||||
}
|
||||
|
||||
void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage,
|
||||
vk::ShaderModule module, std::span<const u32> spv,
|
||||
std::span<const u32> raw_code, std::span<const u32> patch_spv,
|
||||
|
|
|
@ -202,12 +202,18 @@ public:
|
|||
|
||||
void PushQueueDump(QueueDump dump);
|
||||
|
||||
void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, bool is_compute = false);
|
||||
void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
|
||||
const AmdGpu::Liverpool::Regs& regs);
|
||||
using CsState = AmdGpu::Liverpool::ComputeProgram;
|
||||
void PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, const CsState& cs_state);
|
||||
|
||||
void CollectShader(const std::string& name, Shader::LogicalStage l_stage,
|
||||
vk::ShaderModule module, std::span<const u32> spv,
|
||||
std::span<const u32> raw_code, std::span<const u32> patch_spv,
|
||||
bool is_patched);
|
||||
|
||||
private:
|
||||
std::optional<RegDump*> GetRegDump(uintptr_t base_addr, uintptr_t header_addr);
|
||||
};
|
||||
} // namespace DebugStateType
|
||||
|
||||
|
|
|
@ -375,8 +375,18 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
}
|
||||
case PM4ItOpcode::SetShReg: {
|
||||
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
|
||||
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
|
||||
(count - 1) * sizeof(u32));
|
||||
const auto set_size = (count - 1) * sizeof(u32);
|
||||
|
||||
if (set_data->reg_offset >= 0x200 &&
|
||||
set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) {
|
||||
ASSERT(set_size <= sizeof(ComputeProgram));
|
||||
auto* addr = reinterpret_cast<u32*>(&mapped_queues[GfxQueueId].cs_state) +
|
||||
(set_data->reg_offset - 0x200);
|
||||
std::memcpy(addr, header + 2, set_size);
|
||||
} else {
|
||||
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
|
||||
set_size);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::SetUconfigReg: {
|
||||
|
@ -398,7 +408,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
regs.num_indices = draw_index->index_count;
|
||||
regs.draw_initiator = draw_index->draw_initiator;
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header));
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
|
||||
}
|
||||
if (rasterizer) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
|
@ -415,7 +425,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
regs.num_indices = draw_index_off->index_count;
|
||||
regs.draw_initiator = draw_index_off->draw_initiator;
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header));
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
|
||||
}
|
||||
if (rasterizer) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
|
@ -431,7 +441,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
regs.num_indices = draw_index->index_count;
|
||||
regs.draw_initiator = draw_index->draw_initiator;
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header));
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
|
||||
}
|
||||
if (rasterizer) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
|
@ -447,7 +457,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
|
||||
const auto size = sizeof(DrawIndirectArgs);
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header));
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
|
||||
}
|
||||
if (rasterizer) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
|
@ -464,7 +474,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
|
||||
const auto size = sizeof(DrawIndexedIndirectArgs);
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header));
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
|
||||
}
|
||||
if (rasterizer) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
|
@ -481,7 +491,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
const auto offset = draw_index_indirect->data_offset;
|
||||
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header));
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
|
||||
}
|
||||
if (rasterizer) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
|
@ -503,7 +513,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
cs_program.dim_z = dispatch_direct->dim_z;
|
||||
cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), true);
|
||||
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
|
||||
cs_program);
|
||||
}
|
||||
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
|
@ -522,7 +533,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
|
||||
const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), true);
|
||||
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
|
||||
cs_program);
|
||||
}
|
||||
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
|
@ -782,8 +794,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
|
|||
if (set_data->reg_offset >= 0x200 &&
|
||||
set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) {
|
||||
ASSERT(set_size <= sizeof(ComputeProgram));
|
||||
auto* addr =
|
||||
reinterpret_cast<u32*>(&asc_sh_regs[vqid]) + (set_data->reg_offset - 0x200);
|
||||
auto* addr = reinterpret_cast<u32*>(&mapped_queues[vqid + 1].cs_state) +
|
||||
(set_data->reg_offset - 0x200);
|
||||
std::memcpy(addr, header + 2, set_size);
|
||||
} else {
|
||||
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
|
||||
|
@ -800,7 +812,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
|
|||
cs_program.dim_z = dispatch_direct->dim_z;
|
||||
cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), true);
|
||||
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
|
||||
cs_program);
|
||||
}
|
||||
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
|
|
|
@ -1145,7 +1145,7 @@ struct Liverpool {
|
|||
INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20);
|
||||
ShaderProgram ls_program;
|
||||
INSERT_PADDING_WORDS(0xA4);
|
||||
ComputeProgram cs_program;
|
||||
ComputeProgram cs_program; // shadowed by `cs_state` in `mapped_queues`
|
||||
INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5);
|
||||
DepthRenderControl depth_render_control;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
|
@ -1279,7 +1279,6 @@ struct Liverpool {
|
|||
};
|
||||
|
||||
Regs regs{};
|
||||
std::array<ComputeProgram, NumComputeRings> asc_sh_regs{};
|
||||
|
||||
// See for a comment in context reg parsing code
|
||||
union CbDbExtent {
|
||||
|
@ -1345,7 +1344,7 @@ public:
|
|||
}
|
||||
|
||||
inline ComputeProgram& GetCsRegs() {
|
||||
return *curr_cs_regs;
|
||||
return mapped_queues[curr_gnm_queue_id].cs_state;
|
||||
}
|
||||
|
||||
struct AscQueueInfo {
|
||||
|
@ -1399,11 +1398,11 @@ private:
|
|||
void Process(std::stop_token stoken);
|
||||
|
||||
inline void SaveDispatchContext() {
|
||||
curr_cs_regs = &regs.cs_program;
|
||||
curr_gnm_queue_id = GfxQueueId;
|
||||
}
|
||||
|
||||
inline void SaveDispatchContext(u32 vqid) {
|
||||
curr_cs_regs = &asc_sh_regs[vqid];
|
||||
curr_gnm_queue_id = vqid + 1;
|
||||
}
|
||||
|
||||
struct GpuQueue {
|
||||
|
@ -1413,6 +1412,7 @@ private:
|
|||
std::vector<u32> dcb_buffer;
|
||||
std::vector<u32> ccb_buffer;
|
||||
std::queue<Task::Handle> submits{};
|
||||
ComputeProgram cs_state{};
|
||||
VAddr indirect_args_addr{};
|
||||
};
|
||||
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
|
||||
|
@ -1445,7 +1445,7 @@ private:
|
|||
std::mutex submit_mutex;
|
||||
std::condition_variable_any submit_cv;
|
||||
std::queue<Common::UniqueFunction<void>> command_queue{};
|
||||
ComputeProgram* curr_cs_regs{&regs.cs_program};
|
||||
u32 curr_gnm_queue_id{GfxQueueId}; // Gnm queue processing dispatch
|
||||
};
|
||||
|
||||
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
|
||||
|
|
|
@ -172,7 +172,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_
|
|||
}
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Compute: {
|
||||
case Stage::Compute: {
|
||||
const auto& cs_pgm = liverpool->GetCsRegs();
|
||||
info.num_user_data = cs_pgm.settings.num_user_regs;
|
||||
info.num_allocated_vgprs = cs_pgm.settings.num_vgprs * 4;
|
||||
|
|
|
@ -324,7 +324,7 @@ void Rasterizer::DispatchDirect() {
|
|||
}
|
||||
|
||||
const auto& cs = pipeline->GetStage(Shader::LogicalStage::Compute);
|
||||
if (ExecuteShaderHLE(cs, *this)) {
|
||||
if (ExecuteShaderHLE(cs, liverpool->regs, cs_program, *this)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_hle.h"
|
||||
|
||||
|
@ -11,10 +12,11 @@ namespace Vulkan {
|
|||
|
||||
static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f;
|
||||
|
||||
bool ExecuteCopyShaderHLE(const Shader::Info& info, Rasterizer& rasterizer) {
|
||||
static bool ExecuteCopyShaderHLE(const Shader::Info& info,
|
||||
const AmdGpu::Liverpool::ComputeProgram& cs_program,
|
||||
Rasterizer& rasterizer) {
|
||||
auto& scheduler = rasterizer.GetScheduler();
|
||||
auto& buffer_cache = rasterizer.GetBufferCache();
|
||||
const auto& cs_program = liverpool->GetCsRegs();
|
||||
|
||||
// Copy shader defines three formatted buffers as inputs: control, source, and destination.
|
||||
const auto ctl_buf_sharp = info.texture_buffers[0].GetSharp(info);
|
||||
|
@ -121,10 +123,11 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, Rasterizer& rasterizer) {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool ExecuteShaderHLE(const Shader::Info& info, Rasterizer& rasterizer) {
|
||||
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
|
||||
const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer) {
|
||||
switch (info.pgm_hash) {
|
||||
case COPY_SHADER_HASH:
|
||||
return ExecuteCopyShaderHLE(info, rasterizer);
|
||||
return ExecuteCopyShaderHLE(info, cs_program, rasterizer);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
|
||||
namespace Shader {
|
||||
struct Info;
|
||||
|
@ -14,6 +14,7 @@ namespace Vulkan {
|
|||
class Rasterizer;
|
||||
|
||||
/// Attempts to execute a shader using HLE if possible.
|
||||
bool ExecuteShaderHLE(const Shader::Info& info, Rasterizer& rasterizer);
|
||||
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
|
||||
const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
Loading…
Reference in a new issue