Mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2024-12-28 02:26:07 +00:00)
GPU processor refactoring (#1787)
* coroutine code prettification
* asc queues submission refactoring
* better asc ring context handling
* final touches and review notes
* even more simplification for context saving
This commit is contained in:
parent af26c945b1
commit 0fd1ab674b
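The "coroutine code prettification" bullet amounts to replacing the hand-written Tracy fiber enter/leave sequences around every suspension point with small helper macros. A condensed sketch of the pattern, taken from the Liverpool hunks further down (YIELD_GFX/RESUME_GFX are the graphics-queue flavors defined there):

    // Suspend the current command-processor task back to Liverpool::Process, keeping the
    // Tracy fiber markers balanced around the suspension point.
    #define YIELD(name)                                                                            \
        FIBER_EXIT;        /* leave the current fiber before suspending */                         \
        co_yield {};       /* suspend this coroutine task */                                       \
        FIBER_ENTER(name); /* re-enter the fiber once the task is resumed */

    // Drive a nested task (e.g. an indirect command buffer) one step from the current task.
    #define RESUME(task, name)                                                                     \
        FIBER_EXIT;                                                                                \
        task.handle.resume();                                                                      \
        FIBER_ENTER(name);

    // Typical wait loop after the refactoring:
    RESUME_GFX(task);
    while (!task.handle.done()) {
        YIELD_GFX();
        RESUME_GFX(task);
    }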
@@ -57,3 +57,6 @@ enum MarkersPalette : int {
     tracy::SourceLocationData{nullptr, name, TracyFile, (uint32_t)TracyLine, 0};
 
 #define FRAME_END FrameMark
+
+#define FIBER_ENTER(name) TracyFiberEnter(name)
+#define FIBER_EXIT TracyFiberLeave

@@ -142,41 +142,61 @@ void DebugStateImpl::PushQueueDump(QueueDump dump) {
     frame.queues.push_back(std::move(dump));
 }
 
-void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
-                                  const AmdGpu::Liverpool::Regs& regs, bool is_compute) {
-    std::scoped_lock lock{frame_dump_list_mutex};
+std::optional<RegDump*> DebugStateImpl::GetRegDump(uintptr_t base_addr, uintptr_t header_addr) {
     const auto it = waiting_reg_dumps.find(header_addr);
     if (it == waiting_reg_dumps.end()) {
-        return;
+        return std::nullopt;
     }
     auto& frame = *it->second;
     waiting_reg_dumps.erase(it);
     waiting_reg_dumps_dbg.erase(waiting_reg_dumps_dbg.find(header_addr));
-    auto& dump = frame.regs[header_addr - base_addr];
-    dump.regs = regs;
-    if (is_compute) {
-        dump.is_compute = true;
-        const auto& cs = dump.regs.cs_program;
-        dump.cs_data = PipelineComputerProgramDump{
-            .cs_program = cs,
-            .code = std::vector<u32>{cs.Code().begin(), cs.Code().end()},
-        };
-    } else {
-        for (int i = 0; i < RegDump::MaxShaderStages; i++) {
-            if (regs.stage_enable.IsStageEnabled(i)) {
-                auto stage = regs.ProgramForStage(i);
-                if (stage->address_lo != 0) {
-                    auto code = stage->Code();
-                    dump.stages[i] = PipelineShaderProgramDump{
-                        .user_data = *stage,
-                        .code = std::vector<u32>{code.begin(), code.end()},
-                    };
-                }
+    return &frame.regs[header_addr - base_addr];
+}
+
+void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
+                                  const AmdGpu::Liverpool::Regs& regs) {
+    std::scoped_lock lock{frame_dump_list_mutex};
+
+    auto dump = GetRegDump(base_addr, header_addr);
+    if (!dump) {
+        return;
+    }
+
+    (*dump)->regs = regs;
+
+    for (int i = 0; i < RegDump::MaxShaderStages; i++) {
+        if ((*dump)->regs.stage_enable.IsStageEnabled(i)) {
+            auto stage = (*dump)->regs.ProgramForStage(i);
+            if (stage->address_lo != 0) {
+                auto code = stage->Code();
+                (*dump)->stages[i] = PipelineShaderProgramDump{
+                    .user_data = *stage,
+                    .code = std::vector<u32>{code.begin(), code.end()},
+                };
             }
         }
     }
 }
 
+void DebugStateImpl::PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr,
+                                         const CsState& cs_state) {
+    std::scoped_lock lock{frame_dump_list_mutex};
+
+    auto dump = GetRegDump(base_addr, header_addr);
+    if (!dump) {
+        return;
+    }
+
+    (*dump)->is_compute = true;
+    auto& cs = (*dump)->regs.cs_program;
+    cs = cs_state;
+
+    (*dump)->cs_data = PipelineComputerProgramDump{
+        .cs_program = cs,
+        .code = std::vector<u32>{cs.Code().begin(), cs.Code().end()},
+    };
+}
+
 void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage,
                                    vk::ShaderModule module, std::span<const u32> spv,
                                    std::span<const u32> raw_code, std::span<const u32> patch_spv,
@@ -11,7 +11,6 @@
 #include <queue>
 
 #include "common/types.h"
-#include "video_core/amdgpu/liverpool.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 
 #ifdef _WIN32

@@ -204,12 +203,17 @@ public:
     void PushQueueDump(QueueDump dump);
 
     void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
-                      const AmdGpu::Liverpool::Regs& regs, bool is_compute = false);
+                      const AmdGpu::Liverpool::Regs& regs);
+
+    using CsState = AmdGpu::Liverpool::ComputeProgram;
+    void PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, const CsState& cs_state);
 
     void CollectShader(const std::string& name, Shader::LogicalStage l_stage,
                        vk::ShaderModule module, std::span<const u32> spv,
                        std::span<const u32> raw_code, std::span<const u32> patch_spv,
                        bool is_patched);
 
+private:
+    std::optional<RegDump*> GetRegDump(uintptr_t base_addr, uintptr_t header_addr);
 };
 } // namespace DebugStateType
@@ -296,17 +296,12 @@ static_assert(CtxInitSequence400.size() == 0x61);
 // In case if `submitDone` is issued we need to block submissions until GPU idle
 static u32 submission_lock{};
 std::condition_variable cv_lock{};
-static std::mutex m_submission{};
+std::mutex m_submission{};
 static u64 frames_submitted{};      // frame counter
 static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame
 static int sdk_version{0};
 
-struct AscQueueInfo {
-    VAddr map_addr;
-    u32* read_addr;
-    u32 ring_size_dw;
-};
-static Common::SlotVector<AscQueueInfo> asc_queues{};
+static u32 asc_next_offs_dw[Liverpool::NumComputeRings];
+
 static constexpr VAddr tessellation_factors_ring_addr = Core::SYSTEM_RESERVED_MAX - 0xFFFFFFF;
 static constexpr u32 tessellation_offchip_buffer_size = 0x800000u;

@@ -506,11 +501,19 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
     }
 
     auto vqid = gnm_vqid - 1;
-    auto& asc_queue = asc_queues[{vqid}];
-    const auto* acb_ptr = reinterpret_cast<const u32*>(asc_queue.map_addr + *asc_queue.read_addr);
-    const auto acb_size = next_offs_dw ? (next_offs_dw << 2u) - *asc_queue.read_addr
-                                       : (asc_queue.ring_size_dw << 2u) - *asc_queue.read_addr;
-    const std::span acb_span{acb_ptr, acb_size >> 2u};
+    auto& asc_queue = liverpool->asc_queues[{vqid}];
+
+    const auto& offs_dw = asc_next_offs_dw[vqid];
+
+    if (next_offs_dw < offs_dw) {
+        ASSERT_MSG(next_offs_dw == 0, "ACB submission is split at the end of ring buffer");
+    }
+
+    const auto* acb_ptr = reinterpret_cast<const u32*>(asc_queue.map_addr) + offs_dw;
+    const auto acb_size_dw = (next_offs_dw ? next_offs_dw : asc_queue.ring_size_dw) - offs_dw;
+    const std::span acb_span{acb_ptr, acb_size_dw};
+
+    asc_next_offs_dw[vqid] = next_offs_dw;
 
     if (DebugState.DumpingCurrentFrame()) {
         static auto last_frame_num = -1LL;

@@ -545,9 +548,6 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
         });
     }
     liverpool->SubmitAsc(gnm_vqid, acb_span);
-
-    *asc_queue.read_addr += acb_size;
-    *asc_queue.read_addr %= asc_queue.ring_size_dw * 4;
 }
 
 void PS4_SYSV_ABI sceGnmDingDongForWorkload(u32 gnm_vqid, u32 next_offs_dw, u64 workload_id) {
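With the ring bookkeeping now kept in dwords on both sides, the submission size in sceGnmDingDong is a plain dword subtraction. A worked example with hypothetical values:

    // ring_size_dw = 0x1000, previous doorbell offset asc_next_offs_dw[vqid] = 0x0200
    // next_offs_dw = 0x0500 -> acb_size_dw = 0x0500 - 0x0200 = 0x0300 dwords,
    //                          starting at map_addr + 0x0200 dwords
    // next_offs_dw = 0x0000 -> the submission runs to the end of the ring:
    //                          acb_size_dw = 0x1000 - 0x0200 = 0x0E00 dwords
    // The queue's read pointer is no longer advanced here; Liverpool::ProcessCompute now
    // updates *read_addr packet by packet (see the hunks below).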
@@ -1266,12 +1266,16 @@ int PS4_SYSV_ABI sceGnmMapComputeQueue(u32 pipe_id, u32 queue_id, VAddr ring_bas
         return ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_READ_PTR_ADDR;
     }
 
-    auto vqid = asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw);
+    const auto vqid =
+        liverpool->asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw, pipe_id);
     // We need to offset index as `dingDong` assumes it to be from the range [1..64]
     const auto gnm_vqid = vqid.index + 1;
     LOG_INFO(Lib_GnmDriver, "ASC pipe {} queue {} mapped to vqueue {}", pipe_id, queue_id,
              gnm_vqid);
 
+    const auto& queue = liverpool->asc_queues[vqid];
+    *queue.read_addr = 0u;
+
     return gnm_vqid;
 }
@@ -294,7 +294,7 @@ void EmitContext::DefineInputs() {
         });
         // Note that we pass index rather than Id
         input_params[attrib.semantic] = SpirvAttribute{
-            .id = rate_idx,
+            .id = {rate_idx},
            .pointer_type = input_u32,
            .component_type = U32[1],
            .num_components = std::min<u16>(attrib.num_elements, num_components),
@@ -1,6 +1,8 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include <boost/preprocessor/stringize.hpp>
+
 #include "common/assert.h"
 #include "common/config.h"
 #include "common/debug.h"

@@ -18,7 +20,32 @@ namespace AmdGpu {
 
 static const char* dcb_task_name{"DCB_TASK"};
 static const char* ccb_task_name{"CCB_TASK"};
-static const char* acb_task_name{"ACB_TASK"};
+
+#define MAX_NAMES 56
+static_assert(Liverpool::NumComputeRings <= MAX_NAMES);
+
+#define NAME_NUM(z, n, name) BOOST_PP_STRINGIZE(name) BOOST_PP_STRINGIZE(n),
+#define NAME_ARRAY(name, num) {BOOST_PP_REPEAT(num, NAME_NUM, name)}
+
+static const char* acb_task_name[] = NAME_ARRAY(ACB_TASK, MAX_NAMES);
+
+#define YIELD(name)                                                                                \
+    FIBER_EXIT;                                                                                    \
+    co_yield {};                                                                                   \
+    FIBER_ENTER(name);
+
+#define YIELD_CE() YIELD(ccb_task_name)
+#define YIELD_GFX() YIELD(dcb_task_name)
+#define YIELD_ASC(id) YIELD(acb_task_name[id])
+
+#define RESUME(task, name)                                                                         \
+    FIBER_EXIT;                                                                                    \
+    task.handle.resume();                                                                          \
+    FIBER_ENTER(name);
+
+#define RESUME_CE(task) RESUME(task, ccb_task_name)
+#define RESUME_GFX(task) RESUME(task, dcb_task_name)
+#define RESUME_ASC(task, id) RESUME(task, acb_task_name[id])
+
 std::array<u8, 48_KB> Liverpool::ConstantEngine::constants_heap;
 
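The Boost.Preprocessor helpers above only exist to stamp out one Tracy fiber label per compute ring. Roughly, the expansion behaves like this (shown for the first few of the 56 entries):

    // NAME_NUM(z, 0, ACB_TASK)  ->  "ACB_TASK" "0",
    // NAME_NUM(z, 1, ACB_TASK)  ->  "ACB_TASK" "1",
    // ...
    // NAME_ARRAY(ACB_TASK, 56) therefore expands, after adjacent-literal concatenation, to
    // static const char* acb_task_name[] = {"ACB_TASK0", "ACB_TASK1", ..., "ACB_TASK55"};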
@@ -60,7 +87,7 @@ void Liverpool::Process(std::stop_token stoken) {
 
         VideoCore::StartCapture();
 
-        int qid = -1;
+        curr_qid = -1;
 
         while (num_submits || num_commands) {
 

@@ -79,9 +106,9 @@ void Liverpool::Process(std::stop_token stoken) {
                 --num_commands;
             }
 
-            qid = (qid + 1) % NumTotalQueues;
+            curr_qid = (curr_qid + 1) % num_mapped_queues;
 
-            auto& queue = mapped_queues[qid];
+            auto& queue = mapped_queues[curr_qid];
 
             Task::Handle task{};
             {

@@ -119,7 +146,7 @@ void Liverpool::Process(std::stop_token stoken) {
 }
 
 Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
-    TracyFiberEnter(ccb_task_name);
+    FIBER_ENTER(ccb_task_name);
 
     while (!ccb.empty()) {
         const auto* header = reinterpret_cast<const PM4Header*>(ccb.data());

@@ -155,9 +182,7 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
         case PM4ItOpcode::WaitOnDeCounterDiff: {
             const auto diff = it_body[0];
             while ((cblock.de_count - cblock.ce_count) >= diff) {
-                TracyFiberLeave;
-                co_yield {};
-                TracyFiberEnter(ccb_task_name);
+                YIELD_CE();
             }
             break;
         }

@@ -165,13 +190,12 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
             const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
             auto task =
                 ProcessCeUpdate({indirect_buffer->Address<const u32>(), indirect_buffer->ib_size});
-            while (!task.handle.done()) {
-                task.handle.resume();
+            RESUME_CE(task);
 
-                TracyFiberLeave;
-                co_yield {};
-                TracyFiberEnter(ccb_task_name);
-            };
+            while (!task.handle.done()) {
+                YIELD_CE();
+                RESUME_CE(task);
+            }
             break;
         }
         default:

@@ -182,11 +206,11 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
         ccb = NextPacket(ccb, header->type3.NumWords() + 1);
     }
 
-    TracyFiberLeave;
+    FIBER_EXIT;
 }
 
 Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb) {
-    TracyFiberEnter(dcb_task_name);
+    FIBER_ENTER(dcb_task_name);
 
     cblock.Reset();
 

@@ -197,9 +221,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
     if (!ccb.empty()) {
         // In case of CCB provided kick off CE asap to have the constant heap ready to use
         ce_task = ProcessCeUpdate(ccb);
-        TracyFiberLeave;
-        ce_task.handle.resume();
-        TracyFiberEnter(dcb_task_name);
+        RESUME_GFX(ce_task);
     }
 
     const auto base_addr = reinterpret_cast<uintptr_t>(dcb.data());

@@ -353,8 +375,18 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
         }
         case PM4ItOpcode::SetShReg: {
             const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
-            std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
-                        (count - 1) * sizeof(u32));
+            const auto set_size = (count - 1) * sizeof(u32);
+
+            if (set_data->reg_offset >= 0x200 &&
+                set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) {
+                ASSERT(set_size <= sizeof(ComputeProgram));
+                auto* addr = reinterpret_cast<u32*>(&mapped_queues[GfxQueueId].cs_state) +
+                             (set_data->reg_offset - 0x200);
+                std::memcpy(addr, header + 2, set_size);
+            } else {
+                std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
+                            set_size);
+            }
             break;
         }
         case PM4ItOpcode::SetUconfigReg: {
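The SetShReg special case above appears to be what the "even more simplification for context saving" bullet refers to: writes that land in the compute-program register window are redirected into the per-queue cs_state shadow instead of the shared regs block, so yielding tasks no longer save and restore regs.cs_program by hand. A sketch of the address math, assuming a hypothetical packet with reg_offset = 0x204:

    // dword index into the shadow = reg_offset - 0x200 = 4
    // destination = reinterpret_cast<u32*>(&mapped_queues[GfxQueueId].cs_state) + 4
    // i.e. the packet payload is written into the fifth dword of the shadowed ComputeProgram.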
@@ -474,15 +506,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
         }
         case PM4ItOpcode::DispatchDirect: {
             const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
-            regs.cs_program.dim_x = dispatch_direct->dim_x;
-            regs.cs_program.dim_y = dispatch_direct->dim_y;
-            regs.cs_program.dim_z = dispatch_direct->dim_z;
-            regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
+            auto& cs_program = GetCsRegs();
+            cs_program.dim_x = dispatch_direct->dim_x;
+            cs_program.dim_y = dispatch_direct->dim_y;
+            cs_program.dim_z = dispatch_direct->dim_z;
+            cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
             if (DebugState.DumpingCurrentReg()) {
-                DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs,
-                                        true);
+                DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
+                                               cs_program);
             }
-            if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
+            if (rasterizer && (cs_program.dispatch_initiator & 1)) {
                 const auto cmd_address = reinterpret_cast<const void*>(header);
                 rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address));
                 rasterizer->DispatchDirect();

@@ -493,14 +526,15 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
         case PM4ItOpcode::DispatchIndirect: {
             const auto* dispatch_indirect =
                 reinterpret_cast<const PM4CmdDispatchIndirect*>(header);
+            auto& cs_program = GetCsRegs();
             const auto offset = dispatch_indirect->data_offset;
             const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
             const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
             if (DebugState.DumpingCurrentReg()) {
-                DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs,
-                                        true);
+                DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
+                                               cs_program);
             }
-            if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
+            if (rasterizer && (cs_program.dispatch_initiator & 1)) {
                 const auto cmd_address = reinterpret_cast<const void*>(header);
                 rasterizer->ScopeMarkerBegin(
                     fmt::format("dcb:{}:DispatchIndirect", cmd_address));

@@ -613,11 +647,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
         case PM4ItOpcode::Rewind: {
             const PM4CmdRewind* rewind = reinterpret_cast<const PM4CmdRewind*>(header);
             while (!rewind->Valid()) {
-                mapped_queues[GfxQueueId].cs_state = regs.cs_program;
-                TracyFiberLeave;
-                co_yield {};
-                TracyFiberEnter(dcb_task_name);
-                regs.cs_program = mapped_queues[GfxQueueId].cs_state;
+                YIELD_GFX();
             }
             break;
         }

@@ -633,11 +663,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
                 vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
             }
             while (!wait_reg_mem->Test()) {
-                mapped_queues[GfxQueueId].cs_state = regs.cs_program;
-                TracyFiberLeave;
-                co_yield {};
-                TracyFiberEnter(dcb_task_name);
-                regs.cs_program = mapped_queues[GfxQueueId].cs_state;
+                YIELD_GFX();
             }
             break;
         }

@@ -645,13 +671,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
             const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
             auto task = ProcessGraphics(
                 {indirect_buffer->Address<const u32>(), indirect_buffer->ib_size}, {});
-            while (!task.handle.done()) {
-                task.handle.resume();
+            RESUME_GFX(task);
 
-                TracyFiberLeave;
-                co_yield {};
-                TracyFiberEnter(dcb_task_name);
-            };
+            while (!task.handle.done()) {
+                YIELD_GFX();
+                RESUME_GFX(task);
+            }
             break;
         }
         case PM4ItOpcode::IncrementDeCounter: {

@@ -660,9 +685,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
         }
         case PM4ItOpcode::WaitOnCeCounter: {
             while (cblock.ce_count <= cblock.de_count) {
-                TracyFiberLeave;
-                ce_task.handle.resume();
-                TracyFiberEnter(dcb_task_name);
+                RESUME_GFX(ce_task);
             }
             break;
         }

@@ -686,11 +709,13 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
         ce_task.handle.destroy();
     }
 
-    TracyFiberLeave;
+    FIBER_EXIT;
 }
 
-Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
-    TracyFiberEnter(acb_task_name);
+template <bool is_indirect>
+Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
+    FIBER_ENTER(acb_task_name[vqid]);
+    const auto& queue = asc_queues[{vqid}];
+
     auto base_addr = reinterpret_cast<uintptr_t>(acb.data());
     while (!acb.empty()) {

@@ -711,15 +736,14 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
         }
         case PM4ItOpcode::IndirectBuffer: {
             const auto* indirect_buffer = reinterpret_cast<const PM4CmdIndirectBuffer*>(header);
-            auto task = ProcessCompute(
+            auto task = ProcessCompute<true>(
                 {indirect_buffer->Address<const u32>(), indirect_buffer->ib_size}, vqid);
-            while (!task.handle.done()) {
-                task.handle.resume();
+            RESUME_ASC(task, vqid);
 
-                TracyFiberLeave;
-                co_yield {};
-                TracyFiberEnter(acb_task_name);
-            };
+            while (!task.handle.done()) {
+                YIELD_ASC(vqid);
+                RESUME_ASC(task, vqid);
+            }
             break;
         }
         case PM4ItOpcode::DmaData: {

@@ -757,30 +781,38 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
         case PM4ItOpcode::Rewind: {
             const PM4CmdRewind* rewind = reinterpret_cast<const PM4CmdRewind*>(header);
             while (!rewind->Valid()) {
-                mapped_queues[vqid].cs_state = regs.cs_program;
-                TracyFiberLeave;
-                co_yield {};
-                TracyFiberEnter(acb_task_name);
-                regs.cs_program = mapped_queues[vqid].cs_state;
+                YIELD_ASC(vqid);
             }
             break;
         }
         case PM4ItOpcode::SetShReg: {
             const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
-            std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
-                        (count - 1) * sizeof(u32));
+            const auto set_size = (count - 1) * sizeof(u32);
+
+            if (set_data->reg_offset >= 0x200 &&
+                set_data->reg_offset <= (0x200 + sizeof(ComputeProgram) / 4)) {
+                ASSERT(set_size <= sizeof(ComputeProgram));
+                auto* addr = reinterpret_cast<u32*>(&mapped_queues[vqid + 1].cs_state) +
+                             (set_data->reg_offset - 0x200);
+                std::memcpy(addr, header + 2, set_size);
+            } else {
+                std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
+                            set_size);
+            }
             break;
         }
         case PM4ItOpcode::DispatchDirect: {
             const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
-            regs.cs_program.dim_x = dispatch_direct->dim_x;
-            regs.cs_program.dim_y = dispatch_direct->dim_y;
-            regs.cs_program.dim_z = dispatch_direct->dim_z;
-            regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
+            auto& cs_program = GetCsRegs();
+            cs_program.dim_x = dispatch_direct->dim_x;
+            cs_program.dim_y = dispatch_direct->dim_y;
+            cs_program.dim_z = dispatch_direct->dim_z;
+            cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
             if (DebugState.DumpingCurrentReg()) {
-                DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs, true);
+                DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
+                                               cs_program);
             }
-            if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
+            if (rasterizer && (cs_program.dispatch_initiator & 1)) {
                 const auto cmd_address = reinterpret_cast<const void*>(header);
                 rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address));
                 rasterizer->DispatchDirect();

@@ -803,17 +835,13 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
             const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
             ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
             while (!wait_reg_mem->Test()) {
-                mapped_queues[vqid].cs_state = regs.cs_program;
-                TracyFiberLeave;
-                co_yield {};
-                TracyFiberEnter(acb_task_name);
-                regs.cs_program = mapped_queues[vqid].cs_state;
+                YIELD_ASC(vqid);
             }
             break;
         }
         case PM4ItOpcode::ReleaseMem: {
             const auto* release_mem = reinterpret_cast<const PM4CmdReleaseMem*>(header);
-            release_mem->SignalFence(Platform::InterruptId::Compute0RelMem); // <---
+            release_mem->SignalFence(static_cast<Platform::InterruptId>(queue.pipe_id));
             break;
         }
         default:

@@ -821,10 +849,16 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
                       static_cast<u32>(opcode), count);
         }
 
-        acb = NextPacket(acb, header->type3.NumWords() + 1);
+        const auto packet_size_dw = header->type3.NumWords() + 1;
+        acb = NextPacket(acb, packet_size_dw);
+
+        if constexpr (!is_indirect) {
+            *queue.read_addr += packet_size_dw;
+            *queue.read_addr %= queue.ring_size_dw;
+        }
     }
 
-    TracyFiberLeave;
+    FIBER_EXIT;
 }
 
 std::pair<std::span<const u32>, std::span<const u32>> Liverpool::CopyCmdBuffers(
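Note that the read-pointer update added above is gated on the template argument: only packets consumed from the top-level ring advance the queue state, while packets inside a nested IndirectBuffer are parsed with ProcessCompute<true> and leave read_addr untouched. In outline:

    // is_indirect == false (top-level ACB): after every packet
    //     *queue.read_addr += packet_size_dw;
    //     *queue.read_addr %= queue.ring_size_dw;  // wrap within the ring, in dwords
    // is_indirect == true (nested IB): no bookkeeping; the parent ring already accounted
    //     for the single IndirectBuffer packet that referenced this buffer.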
@@ -881,10 +915,11 @@ void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
     submit_cv.notify_one();
 }
 
-void Liverpool::SubmitAsc(u32 vqid, std::span<const u32> acb) {
-    ASSERT_MSG(vqid >= 0 && vqid < NumTotalQueues, "Invalid virtual ASC queue index");
-    auto& queue = mapped_queues[vqid];
+void Liverpool::SubmitAsc(u32 gnm_vqid, std::span<const u32> acb) {
+    ASSERT_MSG(gnm_vqid > 0 && gnm_vqid < NumTotalQueues, "Invalid virtual ASC queue index");
+    auto& queue = mapped_queues[gnm_vqid];
 
+    const auto vqid = gnm_vqid - 1;
     const auto& task = ProcessCompute(acb, vqid);
     {
         std::scoped_lock lock{queue.m_access};

@@ -892,6 +927,7 @@ void Liverpool::SubmitAsc(u32 vqid, std::span<const u32> acb) {
     }
 
     std::scoped_lock lk{submit_mutex};
+    num_mapped_queues = std::max(num_mapped_queues, gnm_vqid + 1);
     ++num_submits;
     submit_cv.notify_one();
 }
@@ -16,6 +16,7 @@
 #include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/polyfill_thread.h"
+#include "common/slot_vector.h"
 #include "common/types.h"
 #include "common/unique_function.h"
 #include "shader_recompiler/params.h"

@@ -45,7 +46,8 @@ struct Liverpool {
     static constexpr u32 NumGfxRings = 1u;     // actually 2, but HP is reserved by system software
     static constexpr u32 NumComputePipes = 7u; // actually 8, but #7 is reserved by system software
     static constexpr u32 NumQueuesPerPipe = 8u;
-    static constexpr u32 NumTotalQueues = NumGfxRings + (NumComputePipes * NumQueuesPerPipe);
+    static constexpr u32 NumComputeRings = NumComputePipes * NumQueuesPerPipe;
+    static constexpr u32 NumTotalQueues = NumGfxRings + NumComputeRings;
     static_assert(NumTotalQueues < 64u); // need to fit into u64 bitmap for ffs
 
     static constexpr u32 NumColorBuffers = 8;
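For reference, the new constants work out as follows, which is why the u64 queue-bitmap assert still holds:

    // NumComputeRings = NumComputePipes * NumQueuesPerPipe = 7 * 8 = 56
    // NumTotalQueues  = NumGfxRings + NumComputeRings      = 1 + 56 = 57
    // static_assert(NumTotalQueues < 64u): 57 < 64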
@@ -1143,7 +1145,7 @@ struct Liverpool {
         INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20);
         ShaderProgram ls_program;
         INSERT_PADDING_WORDS(0xA4);
-        ComputeProgram cs_program;
+        ComputeProgram cs_program; // shadowed by `cs_state` in `mapped_queues`
         INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5);
         DepthRenderControl depth_render_control;
         INSERT_PADDING_WORDS(1);

@@ -1298,7 +1300,7 @@ public:
     ~Liverpool();
 
     void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb);
-    void SubmitAsc(u32 vqid, std::span<const u32> acb);
+    void SubmitAsc(u32 gnm_vqid, std::span<const u32> acb);
 
     void SubmitDone() noexcept {
         std::scoped_lock lk{submit_mutex};

@@ -1341,6 +1343,18 @@ public:
         gfx_queue.dcb_buffer.reserve(GfxReservedSize);
     }
 
+    inline ComputeProgram& GetCsRegs() {
+        return mapped_queues[curr_qid].cs_state;
+    }
+
+    struct AscQueueInfo {
+        VAddr map_addr;
+        u32* read_addr;
+        u32 ring_size_dw;
+        u32 pipe_id;
+    };
+    Common::SlotVector<AscQueueInfo> asc_queues{};
+
 private:
     struct Task {
         struct promise_type {

@@ -1378,7 +1392,8 @@ private:
                                                                  std::span<const u32> ccb);
     Task ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb);
     Task ProcessCeUpdate(std::span<const u32> ccb);
-    Task ProcessCompute(std::span<const u32> acb, int vqid);
+    template <bool is_indirect = false>
+    Task ProcessCompute(std::span<const u32> acb, u32 vqid);
 
     void Process(std::stop_token stoken);
 

@@ -1393,6 +1408,7 @@ private:
         VAddr indirect_args_addr{};
     };
     std::array<GpuQueue, NumTotalQueues> mapped_queues{};
+    u32 num_mapped_queues{1u}; // GFX is always available
 
     struct ConstantEngine {
         void Reset() {

@@ -1421,6 +1437,7 @@ private:
     std::mutex submit_mutex;
     std::condition_variable_any submit_cv;
     std::queue<Common::UniqueFunction<void>> command_queue{};
+    int curr_qid{-1};
 };
 
 static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
@@ -173,9 +173,9 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_
         break;
     }
     case Stage::Compute: {
-        const auto& cs_pgm = regs.cs_program;
+        const auto& cs_pgm = liverpool->GetCsRegs();
         info.num_user_data = cs_pgm.settings.num_user_regs;
-        info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4;
+        info.num_allocated_vgprs = cs_pgm.settings.num_vgprs * 4;
         info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
                                        cs_pgm.num_thread_z.full};
         info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),

@@ -476,8 +476,8 @@ bool PipelineCache::RefreshGraphicsKey() {
 
 bool PipelineCache::RefreshComputeKey() {
     Shader::Backend::Bindings binding{};
-    const auto* cs_pgm = &liverpool->regs.cs_program;
-    const auto cs_params = Liverpool::GetParams(*cs_pgm);
+    const auto& cs_pgm = liverpool->GetCsRegs();
+    const auto cs_params = Liverpool::GetParams(cs_pgm);
     std::tie(infos[0], modules[0], fetch_shader, compute_key.value) =
         GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, binding);
     return true;

@@ -529,6 +529,7 @@ PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stag
         return std::make_tuple(&program->info, module, spec.fetch_shader_data,
                                HashCombine(params.hash, 0));
     }
+    it_pgm.value()->info.user_data = params.user_data;
 
     auto& program = it_pgm.value();
     auto& info = program->info;
@@ -317,14 +317,14 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
 void Rasterizer::DispatchDirect() {
     RENDERER_TRACE;
 
-    const auto& cs_program = liverpool->regs.cs_program;
+    const auto& cs_program = liverpool->GetCsRegs();
     const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
     if (!pipeline) {
         return;
     }
 
     const auto& cs = pipeline->GetStage(Shader::LogicalStage::Compute);
-    if (ExecuteShaderHLE(cs, liverpool->regs, *this)) {
+    if (ExecuteShaderHLE(cs, liverpool->regs, cs_program, *this)) {
         return;
     }
 

@@ -344,7 +344,7 @@ void Rasterizer::DispatchDirect() {
 void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) {
     RENDERER_TRACE;
 
-    const auto& cs_program = liverpool->regs.cs_program;
+    const auto& cs_program = liverpool->GetCsRegs();
     const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
     if (!pipeline) {
         return;
@@ -2,17 +2,19 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include "shader_recompiler/info.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_shader_hle.h"
 
-#include "vk_rasterizer.h"
+extern std::unique_ptr<AmdGpu::Liverpool> liverpool;
 
 namespace Vulkan {
 
 static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f;
 
-bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
-                          Rasterizer& rasterizer) {
+static bool ExecuteCopyShaderHLE(const Shader::Info& info,
+                                 const AmdGpu::Liverpool::ComputeProgram& cs_program,
+                                 Rasterizer& rasterizer) {
     auto& scheduler = rasterizer.GetScheduler();
     auto& buffer_cache = rasterizer.GetBufferCache();
 

@@ -34,9 +36,9 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg
 
     static std::vector<vk::BufferCopy> copies;
     copies.clear();
-    copies.reserve(regs.cs_program.dim_x);
+    copies.reserve(cs_program.dim_x);
 
-    for (u32 i = 0; i < regs.cs_program.dim_x; i++) {
+    for (u32 i = 0; i < cs_program.dim_x; i++) {
         const auto& [dst_idx, src_idx, end] = ctl_buf[i];
         const u32 local_dst_offset = dst_idx * buf_stride;
         const u32 local_src_offset = src_idx * buf_stride;

@@ -122,10 +124,10 @@ bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Reg
 }
 
 bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
-                      Rasterizer& rasterizer) {
+                      const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer) {
     switch (info.pgm_hash) {
     case COPY_SHADER_HASH:
-        return ExecuteCopyShaderHLE(info, regs, rasterizer);
+        return ExecuteCopyShaderHLE(info, cs_program, rasterizer);
     default:
         return false;
     }

@@ -15,6 +15,6 @@ class Rasterizer;
 
 /// Attempts to execute a shader using HLE if possible.
 bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
-                      Rasterizer& rasterizer);
+                      const AmdGpu::Liverpool::ComputeProgram& cs_program, Rasterizer& rasterizer);
 
 } // namespace Vulkan
@@ -212,6 +212,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
     case vk::Format::eBc7SrgbBlock:
     case vk::Format::eBc7UnormBlock:
    case vk::Format::eBc6HUfloatBlock:
+    case vk::Format::eR32G32B32A32Uint:
     case vk::Format::eR32G32B32A32Sfloat:
         return vk::Format::eR32G32B32A32Uint;
     default: