diff --git a/src/common/config.cpp b/src/common/config.cpp index 824a70da4..8d87ed3c3 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -22,6 +22,7 @@ static int specialPadClass = 1; static bool isDebugDump = false; static bool isShowSplash = false; static bool isNullGpu = false; +static bool shouldCopyGPUBuffers = false; static bool shouldDumpShaders = false; static bool shouldDumpPM4 = false; static u32 vblankDivider = 1; @@ -103,6 +104,10 @@ bool nullGpu() { return isNullGpu; } +bool copyGPUCmdBuffers() { + return shouldCopyGPUBuffers; +} + bool dumpShaders() { return shouldDumpShaders; } @@ -159,6 +164,10 @@ void setNullGpu(bool enable) { isNullGpu = enable; } +void setCopyGPUCmdBuffers(bool enable) { + shouldCopyGPUBuffers = enable; +} + void setDumpShaders(bool enable) { shouldDumpShaders = enable; } @@ -361,6 +370,7 @@ void load(const std::filesystem::path& path) { screenWidth = toml::find_or(gpu, "screenWidth", screenWidth); screenHeight = toml::find_or(gpu, "screenHeight", screenHeight); isNullGpu = toml::find_or(gpu, "nullGpu", false); + shouldCopyGPUBuffers = toml::find_or(gpu, "copyGPUBuffers", false); shouldDumpShaders = toml::find_or(gpu, "dumpShaders", false); shouldDumpPM4 = toml::find_or(gpu, "dumpPM4", false); vblankDivider = toml::find_or(gpu, "vblankDivider", 1); @@ -441,6 +451,7 @@ void save(const std::filesystem::path& path) { data["GPU"]["screenWidth"] = screenWidth; data["GPU"]["screenHeight"] = screenHeight; data["GPU"]["nullGpu"] = isNullGpu; + data["GPU"]["copyGPUBuffers"] = shouldCopyGPUBuffers; data["GPU"]["dumpShaders"] = shouldDumpShaders; data["GPU"]["dumpPM4"] = shouldDumpPM4; data["GPU"]["vblankDivider"] = vblankDivider; diff --git a/src/common/config.h b/src/common/config.h index f5140bc9b..11e7d8827 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -27,6 +27,7 @@ s32 getGpuId(); bool debugDump(); bool showSplash(); bool nullGpu(); +bool copyGPUCmdBuffers(); bool dumpShaders(); bool dumpPM4(); bool isRdocEnabled(); @@ -36,6 +37,7 @@ u32 vblankDiv(); void setDebugDump(bool enable); void setShowSplash(bool enable); void setNullGpu(bool enable); +void setCopyGPUCmdBuffers(bool enable); void setDumpShaders(bool enable); void setDumpPM4(bool enable); void setVblankDiv(u32 value); diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 7e6ca14da..8570a2908 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" +#include "common/config.h" #include "common/debug.h" #include "common/polyfill_thread.h" #include "common/thread.h" @@ -579,9 +580,43 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { TracyFiberLeave; } +std::pair, std::span> Liverpool::CopyCmdBuffers( + std::span dcb, std::span ccb) { + auto& queue = mapped_queues[GfxQueueId]; + + queue.dcb_buffer.resize( + std::max(queue.dcb_buffer.size(), queue.dcb_buffer_offset + dcb.size())); + queue.ccb_buffer.resize( + std::max(queue.ccb_buffer.size(), queue.ccb_buffer_offset + ccb.size())); + + u32 prev_dcb_buffer_offset = queue.dcb_buffer_offset; + u32 prev_ccb_buffer_offset = queue.ccb_buffer_offset; + if (!dcb.empty()) { + std::memcpy(queue.dcb_buffer.data() + queue.dcb_buffer_offset, dcb.data(), + dcb.size_bytes()); + queue.dcb_buffer_offset += dcb.size(); + dcb = std::span{queue.dcb_buffer.begin() + prev_dcb_buffer_offset, + queue.dcb_buffer.begin() + queue.dcb_buffer_offset}; + } + + if (!ccb.empty()) { + std::memcpy(queue.ccb_buffer.data() + queue.ccb_buffer_offset, ccb.data(), + ccb.size_bytes()); + queue.ccb_buffer_offset += ccb.size(); + ccb = std::span{queue.ccb_buffer.begin() + prev_ccb_buffer_offset, + queue.ccb_buffer.begin() + queue.ccb_buffer_offset}; + } + + return std::make_pair(dcb, ccb); +} + void Liverpool::SubmitGfx(std::span dcb, std::span ccb) { auto& queue = mapped_queues[GfxQueueId]; + if (Config::copyGPUCmdBuffers()) { + std::tie(dcb, ccb) = CopyCmdBuffers(dcb, ccb); + } + auto task = ProcessGraphics(dcb, ccb); { std::scoped_lock lock{queue.m_access}; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 2806f3308..14284bbc6 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "common/assert.h" @@ -1047,6 +1048,8 @@ public: void SubmitDone() noexcept { std::scoped_lock lk{submit_mutex}; + mapped_queues[GfxQueueId].ccb_buffer_offset = 0; + mapped_queues[GfxQueueId].dcb_buffer_offset = 0; submit_done = true; submit_cv.notify_one(); } @@ -1108,6 +1111,8 @@ private: Handle handle; }; + std::pair, std::span> CopyCmdBuffers(std::span dcb, + std::span ccb); Task ProcessGraphics(std::span dcb, std::span ccb); Task ProcessCeUpdate(std::span ccb); Task ProcessCompute(std::span acb, int vqid); @@ -1116,6 +1121,10 @@ private: struct GpuQueue { std::mutex m_access{}; + std::atomic dcb_buffer_offset; + std::atomic ccb_buffer_offset; + std::vector dcb_buffer; + std::vector ccb_buffer; std::queue submits{}; ComputeProgram cs_state{}; };