From 17e438b58361c931165292b8a16004401cd69a17 Mon Sep 17 00:00:00 2001 From: Anton Kovalev Date: Tue, 27 Aug 2024 23:16:14 +0200 Subject: [PATCH 1/7] video_core: Added copyGPUCmdBuffers option --- src/common/config.cpp | 11 +++++++++ src/common/config.h | 2 ++ src/video_core/amdgpu/liverpool.cpp | 37 +++++++++++++++++++++++++++++ src/video_core/amdgpu/liverpool.h | 8 +++++++ 4 files changed, 58 insertions(+) diff --git a/src/common/config.cpp b/src/common/config.cpp index 9f55cbd40..04d67d44b 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -20,6 +20,7 @@ static std::string userName = "shadPS4"; static bool isDebugDump = false; static bool isShowSplash = false; static bool isNullGpu = false; +static bool shouldCopyGPUBuffers = false; static bool shouldDumpShaders = false; static bool shouldDumpPM4 = false; static u32 vblankDivider = 1; @@ -93,6 +94,10 @@ bool nullGpu() { return isNullGpu; } +bool copyGPUCmdBuffers() { + return shouldCopyGPUBuffers; +} + bool dumpShaders() { return shouldDumpShaders; } @@ -149,6 +154,10 @@ void setNullGpu(bool enable) { isNullGpu = enable; } +void setCopyGPUCmdBuffers(bool enable) { + shouldCopyGPUBuffers = enable; +} + void setDumpShaders(bool enable) { shouldDumpShaders = enable; } @@ -336,6 +345,7 @@ void load(const std::filesystem::path& path) { screenWidth = toml::find_or(gpu, "screenWidth", screenWidth); screenHeight = toml::find_or(gpu, "screenHeight", screenHeight); isNullGpu = toml::find_or(gpu, "nullGpu", false); + shouldCopyGPUBuffers = toml::find_or(gpu, "copyGPUBuffers", false); shouldDumpShaders = toml::find_or(gpu, "dumpShaders", false); shouldDumpPM4 = toml::find_or(gpu, "dumpPM4", false); vblankDivider = toml::find_or(gpu, "vblankDivider", 1); @@ -414,6 +424,7 @@ void save(const std::filesystem::path& path) { data["GPU"]["screenWidth"] = screenWidth; data["GPU"]["screenHeight"] = screenHeight; data["GPU"]["nullGpu"] = isNullGpu; + data["GPU"]["copyGPUBuffers"] = shouldCopyGPUBuffers; 
data["GPU"]["dumpShaders"] = shouldDumpShaders; data["GPU"]["dumpPM4"] = shouldDumpPM4; data["GPU"]["vblankDivider"] = vblankDivider; diff --git a/src/common/config.h b/src/common/config.h index 554515a44..f1347b076 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -24,6 +24,7 @@ s32 getGpuId(); bool debugDump(); bool showSplash(); bool nullGpu(); +bool copyGPUCmdBuffers(); bool dumpShaders(); bool dumpPM4(); bool isRdocEnabled(); @@ -33,6 +34,7 @@ u32 vblankDiv(); void setDebugDump(bool enable); void setShowSplash(bool enable); void setNullGpu(bool enable); +void setCopyGPUCmdBuffers(bool enable); void setDumpShaders(bool enable); void setDumpPM4(bool enable); void setVblankDiv(u32 value); diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 5b3db603a..cec3b06bb 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -5,6 +5,7 @@ #include "common/debug.h" #include "common/polyfill_thread.h" #include "common/thread.h" +#include "common/config.h" #include "core/libraries/videoout/driver.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" @@ -568,9 +569,45 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { TracyFiberLeave; } +void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& ccb) { + auto& queue = mapped_queues[GfxQueueId]; + + // This is fine because resize doesn't reallocate the buffer on shrink + queue.dcb_buffer.resize(queue.dcb_buffer_offset + dcb.size()); + queue.ccb_buffer.resize(queue.ccb_buffer_offset + dcb.size()); + + u32 prev_dcb_buffer_offset = queue.dcb_buffer_offset; + u32 prev_ccb_buffer_offset = queue.ccb_buffer_offset; + if (!dcb.empty()) { + std::memcpy(queue.dcb_buffer.data() + queue.dcb_buffer_offset, dcb.data(), + dcb.size_bytes()); + queue.dcb_buffer_offset += dcb.size(); + } + + if (!ccb.empty()) { + std::memcpy(queue.ccb_buffer.data() + queue.ccb_buffer_offset, ccb.data(), + ccb.size_bytes()); 
+ queue.ccb_buffer_offset += dcb.size(); + } + + if (!queue.dcb_buffer.empty()) { + dcb = std::span{queue.dcb_buffer.begin() + prev_dcb_buffer_offset, + queue.dcb_buffer.begin() + queue.dcb_buffer_offset}; + } + + if (!queue.ccb_buffer.empty()) { + ccb = std::span{queue.ccb_buffer.begin() + prev_ccb_buffer_offset, + queue.ccb_buffer.begin() + queue.ccb_buffer_offset}; + } +} + void Liverpool::SubmitGfx(std::span dcb, std::span ccb) { auto& queue = mapped_queues[GfxQueueId]; + if (Config::copyGPUCmdBuffers()) { + CopyCmdBuffers(dcb, ccb); + } + auto task = ProcessGraphics(dcb, ccb); { std::scoped_lock lock{queue.m_access}; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 2806f3308..a4e61c92a 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -11,6 +11,7 @@ #include #include #include +#include #include "common/assert.h" #include "common/bit_field.h" @@ -1047,6 +1048,8 @@ public: void SubmitDone() noexcept { std::scoped_lock lk{submit_mutex}; + mapped_queues[GfxQueueId].ccb_buffer_offset = 0; + mapped_queues[GfxQueueId].dcb_buffer_offset = 0; submit_done = true; submit_cv.notify_one(); } @@ -1108,6 +1111,7 @@ private: Handle handle; }; + void CopyCmdBuffers(std::span& dcb, std::span& ccb); Task ProcessGraphics(std::span dcb, std::span ccb); Task ProcessCeUpdate(std::span ccb); Task ProcessCompute(std::span acb, int vqid); @@ -1116,6 +1120,10 @@ private: struct GpuQueue { std::mutex m_access{}; + std::atomic_uint32_t dcb_buffer_offset; + std::atomic_uint32_t ccb_buffer_offset; + std::vector dcb_buffer; + std::vector ccb_buffer; std::queue submits{}; ComputeProgram cs_state{}; }; From 20a84b303ce40e8e95032ddfff90538f019363bb Mon Sep 17 00:00:00 2001 From: Anton Kovalev Date: Tue, 27 Aug 2024 23:31:04 +0200 Subject: [PATCH 2/7] clang-format fix --- src/video_core/amdgpu/liverpool.cpp | 4 ++-- src/video_core/amdgpu/liverpool.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index cec3b06bb..4b8a83152 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -593,8 +593,8 @@ void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& if (!queue.dcb_buffer.empty()) { dcb = std::span{queue.dcb_buffer.begin() + prev_dcb_buffer_offset, queue.dcb_buffer.begin() + queue.dcb_buffer_offset}; - } - + } + if (!queue.ccb_buffer.empty()) { ccb = std::span{queue.ccb_buffer.begin() + prev_ccb_buffer_offset, queue.ccb_buffer.begin() + queue.ccb_buffer_offset}; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index a4e61c92a..c12c8fdb6 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -10,8 +10,8 @@ #include #include #include -#include #include +#include #include "common/assert.h" #include "common/bit_field.h" From 67895d756727eb8b1eb9f93413f7211d984145ed Mon Sep 17 00:00:00 2001 From: Anton Kovalev Date: Tue, 27 Aug 2024 23:33:24 +0200 Subject: [PATCH 3/7] Do not shrink buffer's size on submit --- src/video_core/amdgpu/liverpool.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 4b8a83152..93099266a 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -573,8 +573,10 @@ void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& auto& queue = mapped_queues[GfxQueueId]; // This is fine because resize doesn't reallocate the buffer on shrink - queue.dcb_buffer.resize(queue.dcb_buffer_offset + dcb.size()); - queue.ccb_buffer.resize(queue.ccb_buffer_offset + dcb.size()); + queue.dcb_buffer.resize( + std::max(queue.dcb_buffer.size(), queue.dcb_buffer_offset + dcb.size())); + queue.ccb_buffer.resize( + std::max(queue.ccb_buffer.size(), queue.ccb_buffer_offset + dcb.size())); u32 prev_dcb_buffer_offset = queue.dcb_buffer_offset; u32 
prev_ccb_buffer_offset = queue.ccb_buffer_offset; From 4b1a8f0e7a662ed9d36d0b8de952e34cbb4dffc5 Mon Sep 17 00:00:00 2001 From: Anton Kovalev Date: Wed, 28 Aug 2024 00:21:12 +0200 Subject: [PATCH 4/7] Use input dcb and ccb instead of copy --- src/video_core/amdgpu/liverpool.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 93099266a..3db9ff0d1 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -572,7 +572,6 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& ccb) { auto& queue = mapped_queues[GfxQueueId]; - // This is fine because resize doesn't reallocate the buffer on shrink queue.dcb_buffer.resize( std::max(queue.dcb_buffer.size(), queue.dcb_buffer_offset + dcb.size())); queue.ccb_buffer.resize( @@ -584,20 +583,14 @@ void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& std::memcpy(queue.dcb_buffer.data() + queue.dcb_buffer_offset, dcb.data(), dcb.size_bytes()); queue.dcb_buffer_offset += dcb.size(); + dcb = std::span{queue.dcb_buffer.begin() + prev_dcb_buffer_offset, + queue.dcb_buffer.begin() + queue.dcb_buffer_offset}; } if (!ccb.empty()) { std::memcpy(queue.ccb_buffer.data() + queue.ccb_buffer_offset, ccb.data(), ccb.size_bytes()); queue.ccb_buffer_offset += dcb.size(); - } - - if (!queue.dcb_buffer.empty()) { - dcb = std::span{queue.dcb_buffer.begin() + prev_dcb_buffer_offset, - queue.dcb_buffer.begin() + queue.dcb_buffer_offset}; - } - - if (!queue.ccb_buffer.empty()) { ccb = std::span{queue.ccb_buffer.begin() + prev_ccb_buffer_offset, queue.ccb_buffer.begin() + queue.ccb_buffer_offset}; } From 44e51f3287505a36826956abc6560119ec077607 Mon Sep 17 00:00:00 2001 From: Anton Kovalev Date: Wed, 28 Aug 2024 05:42:48 +0200 Subject: [PATCH 5/7] clang-format style fix --- src/video_core/amdgpu/liverpool.cpp | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 3db9ff0d1..6e49aec41 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -2,10 +2,10 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" +#include "common/config.h" #include "common/debug.h" #include "common/polyfill_thread.h" #include "common/thread.h" -#include "common/config.h" #include "core/libraries/videoout/driver.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" From 8c9b450f29cc178ed2fa02f6bae274e8ec9c59b7 Mon Sep 17 00:00:00 2001 From: Anton Kovalev Date: Wed, 28 Aug 2024 09:42:31 +0200 Subject: [PATCH 6/7] Fixed typo in function --- src/video_core/amdgpu/liverpool.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 6e49aec41..931b5ea84 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -575,7 +575,7 @@ void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& queue.dcb_buffer.resize( std::max(queue.dcb_buffer.size(), queue.dcb_buffer_offset + dcb.size())); queue.ccb_buffer.resize( - std::max(queue.ccb_buffer.size(), queue.ccb_buffer_offset + dcb.size())); + std::max(queue.ccb_buffer.size(), queue.ccb_buffer_offset + ccb.size())); u32 prev_dcb_buffer_offset = queue.dcb_buffer_offset; u32 prev_ccb_buffer_offset = queue.ccb_buffer_offset; @@ -590,7 +590,7 @@ void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& if (!ccb.empty()) { std::memcpy(queue.ccb_buffer.data() + queue.ccb_buffer_offset, ccb.data(), ccb.size_bytes()); - queue.ccb_buffer_offset += dcb.size(); + queue.ccb_buffer_offset += ccb.size(); ccb = std::span{queue.ccb_buffer.begin() + prev_ccb_buffer_offset, queue.ccb_buffer.begin() + queue.ccb_buffer_offset}; } From 565e3b104d6a4dd73c74f57ff0ecc7cdee53741e Mon Sep 17 00:00:00 2001 
From: Anton Kovalev Date: Wed, 28 Aug 2024 11:24:15 +0200 Subject: [PATCH 7/7] Use pair of spans instead of references in copy command buffers function --- src/video_core/amdgpu/liverpool.cpp | 7 +++++-- src/video_core/amdgpu/liverpool.h | 7 ++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 931b5ea84..a3e64b7c0 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -569,7 +569,8 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { TracyFiberLeave; } -void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& ccb) { +std::pair, std::span> Liverpool::CopyCmdBuffers( + std::span dcb, std::span ccb) { auto& queue = mapped_queues[GfxQueueId]; queue.dcb_buffer.resize( @@ -594,13 +595,15 @@ void Liverpool::CopyCmdBuffers(std::span& dcb, std::span& ccb = std::span{queue.ccb_buffer.begin() + prev_ccb_buffer_offset, queue.ccb_buffer.begin() + queue.ccb_buffer_offset}; } + + return std::make_pair(dcb, ccb); } void Liverpool::SubmitGfx(std::span dcb, std::span ccb) { auto& queue = mapped_queues[GfxQueueId]; if (Config::copyGPUCmdBuffers()) { - CopyCmdBuffers(dcb, ccb); + std::tie(dcb, ccb) = CopyCmdBuffers(dcb, ccb); } auto task = ProcessGraphics(dcb, ccb); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index c12c8fdb6..14284bbc6 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -1111,7 +1111,8 @@ private: Handle handle; }; - void CopyCmdBuffers(std::span& dcb, std::span& ccb); + std::pair, std::span> CopyCmdBuffers(std::span dcb, + std::span ccb); Task ProcessGraphics(std::span dcb, std::span ccb); Task ProcessCeUpdate(std::span ccb); Task ProcessCompute(std::span acb, int vqid); @@ -1120,8 +1121,8 @@ private: struct GpuQueue { std::mutex m_access{}; - std::atomic_uint32_t dcb_buffer_offset; - std::atomic_uint32_t ccb_buffer_offset; + std::atomic 
dcb_buffer_offset; + std::atomic ccb_buffer_offset; std::vector dcb_buffer; std::vector ccb_buffer; std::queue submits{};