Merge pull request #618 from vertver/main

video_core: Added copyGPUCmdBuffers option
2024-12-28 18:46:06 +00:00 · 2024-08-28 14:00:26 +03:00 · 2024-08-28 14:00:26 +03:00 · be49871c68
parent 905d49fd96 dfb30ea955
commit be49871c68
4 changed files with 57 additions and 0 deletions
--- a/src/common/config.cpp
+++ b/src/common/config.cpp
@ -22,6 +22,7 @@ static int specialPadClass = 1;
 static bool isDebugDump = false;
 static bool isShowSplash = false;
 static bool isNullGpu = false;
+static bool shouldCopyGPUBuffers = false; // Backs the "copyGPUBuffers" key of the GPU table.
 static bool shouldDumpShaders = false;
 static bool shouldDumpPM4 = false;
 static u32 vblankDivider = 1;
@ -103,6 +104,10 @@ bool nullGpu() {
    return isNullGpu;
 }

+bool copyGPUCmdBuffers() { // Getter: current value of the shouldCopyGPUBuffers flag.
+    return shouldCopyGPUBuffers;
+}
+
 bool dumpShaders() {
    return shouldDumpShaders;
 }
@ -159,6 +164,10 @@ void setNullGpu(bool enable) {
    isNullGpu = enable;
 }

+void setCopyGPUCmdBuffers(bool enable) { // Setter: overwrites the shouldCopyGPUBuffers flag.
+    shouldCopyGPUBuffers = enable;
+}
+
 void setDumpShaders(bool enable) {
    shouldDumpShaders = enable;
 }
@ -361,6 +370,7 @@ void load(const std::filesystem::path& path) {
        screenWidth = toml::find_or<int>(gpu, "screenWidth", screenWidth);
        screenHeight = toml::find_or<int>(gpu, "screenHeight", screenHeight);
        isNullGpu = toml::find_or<bool>(gpu, "nullGpu", false);
+        shouldCopyGPUBuffers = toml::find_or<bool>(gpu, "copyGPUBuffers", false);
        shouldDumpShaders = toml::find_or<bool>(gpu, "dumpShaders", false);
        shouldDumpPM4 = toml::find_or<bool>(gpu, "dumpPM4", false);
        vblankDivider = toml::find_or<int>(gpu, "vblankDivider", 1);
@ -441,6 +451,7 @@ void save(const std::filesystem::path& path) {
    data["GPU"]["screenWidth"] = screenWidth;
    data["GPU"]["screenHeight"] = screenHeight;
    data["GPU"]["nullGpu"] = isNullGpu;
+    data["GPU"]["copyGPUBuffers"] = shouldCopyGPUBuffers;
    data["GPU"]["dumpShaders"] = shouldDumpShaders;
    data["GPU"]["dumpPM4"] = shouldDumpPM4;
    data["GPU"]["vblankDivider"] = vblankDivider;
--- a/src/common/config.h
+++ b/src/common/config.h
@ -27,6 +27,7 @@ s32 getGpuId();
 bool debugDump();
 bool showSplash();
 bool nullGpu();
+bool copyGPUCmdBuffers(); // True when submitted GPU command buffers should be copied first.
 bool dumpShaders();
 bool dumpPM4();
 bool isRdocEnabled();
@ -36,6 +37,7 @@ u32 vblankDiv();
 void setDebugDump(bool enable);
 void setShowSplash(bool enable);
 void setNullGpu(bool enable);
+void setCopyGPUCmdBuffers(bool enable); // Turns copying of GPU command buffers on or off.
 void setDumpShaders(bool enable);
 void setDumpPM4(bool enable);
 void setVblankDiv(u32 value);
--- a/src/video_core/amdgpu/liverpool.cpp
+++ b/src/video_core/amdgpu/liverpool.cpp
@ -2,6 +2,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later

 #include "common/assert.h"
+#include "common/config.h"
 #include "common/debug.h"
 #include "common/polyfill_thread.h"
 #include "common/thread.h"
@ -579,9 +580,43 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
    TracyFiberLeave;
 }

+// Copies the submitted dcb/ccb command words into the graphics queue's own dcb_buffer and
+// ccb_buffer, appending at the queue's current per-buffer offsets, and returns spans over the
+// copies. An empty input span is returned unchanged and its offset is left untouched.
+// NOTE(review): growing a buffer may reallocate it, which would invalidate spans returned by
+// earlier calls - confirm none of those are still in use when that happens.
+std::pair<std::span<const u32>, std::span<const u32>> Liverpool::CopyCmdBuffers(
+    std::span<const u32> dcb, std::span<const u32> ccb) {
+    auto& gfx_queue = mapped_queues[GfxQueueId];
+
+    // Grow-only: each buffer must be able to hold the incoming words at its current offset.
+    const auto ensure_size = [](auto& storage, const auto required) {
+        if (storage.size() < required) {
+            storage.resize(required);
+        }
+    };
+    ensure_size(gfx_queue.dcb_buffer, gfx_queue.dcb_buffer_offset + dcb.size());
+    ensure_size(gfx_queue.ccb_buffer, gfx_queue.ccb_buffer_offset + ccb.size());
+
+    // Appends `words` at `cursor`, advances `cursor` past them and yields a view of the copy.
+    const auto stage = [](auto& storage, auto& cursor,
+                          std::span<const u32> words) -> std::span<const u32> {
+        if (words.empty()) {
+            return words;
+        }
+        const u32 first = cursor;
+        std::memcpy(storage.data() + first, words.data(), words.size_bytes());
+        cursor += words.size();
+        const u32 last = cursor;
+        return std::span<const u32>{storage.data() + first, storage.data() + last};
+    };
+
+    const auto staged_dcb = stage(gfx_queue.dcb_buffer, gfx_queue.dcb_buffer_offset, dcb);
+    const auto staged_ccb = stage(gfx_queue.ccb_buffer, gfx_queue.ccb_buffer_offset, ccb);
+    return std::make_pair(staged_dcb, staged_ccb);
+}
+
 void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
    auto& queue = mapped_queues[GfxQueueId];

+    if (Config::copyGPUCmdBuffers()) {
+        std::tie(dcb, ccb) = CopyCmdBuffers(dcb, ccb);
+    }
+
    auto task = ProcessGraphics(dcb, ccb);
    {
        std::scoped_lock lock{queue.m_access};
--- a/src/video_core/amdgpu/liverpool.h
+++ b/src/video_core/amdgpu/liverpool.h
@ -10,6 +10,7 @@
 #include <mutex>
 #include <span>
 #include <thread>
+#include <vector>
 #include <queue>

 #include "common/assert.h"
@ -1047,6 +1048,8 @@ public:

    void SubmitDone() noexcept {
        std::scoped_lock lk{submit_mutex};
+        mapped_queues[GfxQueueId].ccb_buffer_offset = 0; // reset the CCB copy offset to 0
+        mapped_queues[GfxQueueId].dcb_buffer_offset = 0; // reset the DCB copy offset to 0
        submit_done = true;
        submit_cv.notify_one();
    }
@ -1108,6 +1111,8 @@ private:
        Handle handle;
    };

+    std::pair<std::span<const u32>, std::span<const u32>> CopyCmdBuffers(std::span<const u32> dcb,
+                                                                         std::span<const u32> ccb);
    Task ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb);
    Task ProcessCeUpdate(std::span<const u32> ccb);
    Task ProcessCompute(std::span<const u32> acb, int vqid);
@ -1116,6 +1121,10 @@ private:

    struct GpuQueue {
        std::mutex m_access{};
+        // Write offsets, counted in u32 words, into dcb_buffer / ccb_buffer. Initialised
+        // explicitly: before C++20 a default-constructed std::atomic is indeterminate.
+        std::atomic<u32> dcb_buffer_offset{0};
+        std::atomic<u32> ccb_buffer_offset{0};
+        // Queue-owned storage that CopyCmdBuffers copies submitted command words into.
+        std::vector<u32> dcb_buffer;
+        std::vector<u32> ccb_buffer;
        std::queue<Task::Handle> submits{};
        ComputeProgram cs_state{};
    };