From 923baf0164a95b6d069b1986d51545446f6cb37d Mon Sep 17 00:00:00 2001 From: psucien Date: Fri, 10 May 2024 21:48:01 +0200 Subject: [PATCH] core: gpu interrupt dispatcher --- src/core/libraries/gnmdriver/gnmdriver.cpp | 13 +++- src/core/libraries/videoout/driver.cpp | 9 ++- src/core/libraries/videoout/driver.h | 8 ++- src/core/libraries/videoout/video_out.cpp | 9 ++- src/core/libraries/videoout/video_out.h | 2 +- src/core/platform.h | 78 ++++++++++++++++++++++ src/video_core/amdgpu/liverpool.cpp | 57 +++------------- src/video_core/amdgpu/liverpool.h | 6 -- src/video_core/amdgpu/pm4_cmds.h | 47 +++++++++++-- 9 files changed, 161 insertions(+), 68 deletions(-) create mode 100644 src/core/platform.h diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 921babfa..d65d03d0 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -7,6 +7,7 @@ #include "core/libraries/gnmdriver/gnmdriver.h" #include "core/libraries/libs.h" #include "core/libraries/videoout/video_out.h" +#include "core/platform.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -48,8 +49,12 @@ s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata) { kernel_event.event.udata = udata; eq->addEvent(kernel_event); - liverpool->SetEopCallback( - [=]() { eq->triggerEvent(SceKernelEvent::Type::GfxEop, EVFILT_GRAPHICS_CORE, nullptr); }); + Platform::IrqC::Instance()->Register([=](Platform::InterruptId irq) { + ASSERT_MSG(irq == Platform::InterruptId::GfxEop, + "An unexpected IRQ occured"); // We need to convert IRQ# to event id and do proper + // filtering in trigger function + eq->triggerEvent(SceKernelEvent::Type::GfxEop, EVFILT_GRAPHICS_CORE, nullptr); + }); return ORBIS_OK; } @@ -158,6 +163,8 @@ s32 PS4_SYSV_ABI sceGnmDeleteEqEvent(SceKernelEqueue eq, u64 id) { } eq->removeEvent(id); + +
Platform::IrqC::Instance()->Unregister(); return ORBIS_OK; } @@ -1356,7 +1363,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, void* dcb_gpu_addr u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[], u32* ccb_sizes_in_bytes, u32 vo_handle, u32 buf_idx, u32 flip_mode, u32 flip_arg) { - LOG_INFO(Lib_GnmDriver, "called"); + LOG_INFO(Lib_GnmDriver, "called [buf = {}]", buf_idx); auto* cmdbuf = reinterpret_cast(dcb_gpu_addrs[count - 1]); const auto size_dw = dcb_sizes_in_bytes[count - 1] / 4; diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index 5e093c20..5a5d69a3 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -6,6 +6,7 @@ #include "core/libraries/error_codes.h" #include "core/libraries/kernel/time_management.h" #include "core/libraries/videoout/driver.h" +#include "core/platform.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -199,16 +200,19 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) { // Reset flip label req.port->buffer_labels[req.index] = 0; + LOG_INFO(Lib_VideoOut, "Flip done [buf = {}]", req.index); } -bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) { +bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, + bool is_eop /*= false*/) { const auto& buffer = port->buffer_slots[index]; const auto& group = port->groups[buffer.group_index]; auto* frame = renderer->PrepareFrame(group, buffer.address_left); std::scoped_lock lock{mutex}; - if (requests.size() >= 2) { + if (requests.size() >= port->NumRegisteredBuffers()) { + LOG_ERROR(Lib_VideoOut, "Flip queue is full"); return false; } @@ -218,6 +222,7 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) { .index = index, .flip_arg = flip_arg, .submit_tsc = Libraries::Kernel::sceKernelReadTsc(), + .eop = is_eop, }); port->flip_status.flipPendingNum = static_cast(requests.size()); diff --git 
a/src/core/libraries/videoout/driver.h b/src/core/libraries/videoout/driver.h index 0c1ea582..f8b9ea81 100644 --- a/src/core/libraries/videoout/driver.h +++ b/src/core/libraries/videoout/driver.h @@ -34,6 +34,11 @@ struct VideoOutPort { } return index; } + + [[nodiscard]] int NumRegisteredBuffers() const { + return std::count_if(buffer_slots.cbegin(), buffer_slots.cend(), + [](auto& buffer) { return buffer.group_index != -1; }); + } }; struct ServiceThreadParams { @@ -59,7 +64,7 @@ public: int UnregisterBuffers(VideoOutPort* port, s32 attributeIndex); void Flip(std::chrono::microseconds timeout); - bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg); + bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false); void Vblank(); @@ -70,6 +75,7 @@ private: s32 index; s64 flip_arg; u64 submit_tsc; + bool eop; }; std::mutex mutex; diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index 555ff083..e5995ab2 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -10,6 +10,7 @@ #include "core/libraries/videoout/driver.h" #include "core/libraries/videoout/video_out.h" #include "core/loader/symbols_resolver.h" +#include "core/platform.h" namespace Libraries::VideoOut { @@ -216,13 +217,17 @@ void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr) { *label_addr = reinterpret_cast(port->buffer_labels.data()); } -s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void* unk) { +s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk) { auto* port = driver->GetPort(handle); if (!port) { return 0x8029000b; } - // TODO + Platform::IrqC::Instance()->RegisterOnce([=](Platform::InterruptId irq) { + ASSERT_MSG(irq == Platform::InterruptId::GfxEop, "An unexpected IRQ occured"); + const auto result = driver->SubmitFlip(port, buf_id, arg, true); + ASSERT_MSG(result, "EOP flip submission failed"); + 
}); return ORBIS_OK; } diff --git a/src/core/libraries/videoout/video_out.h b/src/core/libraries/videoout/video_out.h index fc62bc9b..00ea6afb 100644 --- a/src/core/libraries/videoout/video_out.h +++ b/src/core/libraries/videoout/video_out.h @@ -104,7 +104,7 @@ void Vblank(); // Internal system functions void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr); -s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void* unk); +s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk); void RegisterLib(Core::Loader::SymbolsResolver* sym); diff --git a/src/core/platform.h b/src/core/platform.h new file mode 100644 index 00000000..442a6d9e --- /dev/null +++ b/src/core/platform.h @@ -0,0 +1,78 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/singleton.h" +#include "common/types.h" +#include "magic_enum.hpp" + +#include +#include +#include +#include + +namespace Platform { + +enum class InterruptId : u32 { + Compute0RelMem = 0u, + Compute1RelMem = 1u, + Compute2RelMem = 2u, + Compute3RelMem = 3u, + Compute4RelMem = 4u, + Compute5RelMem = 5u, + Compute6RelMem = 6u, + GfxEop = 0x40u +}; + +using IrqHandler = std::function; + +struct IrqController { + void RegisterOnce(IrqHandler handler) { + std::unique_lock lock{m_lock}; + one_time_subscribers.emplace(handler); + } + + void Register(IrqHandler handler) { + ASSERT_MSG(!persistent_handler.has_value(), + "Too many persistent handlers"); // Add a slot map if so + { + std::unique_lock lock{m_lock}; + persistent_handler.emplace(handler); + } + } + + void Unregister() { + std::unique_lock lock{m_lock}; + persistent_handler.reset(); + } + + void Signal(InterruptId irq) { + LOG_TRACE(Core, "IRQ signaled: {}", magic_enum::enum_name(irq)); + { + std::unique_lock lock{m_lock}; + + if (persistent_handler) { + 
persistent_handler.value()(irq); + } + + while (!one_time_subscribers.empty()) { + const auto& h = one_time_subscribers.front(); + h(irq); + + one_time_subscribers.pop(); + } + } + } + +private: + std::optional persistent_handler{}; + std::queue one_time_subscribers{}; + std::mutex m_lock{}; +}; + +using IrqC = Common::Singleton; + +} // namespace Platform diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 2e4566f5..f41f4bb3 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -3,6 +3,7 @@ #include "common/assert.h" #include "common/io_file.h" +#include "common/thread.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" @@ -11,6 +12,8 @@ namespace AmdGpu { Liverpool::Liverpool() = default; void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { + Common::SetCurrentThreadName("CommandProcessor_Gfx"); + auto* header = reinterpret_cast(cmdbuf); u32 processed_cmd_size = 0; @@ -70,54 +73,12 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { } case PM4ItOpcode::EventWriteEos: { const auto* event_eos = reinterpret_cast(header); - switch (event_eos->command.Value()) { - case PM4CmdEventWriteEos::Command::SingalFence: { - event_eos->SignalFence(); - break; - } - default: { - UNREACHABLE(); - } - } + event_eos->SignalFence(); break; } case PM4ItOpcode::EventWriteEop: { const auto* event_eop = reinterpret_cast(header); - const InterruptSelect irq_sel = event_eop->int_sel; - const DataSelect data_sel = event_eop->data_sel; - - // Write back data if required - switch (data_sel) { - case DataSelect::Data32Low: { - *reinterpret_cast(event_eop->Address()) = event_eop->DataDWord(); - break; - } - case DataSelect::Data64: { - *event_eop->Address() = event_eop->DataQWord(); - break; - } - default: { - UNREACHABLE(); - } - } - - switch (irq_sel) { - case InterruptSelect::None: { - // No interrupt - break; - } - case 
InterruptSelect::IrqWhenWriteConfirm: { - if (eop_callback) { - eop_callback(); - } else { - UNREACHABLE_MSG("EOP callback is not registered"); - } - break; - } - default: { - UNREACHABLE(); - } - } + event_eop->SignalFence(); break; } case PM4ItOpcode::DmaData: { @@ -143,11 +104,9 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { case PM4ItOpcode::WaitRegMem: { const auto* wait_reg_mem = reinterpret_cast(header); ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); - ASSERT(wait_reg_mem->function.Value() == PM4CmdWaitRegMem::Function::Equal); - - { - std::unique_lock lock{m_reg_mem}; - cv_reg_mem.wait(lock, [&]() { return wait_reg_mem->Test(); }); + while (!wait_reg_mem->Test()) { + using namespace std::chrono_literals; + std::this_thread::sleep_for(1ms); } break; } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 34cac432..2e0030fd 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -624,17 +624,11 @@ public: // reworked with mutiple queues introduction cp.get(); } - void SetEopCallback(auto const& cb) { - eop_callback = cb; - } private: void ProcessCmdList(u32* cmdbuf, u32 size_in_bytes); - std::function eop_callback{}; std::future cp{}; - std::condition_variable cv_reg_mem{}; - std::mutex m_reg_mem{}; }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index 91e67a59..c9870168 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -6,6 +6,7 @@ #include #include "common/bit_field.h" #include "common/types.h" +#include "core/platform.h" #include "video_core/amdgpu/pm4_opcodes.h" namespace AmdGpu { @@ -282,8 +283,9 @@ struct PM4CmdEventWriteEop { u32 data_lo; ///< Value that will be written to memory when event occurs u32 data_hi; ///< Value that will be written to memory when event occurs - u64* Address() const { - return 
reinterpret_cast(address_lo | u64(address_hi) << 32); + template + T* Address() const { + return reinterpret_cast(address_lo | u64(address_hi) << 32); } u32 DataDWord() const { @@ -293,6 +295,36 @@ struct PM4CmdEventWriteEop { u64 DataQWord() const { return data_lo | u64(data_hi) << 32; } + + void SignalFence() const { + switch (data_sel.Value()) { + case DataSelect::Data32Low: { + *Address() = DataDWord(); + break; + } + case DataSelect::Data64: { + *Address() = DataQWord(); + break; + } + default: { + UNREACHABLE(); + } + } + + switch (int_sel.Value()) { + case InterruptSelect::None: { + // No interrupt + break; + } + case InterruptSelect::IrqWhenWriteConfirm: { + Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxEop); + break; + } + default: { + UNREACHABLE(); + } + } + } }; struct PM4DmaData { @@ -434,8 +466,15 @@ struct PM4CmdEventWriteEos { } void SignalFence() const { - ASSERT_MSG(command.Value() == Command::SingalFence, "Invalid action on packet"); - *Address() = DataDWord(); + switch (command.Value()) { + case Command::SingalFence: { + *Address() = DataDWord(); + break; + } + default: { + UNREACHABLE(); + } + } } };