mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-17 04:06:25 +00:00
amdgpu: wait_reg_mem
and write_data
implementation
Command list parsing is temporarily moved to an async task
This commit is contained in:
parent
bfb18135fb
commit
8e0c67f12e
|
@ -27,7 +27,7 @@ template <u32 data_block_size>
|
|||
static inline u32* WriteTrailingNop(u32* cmdbuf) {
|
||||
auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf);
|
||||
nop->header = PM4Type3Header{PM4ItOpcode::Nop, data_block_size - 1};
|
||||
nop->data_block[0] = 0; // only one out of `data_block_size` is initialized
|
||||
nop->data_block[0] = 0u; // only one out of `data_block_size` is initialized
|
||||
return cmdbuf + data_block_size + 1 /* header */;
|
||||
}
|
||||
|
||||
|
@ -48,9 +48,8 @@ s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata) {
|
|||
kernel_event.event.udata = udata;
|
||||
eq->addEvent(kernel_event);
|
||||
|
||||
liverpool->eop_callback = [=]() {
|
||||
eq->triggerEvent(SceKernelEvent::Type::GfxEop, EVFILT_GRAPHICS_CORE, nullptr);
|
||||
};
|
||||
liverpool->SetEopCallback(
|
||||
[=]() { eq->triggerEvent(SceKernelEvent::Type::GfxEop, EVFILT_GRAPHICS_CORE, nullptr); });
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
|
@ -82,7 +81,7 @@ s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t add
|
|||
wait_reg_mem->poll_addr_hi = u32(addr >> 32u);
|
||||
wait_reg_mem->ref = ref;
|
||||
wait_reg_mem->mask = mask;
|
||||
wait_reg_mem->poll_interval = 10;
|
||||
wait_reg_mem->poll_interval = 10u;
|
||||
|
||||
WriteTrailingNop<2>(cmdbuf + 7);
|
||||
return ORBIS_OK;
|
||||
|
@ -652,10 +651,10 @@ s32 PS4_SYSV_ABI sceGnmInsertWaitFlipDone(u32* cmdbuf, u32 size, s32 vo_handle,
|
|||
|
||||
auto* wait_reg_mem = reinterpret_cast<PM4CmdWaitRegMem*>(cmdbuf);
|
||||
wait_reg_mem->header = PM4Type3Header{PM4ItOpcode::WaitRegMem, 5};
|
||||
wait_reg_mem->function.Assign(3u);
|
||||
wait_reg_mem->mem_space.Assign(1u);
|
||||
wait_reg_mem->function.Assign(PM4CmdWaitRegMem::Function::Equal);
|
||||
wait_reg_mem->mem_space.Assign(PM4CmdWaitRegMem::MemSpace::Memory);
|
||||
*reinterpret_cast<uintptr_t*>(&wait_reg_mem->poll_addr_lo) =
|
||||
(label_addr + buf_idx * sizeof(uintptr_t)) & 0xffff'fffcu;
|
||||
(label_addr + buf_idx * sizeof(uintptr_t)) & ~0x3ull;
|
||||
wait_reg_mem->ref = 0u;
|
||||
wait_reg_mem->mask = 0xffff'ffffu;
|
||||
wait_reg_mem->poll_interval = 10u;
|
||||
|
@ -1303,7 +1302,7 @@ static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf
|
|||
write_lock->header = PM4Type3Header{PM4ItOpcode::WriteData, 3};
|
||||
write_lock->dst_sel.Assign(5u);
|
||||
*reinterpret_cast<uintptr_t*>(&write_lock->dst_addr_lo) =
|
||||
(label_addr + buf_idx * sizeof(uintptr_t)) & 0xffff'fffcu;
|
||||
(label_addr + buf_idx * sizeof(uintptr_t)) & ~0x3ull;
|
||||
write_lock->data[0] = 1;
|
||||
|
||||
auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf + 5);
|
||||
|
@ -1405,7 +1404,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[],
|
|||
}
|
||||
}
|
||||
|
||||
liverpool->ProcessCmdList(reinterpret_cast<u32*>(dcb_gpu_addrs[0]), dcb_sizes_in_bytes[0]);
|
||||
liverpool->Submit(reinterpret_cast<u32*>(dcb_gpu_addrs[0]), dcb_sizes_in_bytes[0]);
|
||||
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
@ -1416,7 +1415,10 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload() {
|
|||
}
|
||||
|
||||
// Entry point for sceGnmSubmitDone: logs the call, forwards to
// liverpool->SubmitDone(), and always reports ORBIS_OK to the caller.
int PS4_SYSV_ABI sceGnmSubmitDone() {
|
||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
||||
LOG_INFO(Lib_GnmDriver, "called");
|
||||
|
||||
liverpool->SubmitDone();
|
||||
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -196,6 +196,9 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) {
|
|||
reinterpret_cast<void*>(req.flip_arg));
|
||||
}
|
||||
}
|
||||
|
||||
// Reset flip label
|
||||
req.port->buffer_labels[req.index] = 0;
|
||||
}
|
||||
|
||||
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) {
|
||||
|
|
|
@ -117,6 +117,14 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
|
|||
}
|
||||
case PM4ItOpcode::WriteData: {
|
||||
const auto* write_data = reinterpret_cast<PM4CmdWriteData*>(header);
|
||||
ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5);
|
||||
const u32 data_size = (header->type3.count.Value() - 2) * 4;
|
||||
if (!write_data->wr_one_addr.Value()) {
|
||||
std::memcpy(reinterpret_cast<void*>(write_data->Address()), write_data->data,
|
||||
data_size);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::AcquireMem: {
|
||||
|
@ -125,6 +133,13 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
|
|||
}
|
||||
case PM4ItOpcode::WaitRegMem: {
|
||||
const auto* wait_reg_mem = reinterpret_cast<PM4CmdWaitRegMem*>(header);
|
||||
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
||||
ASSERT(wait_reg_mem->function.Value() == PM4CmdWaitRegMem::Function::Equal);
|
||||
|
||||
{
|
||||
std::unique_lock lock{m_reg_mem};
|
||||
cv_reg_mem.wait(lock, [&]() { return wait_reg_mem->Test(); });
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
|
|
@ -3,11 +3,14 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/types.h"
|
||||
|
||||
#include <array>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <future>
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
|
@ -612,9 +615,26 @@ struct Liverpool {
|
|||
public:
|
||||
Liverpool();
|
||||
|
||||
/// Hands a command list over to ProcessCmdList on a separate thread.
///
/// @param cmdbuf        Pointer to the first dword of the command list.
/// @param size_in_bytes Size of the command list in bytes.
///
/// Only one submission may be outstanding: the assertion fires when the future
/// of a previous submission is still held in `cp`.
void Submit(u32* cmdbuf, u32 size_in_bytes) {
    ASSERT_MSG(!cp.valid(), "Trying to submit while previous submission is pending");
    // Request std::launch::async explicitly. The default policy also allows
    // deferred execution, in which case the command list would only start being
    // processed once the future is waited on instead of running in parallel.
    cp = std::async(std::launch::async, &Liverpool::ProcessCmdList, this, cmdbuf,
                    size_in_bytes);
}
|
||||
/// Blocks until the command list stored in `cp` has been fully processed.
///
/// Returns immediately when `cp` holds no pending task.
void SubmitDone() {
    // This is wrong as `submitDone()` should never be blocking. The behavior will be
    // reworked with multiple queues introduction
    //
    // Calling get() on a future without shared state is undefined behavior, so only
    // wait when a task has actually been stored.
    if (cp.valid()) {
        cp.get();
    }
}
|
||||
void SetEopCallback(auto const& cb) {
|
||||
eop_callback = cb;
|
||||
}
|
||||
|
||||
private:
|
||||
void ProcessCmdList(u32* cmdbuf, u32 size_in_bytes);
|
||||
|
||||
std::function<void(void)> eop_callback{};
|
||||
std::future<void> cp{};
|
||||
std::condition_variable cv_reg_mem{};
|
||||
std::mutex m_reg_mem{};
|
||||
};
|
||||
|
||||
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
|
||||
|
|
|
@ -320,6 +320,19 @@ struct PM4DmaData {
|
|||
};
|
||||
|
||||
struct PM4CmdWaitRegMem {
|
||||
enum Engine : u32 { Me = 0u, Pfp = 1u };
|
||||
enum MemSpace : u32 { Register = 0u, Memory = 1u };
|
||||
enum Function : u32 {
|
||||
Always = 0u,
|
||||
LessThan = 1u,
|
||||
LessThanEqual = 2u,
|
||||
Equal = 3u,
|
||||
NotEqual = 4u,
|
||||
GreaterThanEqual = 5u,
|
||||
GreaterThan = 6u,
|
||||
Reserved = 7u
|
||||
};
|
||||
|
||||
PM4Type3Header header;
|
||||
union {
|
||||
BitField<0, 3, u32> function;
|
||||
|
@ -332,6 +345,41 @@ struct PM4CmdWaitRegMem {
|
|||
u32 ref;
|
||||
u32 mask;
|
||||
u32 poll_interval;
|
||||
|
||||
/// Reassembles the polled address from its two 32-bit halves.
///
/// @return `poll_addr_hi` placed in the upper 32 bits, OR-ed with `poll_addr_lo`,
///         reinterpreted as a pointer to u32.
u32* Address() const {
    const uintptr_t upper_half = uintptr_t(poll_addr_hi) << 32;
    const uintptr_t full_addr = upper_half | poll_addr_lo;
    return reinterpret_cast<u32*>(full_addr);
}
|
||||
|
||||
bool Test() const {
|
||||
switch (function.Value()) {
|
||||
case Function::Always: {
|
||||
return true;
|
||||
}
|
||||
case Function::LessThan: {
|
||||
return (*Address() & mask) < ref;
|
||||
}
|
||||
case Function::LessThanEqual: {
|
||||
return (*Address() & mask) <= ref;
|
||||
}
|
||||
case Function::Equal: {
|
||||
return (*Address() & mask) == ref;
|
||||
}
|
||||
case Function::NotEqual: {
|
||||
return (*Address() & mask) != ref;
|
||||
}
|
||||
case Function::GreaterThanEqual: {
|
||||
return (*Address() & mask) >= ref;
|
||||
}
|
||||
case Function::GreaterThan: {
|
||||
return (*Address() & mask) > ref;
|
||||
}
|
||||
case Function::Reserved:
|
||||
[[fallthrough]];
|
||||
default: {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct PM4CmdWriteData {
|
||||
|
@ -346,6 +394,10 @@ struct PM4CmdWriteData {
|
|||
u32 dst_addr_lo;
|
||||
u32 dst_addr_hi;
|
||||
u32 data[0];
|
||||
|
||||
uintptr_t Address() const {
|
||||
return (uintptr_t(dst_addr_hi) << 32) | dst_addr_lo;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace AmdGpu
|
||||
|
|
Loading…
Reference in a new issue