Merge pull request #128 from psucien/gnm_driver/basic_sync

gnm_driver: Gnm eventq and GPU flips
This commit is contained in:
georgemoralis 2024-05-11 01:22:17 +03:00 committed by GitHub
commit b326ce5f69
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 636 additions and 75 deletions

View file

@ -6,6 +6,8 @@
#include "core/libraries/error_codes.h" #include "core/libraries/error_codes.h"
#include "core/libraries/gnmdriver/gnmdriver.h" #include "core/libraries/gnmdriver/gnmdriver.h"
#include "core/libraries/libs.h" #include "core/libraries/libs.h"
#include "core/libraries/videoout/video_out.h"
#include "core/platform.h"
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pm4_cmds.h" #include "video_core/amdgpu/pm4_cmds.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/renderer_vulkan.h"
@ -26,12 +28,33 @@ template <u32 data_block_size>
static inline u32* WriteTrailingNop(u32* cmdbuf) { static inline u32* WriteTrailingNop(u32* cmdbuf) {
auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf); auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf);
nop->header = PM4Type3Header{PM4ItOpcode::Nop, data_block_size - 1}; nop->header = PM4Type3Header{PM4ItOpcode::Nop, data_block_size - 1};
nop->data_block[0] = 0; // only one out of `data_block_size` is initialized nop->data_block[0] = 0u; // only one out of `data_block_size` is initialized
return cmdbuf + data_block_size + 1 /* header */; return cmdbuf + data_block_size + 1 /* header */;
} }
int PS4_SYSV_ABI sceGnmAddEqEvent() { s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata) {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); LOG_TRACE(Lib_GnmDriver, "called");
ASSERT_MSG(id == SceKernelEvent::Type::GfxEop);
if (!eq) {
return ORBIS_KERNEL_ERROR_EBADF;
}
EqueueEvent kernel_event{};
kernel_event.event.ident = id;
kernel_event.event.filter = EVFILT_GRAPHICS_CORE;
kernel_event.event.flags = 1;
kernel_event.event.fflags = 0;
kernel_event.event.data = id;
kernel_event.event.udata = udata;
eq->addEvent(kernel_event);
Platform::IrqC::Instance()->Register([=](Platform::InterruptId irq) {
ASSERT_MSG(irq == Platform::InterruptId::GfxEop,
"An unexpected IRQ occured"); // We need to conver IRQ# to event id and do proper
// filtering in trigger function
eq->triggerEvent(SceKernelEvent::Type::GfxEop, EVFILT_GRAPHICS_CORE, nullptr);
});
return ORBIS_OK; return ORBIS_OK;
} }
@ -63,7 +86,7 @@ s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t add
wait_reg_mem->poll_addr_hi = u32(addr >> 32u); wait_reg_mem->poll_addr_hi = u32(addr >> 32u);
wait_reg_mem->ref = ref; wait_reg_mem->ref = ref;
wait_reg_mem->mask = mask; wait_reg_mem->mask = mask;
wait_reg_mem->poll_interval = 10; wait_reg_mem->poll_interval = 10u;
WriteTrailingNop<2>(cmdbuf + 7); WriteTrailingNop<2>(cmdbuf + 7);
return ORBIS_OK; return ORBIS_OK;
@ -131,8 +154,17 @@ int PS4_SYSV_ABI sceGnmDebugHardwareStatus() {
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceGnmDeleteEqEvent() { s32 PS4_SYSV_ABI sceGnmDeleteEqEvent(SceKernelEqueue eq, u64 id) {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); LOG_TRACE(Lib_GnmDriver, "called");
ASSERT_MSG(id == SceKernelEvent::Type::GfxEop);
if (!eq) {
return ORBIS_KERNEL_ERROR_EBADF;
}
eq->removeEvent(id);
Platform::IrqC::Instance()->Unregister();
return ORBIS_OK; return ORBIS_OK;
} }
@ -205,14 +237,14 @@ u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size) {
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE1 0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE1
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x215u, 0x170u); // COMPUTE_RESOURCE_LIMITS cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x215u, 0x170u); // COMPUTE_RESOURCE_LIMITS
cmdbuf = WriteHeader<PM4ItOpcode::Unknown58>( cmdbuf = WriteHeader<PM4ItOpcode::AcquireMem>(
cmdbuf, 6); // for some reason the packet indicates larger size cmdbuf, 6); // for some reason the packet indicates larger size
cmdbuf = WriteBody(cmdbuf, 0x28000000u, 0u, 0u, 0u, 0u); cmdbuf = WriteBody(cmdbuf, 0x28000000u, 0u, 0u, 0u, 0u);
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xef); cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xef);
cmdbuf = WriteBody(cmdbuf, 0xau, 0u); cmdbuf = WriteBody(cmdbuf, 0xau, 0u);
} else { } else {
cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0x100); cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xff);
} }
return 0x100; // it is a size, not a retcode return 0x100; // it is a size, not a retcode
} }
@ -321,7 +353,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size) {
if constexpr (g_fair_hw_init) { if constexpr (g_fair_hw_init) {
ASSERT_MSG(0, "Not implemented"); ASSERT_MSG(0, "Not implemented");
} else { } else {
cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0x100); cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xff);
} }
return 0x100; // it is a size, not a retcode return 0x100; // it is a size, not a retcode
} }
@ -335,7 +367,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size) {
if constexpr (g_fair_hw_init) { if constexpr (g_fair_hw_init) {
ASSERT_MSG(0, "Not implemented"); ASSERT_MSG(0, "Not implemented");
} else { } else {
cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0x100); cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xff);
} }
return 0x100; // it is a size, not a retcode return 0x100; // it is a size, not a retcode
} }
@ -565,9 +597,9 @@ s32 PS4_SYSV_ABI sceGnmInsertPopMarker(u32* cmdbuf, u32 size) {
LOG_TRACE(Lib_GnmDriver, "called"); LOG_TRACE(Lib_GnmDriver, "called");
if (cmdbuf && (size == 6)) { if (cmdbuf && (size == 6)) {
cmdbuf = WritePacket<PM4ItOpcode::Nop>( cmdbuf =
cmdbuf, PM4ShaderType::ShaderGraphics, WritePacket<PM4ItOpcode::Nop>(cmdbuf, PM4ShaderType::ShaderGraphics,
static_cast<u32>(PM4CmdNop::PayloadType::DebugMarkerPop), 0u, 0u, 0u, 0u); PM4CmdNop::PayloadType::DebugMarkerPop, 0u, 0u, 0u, 0u);
return ORBIS_OK; return ORBIS_OK;
} }
return -1; return -1;
@ -588,7 +620,7 @@ s32 PS4_SYSV_ABI sceGnmInsertPushMarker(u32* cmdbuf, u32 size, const char* marke
auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf); auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf);
nop->header = nop->header =
PM4Type3Header{PM4ItOpcode::Nop, packet_size, PM4ShaderType::ShaderGraphics}; PM4Type3Header{PM4ItOpcode::Nop, packet_size, PM4ShaderType::ShaderGraphics};
nop->data_block[0] = static_cast<u32>(PM4CmdNop::PayloadType::DebugMarkerPush); nop->data_block[0] = PM4CmdNop::PayloadType::DebugMarkerPush;
const auto marker_len = len + 1; const auto marker_len = len + 1;
std::memcpy(&nop->data_block[1], marker, marker_len); std::memcpy(&nop->data_block[1], marker, marker_len);
std::memset(reinterpret_cast<u8*>(&nop->data_block[1]) + marker_len, 0, std::memset(reinterpret_cast<u8*>(&nop->data_block[1]) + marker_len, 0,
@ -614,8 +646,24 @@ int PS4_SYSV_ABI sceGnmInsertThreadTraceMarker() {
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceGnmInsertWaitFlipDone() { s32 PS4_SYSV_ABI sceGnmInsertWaitFlipDone(u32* cmdbuf, u32 size, s32 vo_handle, u32 buf_idx) {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); LOG_TRACE(Lib_GnmDriver, "called");
if (size != 7) {
return -1;
}
uintptr_t label_addr{};
VideoOut::sceVideoOutGetBufferLabelAddress(vo_handle, &label_addr);
auto* wait_reg_mem = reinterpret_cast<PM4CmdWaitRegMem*>(cmdbuf);
wait_reg_mem->header = PM4Type3Header{PM4ItOpcode::WaitRegMem, 5};
wait_reg_mem->raw = 0x13u;
*reinterpret_cast<uintptr_t*>(&wait_reg_mem->poll_addr_lo) =
(label_addr + buf_idx * sizeof(uintptr_t)) & ~0x3ull;
wait_reg_mem->ref = 0u;
wait_reg_mem->mask = 0xffff'ffffu;
wait_reg_mem->poll_interval = 10u;
return ORBIS_OK; return ORBIS_OK;
} }
@ -824,8 +872,48 @@ int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader() {
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceGnmSetEmbeddedVsShader() { s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 modifier) {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); LOG_TRACE(Lib_GnmDriver, "called");
// A fullscreen triangle with one uv set
const static u32 shader_code[] = {
0xbeeb03ffu, 00000007u, // s_mov_b32 vcc_hi, $0x00000007
0x36020081u, // v_and_b32 v1, 1, v0
0x34020281u, // v_lshlrev_b32 v1, 1, v1
0x360000c2u, // v_and_b32 v0, -2, v0
0x4a0202c1u, // v_add_i32 v1, vcc, -1, v1
0x4a0000c1u, // v_add_i32 v0, vcc, -1, v0
0x7e020b01u, // v_cvt_f32_i32 v1, v1
0x7e040280u, // v_cvt_f32_i32 v0, v0
0x7e0602f2u, // v_mov_b32 v3, 1.0
0xf80008cfu, 0x03020001u, // exp pos0, v1, v0, v2, v3 done
0xf800020fu, 0x03030303u, // exp param0, v3, v3, v3, v3
0xbf810000u, // s_endpgm
// OrbShdr header
0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du,
0x00000017u, 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u};
const auto shader_addr = uintptr_t(&shader_code); // Original address is 0xfe000f10
const static u32 vs_regs[] = {
u32(shader_addr >> 8), u32(shader_addr >> 40), 0xc0000u, 4, 0, 4, 0, 7};
if (shader_id != 0) {
return 0x8eee00ff;
}
// Normally the driver will do a call to `sceGnmSetVsShader()`, but this function has
// a check for zero in the upper part of shader address. In our case, the address is a
// pointer to a stack memory, so the check will likely fail. To workaround it we will
// repeat set shader functionality here as it is trivial.
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0], 0u); // SPI_SHADER_PGM_LO_VS
cmdbuf =
PM4CmdSetData::SetShReg(cmdbuf, 0x4au, vs_regs[2], vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]); // PA_CL_VS_OUT_CNTL
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b1u, vs_regs[4]); // SPI_VS_OUT_CONFIG
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT
WriteTrailingNop<11>(cmdbuf);
return ORBIS_OK; return ORBIS_OK;
} }
@ -960,6 +1048,8 @@ int PS4_SYSV_ABI sceGnmSetVgtControl() {
} }
s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u32 shader_modifier) { s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u32 shader_modifier) {
LOG_TRACE(Lib_GnmDriver, "called");
if (!cmdbuf || size <= 0x1c) { if (!cmdbuf || size <= 0x1c) {
return -1; return -1;
} }
@ -987,7 +1077,6 @@ s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u3
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT
WriteTrailingNop<11>(cmdbuf); WriteTrailingNop<11>(cmdbuf);
return ORBIS_OK; return ORBIS_OK;
} }
@ -1186,44 +1275,142 @@ int PS4_SYSV_ABI sceGnmSqttWaitForEvent() {
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers() { static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf_idx, u32 flip_mode,
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); u32 flip_arg, void* unk) {
// check for `prepareFlip` packet
cmdbuf += size - 64;
ASSERT_MSG(cmdbuf[0] == 0xc03e1000, "Can't find `prepareFlip` packet");
std::array<u32, 7> backup{};
std::memcpy(backup.data(), cmdbuf, backup.size() * sizeof(decltype(backup)::value_type));
ASSERT_MSG(((backup[2] & 3) == 0u) || (backup[1] != PM4CmdNop::PayloadType::PrepareFlipLabel),
"Invalid flip packet");
ASSERT_MSG(buf_idx != 0xffff'ffffu, "Invalid VO buffer index");
const s32 flip_result = VideoOut::sceVideoOutSubmitEopFlip(vo_handle, buf_idx, flip_mode,
flip_arg, nullptr /*unk*/);
if (flip_result != 0) {
if (flip_result == 0x80290012) {
LOG_ERROR(Lib_GnmDriver, "Flip queue is full");
return 0x80d11081;
} else {
LOG_ERROR(Lib_GnmDriver, "Flip request failed");
return flip_result;
}
}
uintptr_t label_addr{};
VideoOut::sceVideoOutGetBufferLabelAddress(vo_handle, &label_addr);
// Write event to lock the VO surface
auto* write_lock = reinterpret_cast<PM4CmdWriteData*>(cmdbuf);
write_lock->header = PM4Type3Header{PM4ItOpcode::WriteData, 3};
write_lock->raw = 0x500u;
const auto addr = (label_addr + buf_idx * sizeof(label_addr)) & ~0x3ull;
write_lock->Address<uintptr_t>(addr);
write_lock->data[0] = 1;
auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf + 5);
if (backup[1] == PM4CmdNop::PayloadType::PrepareFlip) {
nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x39};
nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip;
} else {
if (backup[1] == PM4CmdNop::PayloadType::PrepareFlipLabel) {
nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x34};
nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip;
// Write event to update label
auto* write_label = reinterpret_cast<PM4CmdWriteData*>(cmdbuf + 0x3b);
write_label->header = PM4Type3Header{PM4ItOpcode::WriteData, 3};
write_label->raw = 0x500u;
write_label->dst_addr_lo = backup[2] & 0xffff'fffcu;
write_label->dst_addr_hi = backup[3];
write_label->data[0] = backup[4];
}
if (backup[1] == PM4CmdNop::PayloadType::PrepareFlipInterruptLabel) {
nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x33};
nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip;
auto* write_eop = reinterpret_cast<PM4CmdEventWriteEop*>(cmdbuf + 0x3a);
write_eop->header = PM4Type3Header{PM4ItOpcode::EventWriteEop, 4};
write_eop->event_control = (backup[5] & 0x3f) + 0x500u + (backup[6] & 0x3f) * 0x1000;
write_eop->address_lo = backup[2] & 0xffff'fffcu;
write_eop->data_control = (backup[3] & 0xffffu) | 0x2200'0000u;
write_eop->data_lo = backup[4];
write_eop->data_hi = 0u;
}
if (backup[1] == PM4CmdNop::PayloadType::PrepareFlipInterrupt) {
nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x33};
nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip;
auto* write_eop = reinterpret_cast<PM4CmdEventWriteEop*>(cmdbuf + 0x3a);
write_eop->header = PM4Type3Header{PM4ItOpcode::EventWriteEop, 4};
write_eop->event_control = (backup[5] & 0x3f) + 0x500u + (backup[6] & 0x3f) * 0x1000;
write_eop->address_lo = 0u;
write_eop->data_control = 0x100'0000u;
write_eop->data_lo = 0u;
write_eop->data_hi = 0u;
}
}
return ORBIS_OK; return ORBIS_OK;
} }
/// Submits a set of command buffers together with a flip request.
///
/// The tail of the LAST draw command buffer is patched by PatchFlipRequest() and, if patching
/// succeeds, all buffers are forwarded to sceGnmSubmitCommandBuffers().
///
/// @param count               Number of entries in the address/size arrays.
/// @param dcb_gpu_addrs       Draw command buffer addresses.
/// @param dcb_sizes_in_bytes  Draw command buffer sizes, in bytes.
/// @param ccb_gpu_addrs       Constant command buffer addresses (forwarded untouched).
/// @param ccb_sizes_in_bytes  Constant command buffer sizes (forwarded untouched).
/// @param vo_handle           Video-out handle the flip is requested on.
/// @param buf_idx             Index of the display buffer to flip to.
/// @param flip_mode           Flip mode, forwarded to PatchFlipRequest().
/// @param flip_arg            Flip argument, forwarded to PatchFlipRequest().
/// @return 0x80d11000 on invalid DCB arguments, the PatchFlipRequest() error if patching
///         fails, otherwise the result of sceGnmSubmitCommandBuffers().
s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, void* dcb_gpu_addrs[],
                                                   u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[],
                                                   u32* ccb_sizes_in_bytes, u32 vo_handle,
                                                   u32 buf_idx, u32 flip_mode, u32 flip_arg) {
    LOG_INFO(Lib_GnmDriver, "called [buf = {}]", buf_idx);

    // The last DCB is dereferenced right below, i.e. before sceGnmSubmitCommandBuffers() has a
    // chance to validate anything. Reject null arrays and an empty submission here so that
    // `count - 1` can neither wrap around nor index through a null pointer. The error value is
    // the one sceGnmSubmitCommandBuffers() uses for null DCB arrays.
    // NOTE(review): the code returned by the real driver for `count == 0` is not confirmed.
    if (!dcb_gpu_addrs || !dcb_sizes_in_bytes || count == 0) {
        LOG_ERROR(Lib_GnmDriver, "DCB arrays must not be NULL and count must not be zero");
        return 0x80d11000;
    }

    const u32 last = count - 1;
    auto* cmdbuf = reinterpret_cast<u32*>(dcb_gpu_addrs[last]);
    const auto size_dw = dcb_sizes_in_bytes[last] / 4; // bytes -> dwords

    const s32 patch_result =
        PatchFlipRequest(cmdbuf, size_dw, vo_handle, buf_idx, flip_mode, flip_arg, nullptr /*unk*/);
    if (patch_result != ORBIS_OK) {
        return patch_result;
    }

    return sceGnmSubmitCommandBuffers(count, dcb_gpu_addrs, dcb_sizes_in_bytes, ccb_gpu_addrs,
                                      ccb_sizes_in_bytes);
}
int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload() { int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload() {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcbGpuAddrs[], u32* dcbSizesInBytes, s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[],
void* ccbGpuAddrs[], u32* ccbSizesInBytes) { u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[],
u32* ccb_sizes_in_bytes) {
LOG_INFO(Lib_GnmDriver, "called"); LOG_INFO(Lib_GnmDriver, "called");
ASSERT_MSG(count == 1, "Multiple command buffer submission is unsupported!"); ASSERT_MSG(count == 1, "Multiple command buffer submission is unsupported!");
if (!dcbGpuAddrs || !dcbSizesInBytes) { if (!dcb_gpu_addrs || !dcb_sizes_in_bytes) {
LOG_ERROR(Lib_GnmDriver, "dcbGpuAddrs and dcbSizesInBytes must not be NULL"); LOG_ERROR(Lib_GnmDriver, "dcbGpuAddrs and dcbSizesInBytes must not be NULL");
return 0x80d11000; return 0x80d11000;
} }
for (u32 i = 0; i < count; i++) { for (u32 i = 0; i < count; i++) {
if (dcbSizesInBytes[i] == 0) { if (dcb_sizes_in_bytes[i] == 0) {
LOG_ERROR(Lib_GnmDriver, "Submitting a null DCB {}", i); LOG_ERROR(Lib_GnmDriver, "Submitting a null DCB {}", i);
return 0x80d11000; return 0x80d11000;
} }
if (dcbSizesInBytes[i] > 0x3ffffc) { if (dcb_sizes_in_bytes[i] > 0x3ffffc) {
LOG_ERROR(Lib_GnmDriver, "dcbSizesInBytes[{}] ({}) is limited to (2*20)-1 DWORDS", i, LOG_ERROR(Lib_GnmDriver, "dcbSizesInBytes[{}] ({}) is limited to (2*20)-1 DWORDS", i,
dcbSizesInBytes[i]); dcb_sizes_in_bytes[i]);
return 0x80d11000; return 0x80d11000;
} }
if (ccbSizesInBytes && ccbSizesInBytes[i] > 0x3ffffc) { if (ccb_sizes_in_bytes && ccb_sizes_in_bytes[i] > 0x3ffffc) {
LOG_ERROR(Lib_GnmDriver, "ccbSizesInBytes[{}] ({}) is limited to (2*20)-1 DWORDS", i, LOG_ERROR(Lib_GnmDriver, "ccbSizesInBytes[{}] ({}) is limited to (2*20)-1 DWORDS", i,
ccbSizesInBytes[i]); ccb_sizes_in_bytes[i]);
return 0x80d11000; return 0x80d11000;
} }
} }
liverpool->ProcessCmdList(reinterpret_cast<u32*>(dcbGpuAddrs[0]), dcbSizesInBytes[0]); liverpool->Submit(reinterpret_cast<u32*>(dcb_gpu_addrs[0]), dcb_sizes_in_bytes[0]);
return ORBIS_OK; return ORBIS_OK;
} }
@ -1234,7 +1421,10 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload() {
} }
int PS4_SYSV_ABI sceGnmSubmitDone() { int PS4_SYSV_ABI sceGnmSubmitDone() {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); LOG_INFO(Lib_GnmDriver, "called");
liverpool->SubmitDone();
return ORBIS_OK; return ORBIS_OK;
} }

View file

@ -4,6 +4,7 @@
#pragma once #pragma once
#include "common/types.h" #include "common/types.h"
#include "core/libraries/kernel/event_queues.h"
namespace Core::Loader { namespace Core::Loader {
class SymbolsResolver; class SymbolsResolver;
@ -11,7 +12,9 @@ class SymbolsResolver;
namespace Libraries::GnmDriver { namespace Libraries::GnmDriver {
int PS4_SYSV_ABI sceGnmAddEqEvent(); using namespace Kernel;
s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata);
int PS4_SYSV_ABI sceGnmAreSubmitsAllowed(); int PS4_SYSV_ABI sceGnmAreSubmitsAllowed();
int PS4_SYSV_ABI sceGnmBeginWorkload(); int PS4_SYSV_ABI sceGnmBeginWorkload();
s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t addr, u32 mask, s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t addr, u32 mask,
@ -28,7 +31,7 @@ int PS4_SYSV_ABI sceGnmDebuggerSetAddressWatch();
int PS4_SYSV_ABI sceGnmDebuggerWriteGds(); int PS4_SYSV_ABI sceGnmDebuggerWriteGds();
int PS4_SYSV_ABI sceGnmDebuggerWriteSqIndirectRegister(); int PS4_SYSV_ABI sceGnmDebuggerWriteSqIndirectRegister();
int PS4_SYSV_ABI sceGnmDebugHardwareStatus(); int PS4_SYSV_ABI sceGnmDebugHardwareStatus();
int PS4_SYSV_ABI sceGnmDeleteEqEvent(); s32 PS4_SYSV_ABI sceGnmDeleteEqEvent(SceKernelEqueue eq, u64 id);
int PS4_SYSV_ABI sceGnmDestroyWorkloadStream(); int PS4_SYSV_ABI sceGnmDestroyWorkloadStream();
int PS4_SYSV_ABI sceGnmDingDong(); int PS4_SYSV_ABI sceGnmDingDong();
int PS4_SYSV_ABI sceGnmDingDongForWorkload(); int PS4_SYSV_ABI sceGnmDingDongForWorkload();
@ -104,7 +107,7 @@ s32 PS4_SYSV_ABI sceGnmInsertPushMarker(u32* cmdbuf, u32 size, const char* marke
int PS4_SYSV_ABI sceGnmInsertSetColorMarker(); int PS4_SYSV_ABI sceGnmInsertSetColorMarker();
int PS4_SYSV_ABI sceGnmInsertSetMarker(); int PS4_SYSV_ABI sceGnmInsertSetMarker();
int PS4_SYSV_ABI sceGnmInsertThreadTraceMarker(); int PS4_SYSV_ABI sceGnmInsertThreadTraceMarker();
int PS4_SYSV_ABI sceGnmInsertWaitFlipDone(); s32 PS4_SYSV_ABI sceGnmInsertWaitFlipDone(u32* cmdbuf, u32 size, s32 vo_handle, u32 buf_idx);
int PS4_SYSV_ABI sceGnmIsCoredumpValid(); int PS4_SYSV_ABI sceGnmIsCoredumpValid();
int PS4_SYSV_ABI sceGnmIsUserPaEnabled(); int PS4_SYSV_ABI sceGnmIsUserPaEnabled();
int PS4_SYSV_ABI sceGnmLogicalCuIndexToPhysicalCuIndex(); int PS4_SYSV_ABI sceGnmLogicalCuIndexToPhysicalCuIndex();
@ -137,7 +140,7 @@ s32 PS4_SYSV_ABI sceGnmSetCsShader(u32* cmdbuf, u32 size, const u32* cs_regs);
s32 PS4_SYSV_ABI sceGnmSetCsShaderWithModifier(u32* cmdbuf, u32 size, const u32* cs_regs, s32 PS4_SYSV_ABI sceGnmSetCsShaderWithModifier(u32* cmdbuf, u32 size, const u32* cs_regs,
u32 modifier); u32 modifier);
int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader(); int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader();
int PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(); s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 modifier);
int PS4_SYSV_ABI sceGnmSetEsShader(); int PS4_SYSV_ABI sceGnmSetEsShader();
int PS4_SYSV_ABI sceGnmSetGsRingSizes(); int PS4_SYSV_ABI sceGnmSetGsRingSizes();
int PS4_SYSV_ABI sceGnmSetGsShader(); int PS4_SYSV_ABI sceGnmSetGsShader();
@ -191,9 +194,12 @@ int PS4_SYSV_ABI sceGnmSqttStopTrace();
int PS4_SYSV_ABI sceGnmSqttSwitchTraceBuffer(); int PS4_SYSV_ABI sceGnmSqttSwitchTraceBuffer();
int PS4_SYSV_ABI sceGnmSqttSwitchTraceBuffer2(); int PS4_SYSV_ABI sceGnmSqttSwitchTraceBuffer2();
int PS4_SYSV_ABI sceGnmSqttWaitForEvent(); int PS4_SYSV_ABI sceGnmSqttWaitForEvent();
int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(); s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, void* dcb_gpu_addrs[],
u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[],
u32* ccb_sizes_in_bytes, u32 vo_handle,
u32 buf_idx, u32 flip_mode, u32 flip_arg);
int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload(); int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload();
int PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[], s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[],
u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[], u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[],
u32* ccb_sizes_in_bytes); u32* ccb_sizes_in_bytes);
int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(); int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload();

View file

@ -20,6 +20,14 @@ int EqueueInternal::addEvent(const EqueueEvent& event) {
return 0; return 0;
} }
/// Removes the registered event whose `event.ident` equals `id`.
/// Asserts that such an event exists; always returns 0.
int EqueueInternal::removeEvent(u64 id) {
    // waitForEvents() serialises on m_mutex; take the same lock so that erasing from m_events
    // cannot race with another thread working on the queue.
    // NOTE(review): assumes no caller already holds m_mutex when calling removeEvent().
    std::unique_lock lock{m_mutex};

    const auto event_q =
        std::ranges::find_if(m_events, [id](const auto& ev) { return ev.event.ident == id; });
    ASSERT(event_q != m_events.cend());
    m_events.erase(event_q);
    return 0;
}
int EqueueInternal::waitForEvents(SceKernelEvent* ev, int num, u32 micros) { int EqueueInternal::waitForEvents(SceKernelEvent* ev, int num, u32 micros) {
std::unique_lock lock{m_mutex}; std::unique_lock lock{m_mutex};
int ret = 0; int ret = 0;

View file

@ -42,11 +42,22 @@ using ResetFunc = void (*)(EqueueEvent* event);
using DeleteFunc = void (*)(EqueueInternal* eq, EqueueEvent* event); using DeleteFunc = void (*)(EqueueInternal* eq, EqueueEvent* event);
struct SceKernelEvent { struct SceKernelEvent {
enum Type : u64 {
Compute0RelMem = 0x00,
Compute1RelMem = 0x01,
Compute2RelMem = 0x02,
Compute3RelMem = 0x03,
Compute4RelMem = 0x04,
Compute5RelMem = 0x05,
Compute6RelMem = 0x06,
GfxEop = 0x40
};
u64 ident = 0; /* identifier for this event */ u64 ident = 0; /* identifier for this event */
s16 filter = 0; /* filter for event */ s16 filter = 0; /* filter for event */
u16 flags = 0; u16 flags = 0;
u32 fflags = 0; u32 fflags = 0;
s64 data = 0; u64 data = 0;
void* udata = nullptr; /* opaque user data identifier */ void* udata = nullptr; /* opaque user data identifier */
}; };
@ -80,6 +91,7 @@ public:
this->m_name = m_name; this->m_name = m_name;
} }
int addEvent(const EqueueEvent& event); int addEvent(const EqueueEvent& event);
int removeEvent(u64 id);
int waitForEvents(SceKernelEvent* ev, int num, u32 micros); int waitForEvents(SceKernelEvent* ev, int num, u32 micros);
bool triggerEvent(u64 ident, s16 filter, void* trigger_data); bool triggerEvent(u64 ident, s16 filter, void* trigger_data);
int getTriggeredEvents(SceKernelEvent* ev, int num); int getTriggeredEvents(SceKernelEvent* ev, int num);

View file

@ -11,29 +11,34 @@ namespace Libraries::Kernel {
int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name) { int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name) {
if (eq == nullptr) { if (eq == nullptr) {
LOG_ERROR(Kernel_Event, "Event queue is null!"); LOG_ERROR(Kernel_Event, "Event queue is null!");
return SCE_KERNEL_ERROR_EINVAL; return ORBIS_KERNEL_ERROR_EINVAL;
} }
if (name == nullptr) { if (name == nullptr) {
LOG_ERROR(Kernel_Event, "Event queue name is invalid!");
return SCE_KERNEL_ERROR_EFAULT;
}
if (name == NULL) {
LOG_ERROR(Kernel_Event, "Event queue name is null!"); LOG_ERROR(Kernel_Event, "Event queue name is null!");
return SCE_KERNEL_ERROR_EINVAL; return ORBIS_KERNEL_ERROR_EINVAL;
} }
// Maximum is 32 including null terminator // Maximum is 32 including null terminator
static constexpr size_t MaxEventQueueNameSize = 32; static constexpr size_t MaxEventQueueNameSize = 32;
if (std::strlen(name) > MaxEventQueueNameSize) { if (std::strlen(name) > MaxEventQueueNameSize) {
LOG_ERROR(Kernel_Event, "Event queue name exceeds 32 bytes!"); LOG_ERROR(Kernel_Event, "Event queue name exceeds 32 bytes!");
return SCE_KERNEL_ERROR_ENAMETOOLONG; return ORBIS_KERNEL_ERROR_ENAMETOOLONG;
} }
LOG_INFO(Kernel_Event, "name = {}", name); LOG_INFO(Kernel_Event, "name = {}", name);
*eq = new EqueueInternal; *eq = new EqueueInternal;
(*eq)->setName(std::string(name)); (*eq)->setName(std::string(name));
return SCE_OK; return ORBIS_OK;
}
/// Destroys an event queue created by sceKernelCreateEqueue().
///
/// @param eq  Queue handle to destroy.
/// @return ORBIS_KERNEL_ERROR_EBADF if `eq` is null, ORBIS_OK otherwise.
int PS4_SYSV_ABI sceKernelDeleteEqueue(SceKernelEqueue eq) {
    if (eq == nullptr) {
        // Use the ORBIS_* spelling like the other error returns in this change set.
        return ORBIS_KERNEL_ERROR_EBADF;
    }

    // Releases the EqueueInternal that sceKernelCreateEqueue() allocated with `new`.
    delete eq;
    return ORBIS_OK;
}
int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int num, int* out, int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int num, int* out,

View file

@ -11,6 +11,7 @@ using SceKernelUseconds = u32;
using SceKernelEqueue = EqueueInternal*; using SceKernelEqueue = EqueueInternal*;
int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name); int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name);
int PS4_SYSV_ABI sceKernelDeleteEqueue(SceKernelEqueue eq);
int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int num, int* out, int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int num, int* out,
SceKernelUseconds* timo); SceKernelUseconds* timo);

View file

@ -169,6 +169,7 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap); LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap);
// equeue // equeue
LIB_FUNCTION("D0OdFMjp46I", "libkernel", 1, "libkernel", 1, 1, sceKernelCreateEqueue); LIB_FUNCTION("D0OdFMjp46I", "libkernel", 1, "libkernel", 1, 1, sceKernelCreateEqueue);
LIB_FUNCTION("jpFjmgAC5AE", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteEqueue);
LIB_FUNCTION("fzyMKs9kim0", "libkernel", 1, "libkernel", 1, 1, sceKernelWaitEqueue); LIB_FUNCTION("fzyMKs9kim0", "libkernel", 1, "libkernel", 1, 1, sceKernelWaitEqueue);
// misc // misc
LIB_FUNCTION("WslcK1FQcGI", "libkernel", 1, "libkernel", 1, 1, sceKernelIsNeoMode); LIB_FUNCTION("WslcK1FQcGI", "libkernel", 1, "libkernel", 1, 1, sceKernelIsNeoMode);

View file

@ -6,6 +6,7 @@
#include "core/libraries/error_codes.h" #include "core/libraries/error_codes.h"
#include "core/libraries/kernel/time_management.h" #include "core/libraries/kernel/time_management.h"
#include "core/libraries/videoout/driver.h" #include "core/libraries/videoout/driver.h"
#include "core/platform.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/renderer_vulkan.h"
@ -196,16 +197,22 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) {
reinterpret_cast<void*>(req.flip_arg)); reinterpret_cast<void*>(req.flip_arg));
} }
} }
// Reset flip label
req.port->buffer_labels[req.index] = 0;
LOG_INFO(Lib_VideoOut, "Flip done [buf = {}]", req.index);
} }
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) { bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
bool is_eop /*= false*/) {
const auto& buffer = port->buffer_slots[index]; const auto& buffer = port->buffer_slots[index];
const auto& group = port->groups[buffer.group_index]; const auto& group = port->groups[buffer.group_index];
auto* frame = renderer->PrepareFrame(group, buffer.address_left); auto* frame = renderer->PrepareFrame(group, buffer.address_left);
std::scoped_lock lock{mutex}; std::scoped_lock lock{mutex};
if (requests.size() >= 2) { if (requests.size() >= port->NumRegisteredBuffers()) {
LOG_ERROR(Lib_VideoOut, "Flip queue is full");
return false; return false;
} }
@ -215,6 +222,7 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) {
.index = index, .index = index,
.flip_arg = flip_arg, .flip_arg = flip_arg,
.submit_tsc = Libraries::Kernel::sceKernelReadTsc(), .submit_tsc = Libraries::Kernel::sceKernelReadTsc(),
.eop = is_eop,
}); });
port->flip_status.flipPendingNum = static_cast<int>(requests.size()); port->flip_status.flipPendingNum = static_cast<int>(requests.size());

View file

@ -19,6 +19,8 @@ struct VideoOutPort {
bool is_open = false; bool is_open = false;
SceVideoOutResolutionStatus resolution; SceVideoOutResolutionStatus resolution;
std::array<VideoOutBuffer, MaxDisplayBuffers> buffer_slots; std::array<VideoOutBuffer, MaxDisplayBuffers> buffer_slots;
std::array<uintptr_t, MaxDisplayBuffers> buffer_labels; // should be contiguous in memory
static_assert(sizeof(buffer_labels[0]) == 8u);
std::array<BufferAttributeGroup, MaxDisplayBufferGroups> groups; std::array<BufferAttributeGroup, MaxDisplayBufferGroups> groups;
FlipStatus flip_status; FlipStatus flip_status;
SceVideoOutVblankStatus vblank_status; SceVideoOutVblankStatus vblank_status;
@ -32,6 +34,11 @@ struct VideoOutPort {
} }
return index; return index;
} }
/// Returns how many entries of `buffer_slots` have a `group_index` other than -1.
[[nodiscard]] int NumRegisteredBuffers() const {
    int registered = 0;
    for (const auto& slot : buffer_slots) {
        if (slot.group_index != -1) {
            ++registered;
        }
    }
    return registered;
}
}; };
struct ServiceThreadParams { struct ServiceThreadParams {
@ -57,7 +64,7 @@ public:
int UnregisterBuffers(VideoOutPort* port, s32 attributeIndex); int UnregisterBuffers(VideoOutPort* port, s32 attributeIndex);
void Flip(std::chrono::microseconds timeout); void Flip(std::chrono::microseconds timeout);
bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg); bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false);
void Vblank(); void Vblank();
@ -68,6 +75,7 @@ private:
s32 index; s32 index;
s64 flip_arg; s64 flip_arg;
u64 submit_tsc; u64 submit_tsc;
bool eop;
}; };
std::mutex mutex; std::mutex mutex;

View file

@ -10,6 +10,7 @@
#include "core/libraries/videoout/driver.h" #include "core/libraries/videoout/driver.h"
#include "core/libraries/videoout/video_out.h" #include "core/libraries/videoout/video_out.h"
#include "core/loader/symbols_resolver.h" #include "core/loader/symbols_resolver.h"
#include "core/platform.h"
namespace Libraries::VideoOut { namespace Libraries::VideoOut {
@ -210,6 +211,27 @@ void Vblank() {
return driver->Vblank(); return driver->Vblank();
} }
/// Stores the address of the first element of the port's `buffer_labels` array in
/// `*label_addr`. Asserts that `handle` resolves to a port.
void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr) {
    const auto* const video_port = driver->GetPort(handle);
    ASSERT(video_port);

    const auto* first_label = video_port->buffer_labels.data();
    *label_addr = reinterpret_cast<uintptr_t>(first_label);
}
/// Defers a flip of buffer `buf_id` until the next interrupt is signalled.
///
/// A one-shot IRQ handler is registered; when it fires it asserts that the interrupt is
/// GfxEop and submits the flip to the driver with `is_eop` set. `mode` and `unk` are not used.
///
/// @return 0x8029000b if `handle` does not resolve to a port, ORBIS_OK otherwise.
s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk) {
    auto* const target_port = driver->GetPort(handle);
    if (target_port == nullptr) {
        return 0x8029000b;
    }

    // Capture exactly what the deferred flip needs, by value, since the handler runs later.
    const auto flip_on_eop = [target_port, buf_id, arg](Platform::InterruptId irq) {
        ASSERT_MSG(irq == Platform::InterruptId::GfxEop, "An unexpected IRQ occured");
        const bool submitted = driver->SubmitFlip(target_port, buf_id, arg, true);
        ASSERT_MSG(submitted, "EOP flip submission failed");
    };
    Platform::IrqC::Instance()->RegisterOnce(flip_on_eop);

    return ORBIS_OK;
}
void RegisterLib(Core::Loader::SymbolsResolver* sym) { void RegisterLib(Core::Loader::SymbolsResolver* sym) {
driver = std::make_unique<VideoOutDriver>(Config::getScreenWidth(), Config::getScreenHeight()); driver = std::make_unique<VideoOutDriver>(Config::getScreenWidth(), Config::getScreenHeight());

View file

@ -102,6 +102,10 @@ s32 PS4_SYSV_ABI sceVideoOutClose(s32 handle);
void Flip(std::chrono::microseconds micros); void Flip(std::chrono::microseconds micros);
void Vblank(); void Vblank();
// Internal system functions

// Writes the address of the buffer label storage of the port behind `handle` to `*label_addr`.
void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr);
// Registers a one-shot IRQ handler that submits a flip of `buf_id` (with flip argument `arg`)
// on the port behind `handle`. Returns an error code if the handle is not a known port.
s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk);
void RegisterLib(Core::Loader::SymbolsResolver* sym); void RegisterLib(Core::Loader::SymbolsResolver* sym);
} // namespace Libraries::VideoOut } // namespace Libraries::VideoOut

76
src/core/platform.h Normal file
View file

@ -0,0 +1,76 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/singleton.h"
#include "common/types.h"
#include "magic_enum.hpp"
#include <functional>
#include <mutex>
#include <optional>
#include <queue>
#include <utility>
namespace Platform {
/// Identifiers of the interrupt sources that are passed to IRQ handlers.
enum class InterruptId : u32 {
    // NOTE(review): presumably release-mem interrupts of compute queues 0..6 — confirm.
    Compute0RelMem = 0x00,
    Compute1RelMem = 0x01,
    Compute2RelMem = 0x02,
    Compute3RelMem = 0x03,
    Compute4RelMem = 0x04,
    Compute5RelMem = 0x05,
    Compute6RelMem = 0x06,
    // Graphics end-of-pipe interrupt.
    GfxEop = 0x40,
};
/// Callback type for interrupt subscribers; it receives the id of the signaled interrupt.
using IrqHandler = std::function<void(InterruptId)>;
/// Minimal interrupt dispatcher. It holds at most one persistent handler plus a FIFO queue of
/// one-shot handlers; all state is guarded by a single mutex.
///
/// NOTE: handlers are invoked while the (non-recursive) mutex is held, so a handler must not
/// call back into RegisterOnce()/Register()/Unregister()/Signal() on the same controller.
struct IrqController {
    /// Queues `handler` to be invoked once by the next Signal() call and then discarded.
    void RegisterOnce(IrqHandler handler) {
        std::unique_lock lock{m_lock};
        one_time_subscribers.emplace(std::move(handler));
    }

    /// Installs the persistent handler, which is invoked on every Signal() until Unregister().
    /// Only a single persistent handler is supported; registering a second one asserts.
    void Register(IrqHandler handler) {
        // The lock is taken before the slot is inspected so that the check cannot race with a
        // concurrent Register()/Unregister().
        std::unique_lock lock{m_lock};
        ASSERT_MSG(!persistent_handler.has_value(),
                   "Too many persistent handlers"); // Add a slot map if so
        persistent_handler.emplace(std::move(handler));
    }

    /// Removes the persistent handler, if any.
    void Unregister() {
        std::unique_lock lock{m_lock};
        persistent_handler.reset();
    }

    /// Dispatches `irq`: first to the persistent handler (if set), then to every queued one-shot
    /// handler in registration order. The one-shot queue is empty afterwards.
    void Signal(InterruptId irq) {
        std::unique_lock lock{m_lock};

        LOG_TRACE(Core, "IRQ signaled: {}", magic_enum::enum_name(irq));

        if (persistent_handler) {
            persistent_handler.value()(irq);
        }

        while (!one_time_subscribers.empty()) {
            const auto& h = one_time_subscribers.front();
            h(irq);

            one_time_subscribers.pop();
        }
    }

private:
    std::optional<IrqHandler> persistent_handler{};
    std::queue<IrqHandler> one_time_subscribers{};
    std::mutex m_lock{};
};
/// Singleton wrapper around IrqController; the shared controller is reached via
/// `IrqC::Instance()`.
using IrqC = Common::Singleton<IrqController>;
} // namespace Platform

View file

@ -3,6 +3,7 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/io_file.h" #include "common/io_file.h"
#include "common/thread.h"
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pm4_cmds.h" #include "video_core/amdgpu/pm4_cmds.h"
@ -11,6 +12,8 @@ namespace AmdGpu {
Liverpool::Liverpool() = default; Liverpool::Liverpool() = default;
void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) { void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
Common::SetCurrentThreadName("CommandProcessor_Gfx");
auto* header = reinterpret_cast<PM4Header*>(cmdbuf); auto* header = reinterpret_cast<PM4Header*>(cmdbuf);
u32 processed_cmd_size = 0; u32 processed_cmd_size = 0;
@ -25,30 +28,30 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
case PM4ItOpcode::Nop: case PM4ItOpcode::Nop:
break; break;
case PM4ItOpcode::SetContextReg: { case PM4ItOpcode::SetContextReg: {
auto* set_data = reinterpret_cast<PM4CmdSetData*>(header); const auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[ContextRegWordOffset + set_data->reg_offset], std::memcpy(&regs.reg_array[ContextRegWordOffset + set_data->reg_offset],
header + 2, (count - 1) * sizeof(u32)); header + 2, (count - 1) * sizeof(u32));
break; break;
} }
case PM4ItOpcode::SetShReg: { case PM4ItOpcode::SetShReg: {
auto* set_data = reinterpret_cast<PM4CmdSetData*>(header); const auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2, std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
(count - 1) * sizeof(u32)); (count - 1) * sizeof(u32));
break; break;
} }
case PM4ItOpcode::SetUconfigReg: { case PM4ItOpcode::SetUconfigReg: {
auto* set_data = reinterpret_cast<PM4CmdSetData*>(header); const auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[UconfigRegWordOffset + set_data->reg_offset], std::memcpy(&regs.reg_array[UconfigRegWordOffset + set_data->reg_offset],
header + 2, (count - 1) * sizeof(u32)); header + 2, (count - 1) * sizeof(u32));
break; break;
} }
case PM4ItOpcode::IndexType: { case PM4ItOpcode::IndexType: {
auto* index_type = reinterpret_cast<PM4CmdDrawIndexType*>(header); const auto* index_type = reinterpret_cast<PM4CmdDrawIndexType*>(header);
regs.index_buffer_type.raw = index_type->raw; regs.index_buffer_type.raw = index_type->raw;
break; break;
} }
case PM4ItOpcode::DrawIndex2: { case PM4ItOpcode::DrawIndex2: {
auto* draw_index = reinterpret_cast<PM4CmdDrawIndex2*>(header); const auto* draw_index = reinterpret_cast<PM4CmdDrawIndex2*>(header);
regs.max_index_size = draw_index->max_size; regs.max_index_size = draw_index->max_size;
regs.index_base_address.base_addr_lo = draw_index->index_base_lo; regs.index_base_address.base_addr_lo = draw_index->index_base_lo;
regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi); regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi);
@ -58,22 +61,52 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
break; break;
} }
case PM4ItOpcode::DrawIndexAuto: { case PM4ItOpcode::DrawIndexAuto: {
auto* draw_index = reinterpret_cast<PM4CmdDrawIndexAuto*>(header); const auto* draw_index = reinterpret_cast<PM4CmdDrawIndexAuto*>(header);
regs.num_indices = draw_index->index_count; regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator; regs.draw_initiator = draw_index->draw_initiator;
// rasterizer->DrawIndex(); // rasterizer->DrawIndex();
break; break;
} }
case PM4ItOpcode::DispatchDirect: {
// const auto* dispatch_direct = reinterpret_cast<PM4CmdDispatchDirect*>(header);
break;
}
case PM4ItOpcode::EventWriteEos: {
const auto* event_eos = reinterpret_cast<PM4CmdEventWriteEos*>(header);
event_eos->SignalFence();
break;
}
case PM4ItOpcode::EventWriteEop: { case PM4ItOpcode::EventWriteEop: {
auto* event_write = reinterpret_cast<PM4CmdEventWriteEop*>(header); const auto* event_eop = reinterpret_cast<PM4CmdEventWriteEop*>(header);
const InterruptSelect irq_sel = event_write->int_sel; event_eop->SignalFence();
const DataSelect data_sel = event_write->data_sel;
ASSERT(irq_sel == InterruptSelect::None && data_sel == DataSelect::Data64);
*event_write->Address() = event_write->DataQWord();
break; break;
} }
case PM4ItOpcode::DmaData: { case PM4ItOpcode::DmaData: {
auto* dma_data = reinterpret_cast<PM4DmaData*>(header); const auto* dma_data = reinterpret_cast<PM4DmaData*>(header);
break;
}
case PM4ItOpcode::WriteData: {
const auto* write_data = reinterpret_cast<PM4CmdWriteData*>(header);
ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5);
const u32 data_size = (header->type3.count.Value() - 2) * 4;
if (!write_data->wr_one_addr.Value()) {
std::memcpy(write_data->Address<void*>(), write_data->data, data_size);
} else {
UNREACHABLE();
}
break;
}
case PM4ItOpcode::AcquireMem: {
// const auto* acquire_mem = reinterpret_cast<PM4CmdAcquireMem*>(header);
break;
}
case PM4ItOpcode::WaitRegMem: {
const auto* wait_reg_mem = reinterpret_cast<PM4CmdWaitRegMem*>(header);
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
while (!wait_reg_mem->Test()) {
using namespace std::chrono_literals;
std::this_thread::sleep_for(1ms);
}
break; break;
} }
default: default:

View file

@ -3,10 +3,15 @@
#pragma once #pragma once
#include <array> #include "common/assert.h"
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/types.h" #include "common/types.h"
#include <array>
#include <condition_variable>
#include <functional>
#include <future>
namespace AmdGpu { namespace AmdGpu {
#define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32)) #define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32))
@ -610,7 +615,20 @@ struct Liverpool {
public: public:
Liverpool(); Liverpool();
/// Starts processing of the command buffer `cmdbuf` (`size_in_bytes` bytes long) on a
/// separate thread. Asserts that no previous submission is still pending; the caller must
/// call SubmitDone() before submitting again.
void Submit(u32* cmdbuf, u32 size_in_bytes) {
    ASSERT_MSG(!cp.valid(), "Trying to submit while previous submission is pending");
    // Request std::launch::async explicitly: with the default policy the implementation is
    // allowed to defer the task until `cp.get()`, which would run the command processor
    // synchronously inside SubmitDone() instead of concurrently with the submitter.
    cp = std::async(std::launch::async, &Liverpool::ProcessCmdList, this, cmdbuf,
                    size_in_bytes);
}
/// Blocks until the submission started by Submit() has been fully processed and releases the
/// pending state so that Submit() may be called again. Does nothing if no submission is
/// pending.
void SubmitDone() {
    // This is wrong as `submitDone()` should never be blocking. The behavior will be
    // reworked with multiple queues introduction
    if (!cp.valid()) {
        // Calling get() on a future without shared state is undefined behavior.
        return;
    }
    cp.get();
}
private:
void ProcessCmdList(u32* cmdbuf, u32 size_in_bytes); void ProcessCmdList(u32* cmdbuf, u32 size_in_bytes);
std::future<void> cp{};
}; };
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);

View file

@ -5,7 +5,9 @@
#include <cstring> #include <cstring>
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/rdtsc.h"
#include "common/types.h" #include "common/types.h"
#include "core/platform.h"
#include "video_core/amdgpu/pm4_opcodes.h" #include "video_core/amdgpu/pm4_opcodes.h"
namespace AmdGpu { namespace AmdGpu {
@ -201,13 +203,18 @@ struct PM4CmdNop {
PM4Type3Header header; PM4Type3Header header;
u32 data_block[0]; u32 data_block[0];
enum class PayloadType : u32 { enum PayloadType : u32 {
DebugMarkerPush = 0x68750001, ///< Begin of GPU event scope DebugMarkerPush = 0x68750001u, ///< Begin of GPU event scope
DebugMarkerPop = 0x68750002, ///< End of GPU event scope DebugMarkerPop = 0x68750002u, ///< End of GPU event scope
SetVsharpInUdata = 0x68750004, ///< Indicates that V# will be set in the next packet SetVsharpInUdata = 0x68750004u, ///< Indicates that V# will be set in the next packet
SetTsharpInUdata = 0x68750005, ///< Indicates that T# will be set in the next packet SetTsharpInUdata = 0x68750005u, ///< Indicates that T# will be set in the next packet
SetSsharpInUdata = 0x68750006, ///< Indicates that S# will be set in the next packet SetSsharpInUdata = 0x68750006u, ///< Indicates that S# will be set in the next packet
DebugColorMarkerPush = 0x6875000e, ///< Begin of GPU event scope with color DebugColorMarkerPush = 0x6875000eu, ///< Begin of GPU event scope with color
PatchedFlip = 0x68750776u, ///< Patched flip marker
PrepareFlip = 0x68750777u, ///< Flip marker
PrepareFlipLabel = 0x68750778u, ///< Flip marker with label address
PrepareFlipInterrupt = 0x68750780u, ///< Flip marker with interrupt
PrepareFlipInterruptLabel = 0x68750781u, ///< Flip marker with interrupt and label
}; };
}; };
@ -277,13 +284,52 @@ struct PM4CmdEventWriteEop {
u32 data_lo; ///< Value that will be written to memory when event occurs u32 data_lo; ///< Value that will be written to memory when event occurs
u32 data_hi; ///< Value that will be written to memory when event occurs u32 data_hi; ///< Value that will be written to memory when event occurs
u64* Address() const { template <typename T>
return reinterpret_cast<u64*>(address_lo | u64(address_hi) << 32); T* Address() const {
return reinterpret_cast<T*>(address_lo | u64(address_hi) << 32);
}
u32 DataDWord() const {
return data_lo;
} }
u64 DataQWord() const { u64 DataQWord() const {
return data_lo | u64(data_hi) << 32; return data_lo | u64(data_hi) << 32;
} }
/// Executes the end-of-pipe event: writes the payload selected by `data_sel` to the target
/// address, then raises the interrupt selected by `int_sel`. Unsupported selectors hit
/// UNREACHABLE().
void SignalFence() const {
    // Step 1: store the fence payload.
    const auto payload_kind = data_sel.Value();
    if (payload_kind == DataSelect::Data32Low) {
        *Address<u32>() = DataDWord();
    } else if (payload_kind == DataSelect::Data64) {
        *Address<u64>() = DataQWord();
    } else if (payload_kind == DataSelect::PerfCounter) {
        // The current timestamp counter is written instead of the packet data.
        *Address<u64>() = Common::FencedRDTSC();
    } else {
        UNREACHABLE();
    }

    // Step 2: notify interrupt subscribers, if requested.
    const auto irq_kind = int_sel.Value();
    if (irq_kind == InterruptSelect::None) {
        // No interrupt
    } else if (irq_kind == InterruptSelect::IrqWhenWriteConfirm) {
        Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxEop);
    } else {
        UNREACHABLE();
    }
}
}; };
struct PM4DmaData { struct PM4DmaData {
@ -311,11 +357,24 @@ struct PM4DmaData {
}; };
struct PM4CmdWaitRegMem { struct PM4CmdWaitRegMem {
enum class Engine : u32 { Me = 0u, Pfp = 1u };
enum class MemSpace : u32 { Register = 0u, Memory = 1u };
enum class Function : u32 {
Always = 0u,
LessThan = 1u,
LessThanEqual = 2u,
Equal = 3u,
NotEqual = 4u,
GreaterThanEqual = 5u,
GreaterThan = 6u,
Reserved = 7u
};
PM4Type3Header header; PM4Type3Header header;
union { union {
BitField<0, 3, u32> function; BitField<0, 3, Function> function;
BitField<4, 1, u32> mem_space; BitField<4, 1, MemSpace> mem_space;
BitField<8, 1, u32> engine; BitField<8, 1, Engine> engine;
u32 raw; u32 raw;
}; };
u32 poll_addr_lo; u32 poll_addr_lo;
@ -323,6 +382,116 @@ struct PM4CmdWaitRegMem {
u32 ref; u32 ref;
u32 mask; u32 mask;
u32 poll_interval; u32 poll_interval;
/// Returns the poll address assembled from `poll_addr_hi` (upper 32 bits) and `poll_addr_lo`
/// (lower 32 bits).
u32* Address() const {
    const uintptr_t upper = uintptr_t(poll_addr_hi) << 32;
    return reinterpret_cast<u32*>(upper | poll_addr_lo);
}
bool Test() const {
switch (function.Value()) {
case Function::Always: {
return true;
}
case Function::LessThan: {
return (*Address() & mask) < ref;
}
case Function::LessThanEqual: {
return (*Address() & mask) <= ref;
}
case Function::Equal: {
return (*Address() & mask) == ref;
}
case Function::NotEqual: {
return (*Address() & mask) != ref;
}
case Function::GreaterThanEqual: {
return (*Address() & mask) >= ref;
}
case Function::GreaterThan: {
return (*Address() & mask) > ref;
}
case Function::Reserved:
[[fallthrough]];
default: {
UNREACHABLE();
}
}
}
};
/// PM4 WRITE_DATA packet: a control dword, a 64-bit destination address and a variable number
/// of payload dwords that follow the fixed part of the packet.
struct PM4CmdWriteData {
    PM4Type3Header header;
    union {
        // DST_SEL occupies bits [11:8]. The previous 11-bit width overlapped `wr_one_addr`
        // (bit 16), so the decoded selector was corrupted whenever that flag was set.
        BitField<8, 4, u32> dst_sel;
        BitField<16, 1, u32> wr_one_addr; ///< Write all payload dwords to the same address
        BitField<20, 1, u32> wr_confirm;  ///< Write-confirm flag (bit 20)
        // ENGINE_SEL occupies bits [31:30]; a 1-bit field cannot represent the value 2.
        BitField<30, 2, u32> engine_sel;
        u32 raw;
    };
    union {
        struct {
            u32 dst_addr_lo;
            u32 dst_addr_hi;
        };
        u64 addr64;
    };
    u32 data[0]; ///< Payload dwords; the count is derived from the packet header

    /// Sets the destination address from a pointer-like value.
    template <typename T>
    void Address(T addr) {
        addr64 = reinterpret_cast<u64>(addr);
    }

    /// Returns the destination address as a pointer to `T`.
    template <typename T>
    T* Address() const {
        return reinterpret_cast<T*>(addr64);
    }
};
/// PM4 EVENT_WRITE_EOS packet. Only the fence-signal command is handled by SignalFence().
struct PM4CmdEventWriteEos {
    enum class Command : u32 {
        GdsStore = 1u,
        SingalFence = 2u, // (sic) spelling kept because the enumerator name is part of the API
    };

    PM4Type3Header header;
    union {
        u32 event_control;
        BitField<0, 6, u32> event_type;  ///< Event type written to VGT_EVENT_INITIATOR
        BitField<8, 4, u32> event_index; ///< Event index
    };
    u32 address_lo;
    union {
        u32 cmd_info;
        BitField<0, 16, u32> address_hi;  ///< High bits of address
        BitField<29, 3, Command> command; ///< Command
    };
    union {
        u32 data; ///< Fence value that will be written to memory when event occurs
        BitField<0, 16, u32>
            gds_index; ///< Indexed offset from the start of the segment within the partition
        BitField<16, 16, u32> size; ///< Number of DWs to read from the GDS
    };

    /// Destination address built from `address_hi` (upper part) and `address_lo`.
    u32* Address() const {
        const u64 upper = u64(address_hi) << 32;
        return reinterpret_cast<u32*>(address_lo | upper);
    }

    /// Fence value carried by the packet.
    u32 DataDWord() const {
        return data;
    }

    /// Writes the fence value to the destination address when the packet's command is the
    /// fence signal; any other command hits UNREACHABLE().
    void SignalFence() const {
        if (command.Value() == Command::SingalFence) {
            *Address() = DataDWord();
        } else {
            UNREACHABLE();
        }
    }
};
} // namespace AmdGpu } // namespace AmdGpu

View file

@ -49,7 +49,7 @@ enum class PM4ItOpcode : u32 {
PremableCntl = 0x4A, PremableCntl = 0x4A,
DmaData = 0x50, DmaData = 0x50,
ContextRegRmw = 0x51, ContextRegRmw = 0x51,
Unknown58 = 0x58, AcquireMem = 0x58,
LoadShReg = 0x5F, LoadShReg = 0x5F,
LoadConfigReg = 0x60, LoadConfigReg = 0x60,
LoadContextReg = 0x61, LoadContextReg = 0x61,