mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-04 06:06:00 +00:00
Merge pull request #128 from psucien/gnm_driver/basic_sync
gnm_driver: Gnm eventq and GPU flips
This commit is contained in:
commit
b326ce5f69
|
@ -6,6 +6,8 @@
|
||||||
#include "core/libraries/error_codes.h"
|
#include "core/libraries/error_codes.h"
|
||||||
#include "core/libraries/gnmdriver/gnmdriver.h"
|
#include "core/libraries/gnmdriver/gnmdriver.h"
|
||||||
#include "core/libraries/libs.h"
|
#include "core/libraries/libs.h"
|
||||||
|
#include "core/libraries/videoout/video_out.h"
|
||||||
|
#include "core/platform.h"
|
||||||
#include "video_core/amdgpu/liverpool.h"
|
#include "video_core/amdgpu/liverpool.h"
|
||||||
#include "video_core/amdgpu/pm4_cmds.h"
|
#include "video_core/amdgpu/pm4_cmds.h"
|
||||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||||
|
@ -26,12 +28,33 @@ template <u32 data_block_size>
|
||||||
static inline u32* WriteTrailingNop(u32* cmdbuf) {
|
static inline u32* WriteTrailingNop(u32* cmdbuf) {
|
||||||
auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf);
|
auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf);
|
||||||
nop->header = PM4Type3Header{PM4ItOpcode::Nop, data_block_size - 1};
|
nop->header = PM4Type3Header{PM4ItOpcode::Nop, data_block_size - 1};
|
||||||
nop->data_block[0] = 0; // only one out of `data_block_size` is initialized
|
nop->data_block[0] = 0u; // only one out of `data_block_size` is initialized
|
||||||
return cmdbuf + data_block_size + 1 /* header */;
|
return cmdbuf + data_block_size + 1 /* header */;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmAddEqEvent() {
|
s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata) {
|
||||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
LOG_TRACE(Lib_GnmDriver, "called");
|
||||||
|
ASSERT_MSG(id == SceKernelEvent::Type::GfxEop);
|
||||||
|
|
||||||
|
if (!eq) {
|
||||||
|
return ORBIS_KERNEL_ERROR_EBADF;
|
||||||
|
}
|
||||||
|
|
||||||
|
EqueueEvent kernel_event{};
|
||||||
|
kernel_event.event.ident = id;
|
||||||
|
kernel_event.event.filter = EVFILT_GRAPHICS_CORE;
|
||||||
|
kernel_event.event.flags = 1;
|
||||||
|
kernel_event.event.fflags = 0;
|
||||||
|
kernel_event.event.data = id;
|
||||||
|
kernel_event.event.udata = udata;
|
||||||
|
eq->addEvent(kernel_event);
|
||||||
|
|
||||||
|
Platform::IrqC::Instance()->Register([=](Platform::InterruptId irq) {
|
||||||
|
ASSERT_MSG(irq == Platform::InterruptId::GfxEop,
|
||||||
|
"An unexpected IRQ occured"); // We need to conver IRQ# to event id and do proper
|
||||||
|
// filtering in trigger function
|
||||||
|
eq->triggerEvent(SceKernelEvent::Type::GfxEop, EVFILT_GRAPHICS_CORE, nullptr);
|
||||||
|
});
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,7 +86,7 @@ s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t add
|
||||||
wait_reg_mem->poll_addr_hi = u32(addr >> 32u);
|
wait_reg_mem->poll_addr_hi = u32(addr >> 32u);
|
||||||
wait_reg_mem->ref = ref;
|
wait_reg_mem->ref = ref;
|
||||||
wait_reg_mem->mask = mask;
|
wait_reg_mem->mask = mask;
|
||||||
wait_reg_mem->poll_interval = 10;
|
wait_reg_mem->poll_interval = 10u;
|
||||||
|
|
||||||
WriteTrailingNop<2>(cmdbuf + 7);
|
WriteTrailingNop<2>(cmdbuf + 7);
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
|
@ -131,8 +154,17 @@ int PS4_SYSV_ABI sceGnmDebugHardwareStatus() {
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmDeleteEqEvent() {
|
s32 PS4_SYSV_ABI sceGnmDeleteEqEvent(SceKernelEqueue eq, u64 id) {
|
||||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
LOG_TRACE(Lib_GnmDriver, "called");
|
||||||
|
ASSERT_MSG(id == SceKernelEvent::Type::GfxEop);
|
||||||
|
|
||||||
|
if (!eq) {
|
||||||
|
return ORBIS_KERNEL_ERROR_EBADF;
|
||||||
|
}
|
||||||
|
|
||||||
|
eq->removeEvent(id);
|
||||||
|
|
||||||
|
Platform::IrqC::Instance()->Unregister();
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -205,14 +237,14 @@ u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size) {
|
||||||
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE1
|
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE1
|
||||||
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x215u, 0x170u); // COMPUTE_RESOURCE_LIMITS
|
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x215u, 0x170u); // COMPUTE_RESOURCE_LIMITS
|
||||||
|
|
||||||
cmdbuf = WriteHeader<PM4ItOpcode::Unknown58>(
|
cmdbuf = WriteHeader<PM4ItOpcode::AcquireMem>(
|
||||||
cmdbuf, 6); // for some reason the packet indicates larger size
|
cmdbuf, 6); // for some reason the packet indicates larger size
|
||||||
cmdbuf = WriteBody(cmdbuf, 0x28000000u, 0u, 0u, 0u, 0u);
|
cmdbuf = WriteBody(cmdbuf, 0x28000000u, 0u, 0u, 0u, 0u);
|
||||||
|
|
||||||
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xef);
|
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xef);
|
||||||
cmdbuf = WriteBody(cmdbuf, 0xau, 0u);
|
cmdbuf = WriteBody(cmdbuf, 0xau, 0u);
|
||||||
} else {
|
} else {
|
||||||
cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0x100);
|
cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xff);
|
||||||
}
|
}
|
||||||
return 0x100; // it is a size, not a retcode
|
return 0x100; // it is a size, not a retcode
|
||||||
}
|
}
|
||||||
|
@ -321,7 +353,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size) {
|
||||||
if constexpr (g_fair_hw_init) {
|
if constexpr (g_fair_hw_init) {
|
||||||
ASSERT_MSG(0, "Not implemented");
|
ASSERT_MSG(0, "Not implemented");
|
||||||
} else {
|
} else {
|
||||||
cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0x100);
|
cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xff);
|
||||||
}
|
}
|
||||||
return 0x100; // it is a size, not a retcode
|
return 0x100; // it is a size, not a retcode
|
||||||
}
|
}
|
||||||
|
@ -335,7 +367,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size) {
|
||||||
if constexpr (g_fair_hw_init) {
|
if constexpr (g_fair_hw_init) {
|
||||||
ASSERT_MSG(0, "Not implemented");
|
ASSERT_MSG(0, "Not implemented");
|
||||||
} else {
|
} else {
|
||||||
cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0x100);
|
cmdbuf = cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xff);
|
||||||
}
|
}
|
||||||
return 0x100; // it is a size, not a retcode
|
return 0x100; // it is a size, not a retcode
|
||||||
}
|
}
|
||||||
|
@ -565,9 +597,9 @@ s32 PS4_SYSV_ABI sceGnmInsertPopMarker(u32* cmdbuf, u32 size) {
|
||||||
LOG_TRACE(Lib_GnmDriver, "called");
|
LOG_TRACE(Lib_GnmDriver, "called");
|
||||||
|
|
||||||
if (cmdbuf && (size == 6)) {
|
if (cmdbuf && (size == 6)) {
|
||||||
cmdbuf = WritePacket<PM4ItOpcode::Nop>(
|
cmdbuf =
|
||||||
cmdbuf, PM4ShaderType::ShaderGraphics,
|
WritePacket<PM4ItOpcode::Nop>(cmdbuf, PM4ShaderType::ShaderGraphics,
|
||||||
static_cast<u32>(PM4CmdNop::PayloadType::DebugMarkerPop), 0u, 0u, 0u, 0u);
|
PM4CmdNop::PayloadType::DebugMarkerPop, 0u, 0u, 0u, 0u);
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -588,7 +620,7 @@ s32 PS4_SYSV_ABI sceGnmInsertPushMarker(u32* cmdbuf, u32 size, const char* marke
|
||||||
auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf);
|
auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf);
|
||||||
nop->header =
|
nop->header =
|
||||||
PM4Type3Header{PM4ItOpcode::Nop, packet_size, PM4ShaderType::ShaderGraphics};
|
PM4Type3Header{PM4ItOpcode::Nop, packet_size, PM4ShaderType::ShaderGraphics};
|
||||||
nop->data_block[0] = static_cast<u32>(PM4CmdNop::PayloadType::DebugMarkerPush);
|
nop->data_block[0] = PM4CmdNop::PayloadType::DebugMarkerPush;
|
||||||
const auto marker_len = len + 1;
|
const auto marker_len = len + 1;
|
||||||
std::memcpy(&nop->data_block[1], marker, marker_len);
|
std::memcpy(&nop->data_block[1], marker, marker_len);
|
||||||
std::memset(reinterpret_cast<u8*>(&nop->data_block[1]) + marker_len, 0,
|
std::memset(reinterpret_cast<u8*>(&nop->data_block[1]) + marker_len, 0,
|
||||||
|
@ -614,8 +646,24 @@ int PS4_SYSV_ABI sceGnmInsertThreadTraceMarker() {
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmInsertWaitFlipDone() {
|
s32 PS4_SYSV_ABI sceGnmInsertWaitFlipDone(u32* cmdbuf, u32 size, s32 vo_handle, u32 buf_idx) {
|
||||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
LOG_TRACE(Lib_GnmDriver, "called");
|
||||||
|
|
||||||
|
if (size != 7) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
uintptr_t label_addr{};
|
||||||
|
VideoOut::sceVideoOutGetBufferLabelAddress(vo_handle, &label_addr);
|
||||||
|
|
||||||
|
auto* wait_reg_mem = reinterpret_cast<PM4CmdWaitRegMem*>(cmdbuf);
|
||||||
|
wait_reg_mem->header = PM4Type3Header{PM4ItOpcode::WaitRegMem, 5};
|
||||||
|
wait_reg_mem->raw = 0x13u;
|
||||||
|
*reinterpret_cast<uintptr_t*>(&wait_reg_mem->poll_addr_lo) =
|
||||||
|
(label_addr + buf_idx * sizeof(uintptr_t)) & ~0x3ull;
|
||||||
|
wait_reg_mem->ref = 0u;
|
||||||
|
wait_reg_mem->mask = 0xffff'ffffu;
|
||||||
|
wait_reg_mem->poll_interval = 10u;
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -824,8 +872,48 @@ int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader() {
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmSetEmbeddedVsShader() {
|
s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 modifier) {
|
||||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
LOG_TRACE(Lib_GnmDriver, "called");
|
||||||
|
|
||||||
|
// A fullscreen triangle with one uv set
|
||||||
|
const static u32 shader_code[] = {
|
||||||
|
0xbeeb03ffu, 00000007u, // s_mov_b32 vcc_hi, $0x00000007
|
||||||
|
0x36020081u, // v_and_b32 v1, 1, v0
|
||||||
|
0x34020281u, // v_lshlrev_b32 v1, 1, v1
|
||||||
|
0x360000c2u, // v_and_b32 v0, -2, v0
|
||||||
|
0x4a0202c1u, // v_add_i32 v1, vcc, -1, v1
|
||||||
|
0x4a0000c1u, // v_add_i32 v0, vcc, -1, v0
|
||||||
|
0x7e020b01u, // v_cvt_f32_i32 v1, v1
|
||||||
|
0x7e040280u, // v_cvt_f32_i32 v0, v0
|
||||||
|
0x7e0602f2u, // v_mov_b32 v3, 1.0
|
||||||
|
0xf80008cfu, 0x03020001u, // exp pos0, v1, v0, v2, v3 done
|
||||||
|
0xf800020fu, 0x03030303u, // exp param0, v3, v3, v3, v3
|
||||||
|
0xbf810000u, // s_endpgm
|
||||||
|
|
||||||
|
// OrbShdr header
|
||||||
|
0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du,
|
||||||
|
0x00000017u, 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u};
|
||||||
|
|
||||||
|
const auto shader_addr = uintptr_t(&shader_code); // Original address is 0xfe000f10
|
||||||
|
const static u32 vs_regs[] = {
|
||||||
|
u32(shader_addr >> 8), u32(shader_addr >> 40), 0xc0000u, 4, 0, 4, 0, 7};
|
||||||
|
|
||||||
|
if (shader_id != 0) {
|
||||||
|
return 0x8eee00ff;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normally the driver will do a call to `sceGnmSetVsShader()`, but this function has
|
||||||
|
// a check for zero in the upper part of shader address. In our case, the address is a
|
||||||
|
// pointer to a stack memory, so the check will likely fail. To workaround it we will
|
||||||
|
// repeat set shader functionality here as it is trivial.
|
||||||
|
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0], 0u); // SPI_SHADER_PGM_LO_VS
|
||||||
|
cmdbuf =
|
||||||
|
PM4CmdSetData::SetShReg(cmdbuf, 0x4au, vs_regs[2], vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS
|
||||||
|
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]); // PA_CL_VS_OUT_CNTL
|
||||||
|
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b1u, vs_regs[4]); // SPI_VS_OUT_CONFIG
|
||||||
|
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT
|
||||||
|
|
||||||
|
WriteTrailingNop<11>(cmdbuf);
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -960,6 +1048,8 @@ int PS4_SYSV_ABI sceGnmSetVgtControl() {
|
||||||
}
|
}
|
||||||
|
|
||||||
s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u32 shader_modifier) {
|
s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u32 shader_modifier) {
|
||||||
|
LOG_TRACE(Lib_GnmDriver, "called");
|
||||||
|
|
||||||
if (!cmdbuf || size <= 0x1c) {
|
if (!cmdbuf || size <= 0x1c) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
@ -987,7 +1077,6 @@ s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u3
|
||||||
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT
|
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT
|
||||||
|
|
||||||
WriteTrailingNop<11>(cmdbuf);
|
WriteTrailingNop<11>(cmdbuf);
|
||||||
|
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1186,44 +1275,142 @@ int PS4_SYSV_ABI sceGnmSqttWaitForEvent() {
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers() {
|
/// Submits an end-of-pipe flip request and rewrites the `prepareFlip` placeholder found
/// 64 dwords before the end of the command buffer into the packets performing the flip.
///
/// @param cmdbuf    Start of the command buffer holding the placeholder.
/// @param size      Command buffer size in dwords; must be at least 64.
/// @param vo_handle Video-out port handle.
/// @param buf_idx   Index of the display buffer to flip to.
/// @param flip_mode Forwarded to `sceVideoOutSubmitEopFlip`.
/// @param flip_arg  Forwarded to `sceVideoOutSubmitEopFlip`.
/// @param unk       Currently unused.
/// @return ORBIS_OK on success, 0x80d11081 when the flip queue is full, otherwise the error
///         reported by `sceVideoOutSubmitEopFlip`.
static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf_idx, u32 flip_mode,
                                   u32 flip_arg, void* unk) {
    // Guard the unsigned subtraction below: with `size < 64` it would wrap around and move
    // `cmdbuf` far outside of the buffer.
    ASSERT_MSG(cmdbuf != nullptr && size >= 64,
               "Command buffer is too small to hold `prepareFlip` packet");

    // check for `prepareFlip` packet
    cmdbuf += size - 64;
    ASSERT_MSG(cmdbuf[0] == 0xc03e1000, "Can't find `prepareFlip` packet");

    // Keep a copy of the placeholder head, it is overwritten in place below
    std::array<u32, 7> backup{};
    std::memcpy(backup.data(), cmdbuf, backup.size() * sizeof(decltype(backup)::value_type));

    ASSERT_MSG(((backup[2] & 3) == 0u) || (backup[1] != PM4CmdNop::PayloadType::PrepareFlipLabel),
               "Invalid flip packet");
    ASSERT_MSG(buf_idx != 0xffff'ffffu, "Invalid VO buffer index");

    const s32 flip_result = VideoOut::sceVideoOutSubmitEopFlip(vo_handle, buf_idx, flip_mode,
                                                               flip_arg, nullptr /*unk*/);
    if (flip_result != 0) {
        if (flip_result == 0x80290012) {
            LOG_ERROR(Lib_GnmDriver, "Flip queue is full");
            return 0x80d11081;
        } else {
            LOG_ERROR(Lib_GnmDriver, "Flip request failed");
            return flip_result;
        }
    }

    uintptr_t label_addr{};
    VideoOut::sceVideoOutGetBufferLabelAddress(vo_handle, &label_addr);

    // Write event to lock the VO surface
    auto* write_lock = reinterpret_cast<PM4CmdWriteData*>(cmdbuf);
    write_lock->header = PM4Type3Header{PM4ItOpcode::WriteData, 3};
    write_lock->raw = 0x500u;
    const auto addr = (label_addr + buf_idx * sizeof(label_addr)) & ~0x3ull;
    write_lock->Address<uintptr_t>(addr);
    write_lock->data[0] = 1;

    // The rest of the placeholder becomes a NOP whose length depends on what follows it
    auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf + 5);
    const u32 payload = backup[1];

    if (payload == PM4CmdNop::PayloadType::PrepareFlip) {
        nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x39};
        nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip;
    } else if (payload == PM4CmdNop::PayloadType::PrepareFlipLabel) {
        nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x34};
        nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip;

        // Write event to update label
        auto* write_label = reinterpret_cast<PM4CmdWriteData*>(cmdbuf + 0x3b);
        write_label->header = PM4Type3Header{PM4ItOpcode::WriteData, 3};
        write_label->raw = 0x500u;
        write_label->dst_addr_lo = backup[2] & 0xffff'fffcu;
        write_label->dst_addr_hi = backup[3];
        write_label->data[0] = backup[4];
    } else if (payload == PM4CmdNop::PayloadType::PrepareFlipInterruptLabel) {
        nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x33};
        nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip;

        // End-of-pipe event that writes the label taken from the placeholder
        auto* write_eop = reinterpret_cast<PM4CmdEventWriteEop*>(cmdbuf + 0x3a);
        write_eop->header = PM4Type3Header{PM4ItOpcode::EventWriteEop, 4};
        write_eop->event_control = (backup[5] & 0x3f) + 0x500u + (backup[6] & 0x3f) * 0x1000;
        write_eop->address_lo = backup[2] & 0xffff'fffcu;
        write_eop->data_control = (backup[3] & 0xffffu) | 0x2200'0000u;
        write_eop->data_lo = backup[4];
        write_eop->data_hi = 0u;
    } else if (payload == PM4CmdNop::PayloadType::PrepareFlipInterrupt) {
        nop->header = PM4Type3Header{PM4ItOpcode::Nop, 0x33};
        nop->data_block[0] = PM4CmdNop::PayloadType::PatchedFlip;

        // End-of-pipe event without a label write
        auto* write_eop = reinterpret_cast<PM4CmdEventWriteEop*>(cmdbuf + 0x3a);
        write_eop->header = PM4Type3Header{PM4ItOpcode::EventWriteEop, 4};
        write_eop->event_control = (backup[5] & 0x3f) + 0x500u + (backup[6] & 0x3f) * 0x1000;
        write_eop->address_lo = 0u;
        write_eop->data_control = 0x100'0000u;
        write_eop->data_lo = 0u;
        write_eop->data_hi = 0u;
    } else {
        // The head of the placeholder is already overwritten at this point, so make the
        // unexpected payload visible instead of silently leaving a half-patched packet.
        LOG_ERROR(Lib_GnmDriver, "Unknown flip payload type {:#x}", payload);
    }

    return ORBIS_OK;
}
|
||||||
|
|
||||||
|
/// Patches the flip request located at the end of the last draw command buffer and then
/// submits all command buffers.
///
/// @return The result of the flip patching when it fails, 0x80d11000 for invalid arguments,
///         otherwise the result of `sceGnmSubmitCommandBuffers`.
s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, void* dcb_gpu_addrs[],
                                                   u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[],
                                                   u32* ccb_sizes_in_bytes, u32 vo_handle,
                                                   u32 buf_idx, u32 flip_mode, u32 flip_arg) {
    LOG_INFO(Lib_GnmDriver, "called [buf = {}]", buf_idx);

    // The last DCB is accessed before sceGnmSubmitCommandBuffers gets a chance to validate the
    // arguments, so the checks needed for that access are done here. `count - 1` would also
    // wrap around for an empty submission.
    if (count == 0) {
        LOG_ERROR(Lib_GnmDriver, "No command buffers to submit");
        return 0x80d11000;
    }
    if (!dcb_gpu_addrs || !dcb_sizes_in_bytes) {
        LOG_ERROR(Lib_GnmDriver, "dcbGpuAddrs and dcbSizesInBytes must not be NULL");
        return 0x80d11000;
    }

    const u32 last = count - 1;
    auto* cmdbuf = reinterpret_cast<u32*>(dcb_gpu_addrs[last]);
    const auto size_dw = dcb_sizes_in_bytes[last] / 4;
    if (!cmdbuf) {
        LOG_ERROR(Lib_GnmDriver, "Submitting a null DCB {}", last);
        return 0x80d11000;
    }

    const s32 patch_result =
        PatchFlipRequest(cmdbuf, size_dw, vo_handle, buf_idx, flip_mode, flip_arg, nullptr /*unk*/);
    if (patch_result != ORBIS_OK) {
        return patch_result;
    }

    return sceGnmSubmitCommandBuffers(count, dcb_gpu_addrs, dcb_sizes_in_bytes, ccb_gpu_addrs,
                                      ccb_sizes_in_bytes);
}
|
||||||
|
|
||||||
/// Stub: only logs that it was called and reports success.
int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload() {
    const int result = ORBIS_OK;
    LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
    return result;
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcbGpuAddrs[], u32* dcbSizesInBytes,
|
s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[],
|
||||||
void* ccbGpuAddrs[], u32* ccbSizesInBytes) {
|
u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[],
|
||||||
|
u32* ccb_sizes_in_bytes) {
|
||||||
LOG_INFO(Lib_GnmDriver, "called");
|
LOG_INFO(Lib_GnmDriver, "called");
|
||||||
ASSERT_MSG(count == 1, "Multiple command buffer submission is unsupported!");
|
ASSERT_MSG(count == 1, "Multiple command buffer submission is unsupported!");
|
||||||
|
|
||||||
if (!dcbGpuAddrs || !dcbSizesInBytes) {
|
if (!dcb_gpu_addrs || !dcb_sizes_in_bytes) {
|
||||||
LOG_ERROR(Lib_GnmDriver, "dcbGpuAddrs and dcbSizesInBytes must not be NULL");
|
LOG_ERROR(Lib_GnmDriver, "dcbGpuAddrs and dcbSizesInBytes must not be NULL");
|
||||||
return 0x80d11000;
|
return 0x80d11000;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (u32 i = 0; i < count; i++) {
|
for (u32 i = 0; i < count; i++) {
|
||||||
if (dcbSizesInBytes[i] == 0) {
|
if (dcb_sizes_in_bytes[i] == 0) {
|
||||||
LOG_ERROR(Lib_GnmDriver, "Submitting a null DCB {}", i);
|
LOG_ERROR(Lib_GnmDriver, "Submitting a null DCB {}", i);
|
||||||
return 0x80d11000;
|
return 0x80d11000;
|
||||||
}
|
}
|
||||||
if (dcbSizesInBytes[i] > 0x3ffffc) {
|
if (dcb_sizes_in_bytes[i] > 0x3ffffc) {
|
||||||
LOG_ERROR(Lib_GnmDriver, "dcbSizesInBytes[{}] ({}) is limited to (2*20)-1 DWORDS", i,
|
LOG_ERROR(Lib_GnmDriver, "dcbSizesInBytes[{}] ({}) is limited to (2*20)-1 DWORDS", i,
|
||||||
dcbSizesInBytes[i]);
|
dcb_sizes_in_bytes[i]);
|
||||||
return 0x80d11000;
|
return 0x80d11000;
|
||||||
}
|
}
|
||||||
if (ccbSizesInBytes && ccbSizesInBytes[i] > 0x3ffffc) {
|
if (ccb_sizes_in_bytes && ccb_sizes_in_bytes[i] > 0x3ffffc) {
|
||||||
LOG_ERROR(Lib_GnmDriver, "ccbSizesInBytes[{}] ({}) is limited to (2*20)-1 DWORDS", i,
|
LOG_ERROR(Lib_GnmDriver, "ccbSizesInBytes[{}] ({}) is limited to (2*20)-1 DWORDS", i,
|
||||||
ccbSizesInBytes[i]);
|
ccb_sizes_in_bytes[i]);
|
||||||
return 0x80d11000;
|
return 0x80d11000;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
liverpool->ProcessCmdList(reinterpret_cast<u32*>(dcbGpuAddrs[0]), dcbSizesInBytes[0]);
|
liverpool->Submit(reinterpret_cast<u32*>(dcb_gpu_addrs[0]), dcb_sizes_in_bytes[0]);
|
||||||
|
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
@ -1234,7 +1421,10 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload() {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Logs the call, forwards it to `liverpool->SubmitDone()` and reports success.
int PS4_SYSV_ABI sceGnmSubmitDone() {
    const int result = ORBIS_OK;
    LOG_INFO(Lib_GnmDriver, "called");
    liverpool->SubmitDone();
    return result;
}
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
#include "core/libraries/kernel/event_queues.h"
|
||||||
|
|
||||||
namespace Core::Loader {
|
namespace Core::Loader {
|
||||||
class SymbolsResolver;
|
class SymbolsResolver;
|
||||||
|
@ -11,7 +12,9 @@ class SymbolsResolver;
|
||||||
|
|
||||||
namespace Libraries::GnmDriver {
|
namespace Libraries::GnmDriver {
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceGnmAddEqEvent();
|
using namespace Kernel;
|
||||||
|
|
||||||
|
s32 PS4_SYSV_ABI sceGnmAddEqEvent(SceKernelEqueue eq, u64 id, void* udata);
|
||||||
int PS4_SYSV_ABI sceGnmAreSubmitsAllowed();
|
int PS4_SYSV_ABI sceGnmAreSubmitsAllowed();
|
||||||
int PS4_SYSV_ABI sceGnmBeginWorkload();
|
int PS4_SYSV_ABI sceGnmBeginWorkload();
|
||||||
s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t addr, u32 mask,
|
s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t addr, u32 mask,
|
||||||
|
@ -28,7 +31,7 @@ int PS4_SYSV_ABI sceGnmDebuggerSetAddressWatch();
|
||||||
int PS4_SYSV_ABI sceGnmDebuggerWriteGds();
|
int PS4_SYSV_ABI sceGnmDebuggerWriteGds();
|
||||||
int PS4_SYSV_ABI sceGnmDebuggerWriteSqIndirectRegister();
|
int PS4_SYSV_ABI sceGnmDebuggerWriteSqIndirectRegister();
|
||||||
int PS4_SYSV_ABI sceGnmDebugHardwareStatus();
|
int PS4_SYSV_ABI sceGnmDebugHardwareStatus();
|
||||||
int PS4_SYSV_ABI sceGnmDeleteEqEvent();
|
s32 PS4_SYSV_ABI sceGnmDeleteEqEvent(SceKernelEqueue eq, u64 id);
|
||||||
int PS4_SYSV_ABI sceGnmDestroyWorkloadStream();
|
int PS4_SYSV_ABI sceGnmDestroyWorkloadStream();
|
||||||
int PS4_SYSV_ABI sceGnmDingDong();
|
int PS4_SYSV_ABI sceGnmDingDong();
|
||||||
int PS4_SYSV_ABI sceGnmDingDongForWorkload();
|
int PS4_SYSV_ABI sceGnmDingDongForWorkload();
|
||||||
|
@ -104,7 +107,7 @@ s32 PS4_SYSV_ABI sceGnmInsertPushMarker(u32* cmdbuf, u32 size, const char* marke
|
||||||
int PS4_SYSV_ABI sceGnmInsertSetColorMarker();
|
int PS4_SYSV_ABI sceGnmInsertSetColorMarker();
|
||||||
int PS4_SYSV_ABI sceGnmInsertSetMarker();
|
int PS4_SYSV_ABI sceGnmInsertSetMarker();
|
||||||
int PS4_SYSV_ABI sceGnmInsertThreadTraceMarker();
|
int PS4_SYSV_ABI sceGnmInsertThreadTraceMarker();
|
||||||
int PS4_SYSV_ABI sceGnmInsertWaitFlipDone();
|
s32 PS4_SYSV_ABI sceGnmInsertWaitFlipDone(u32* cmdbuf, u32 size, s32 vo_handle, u32 buf_idx);
|
||||||
int PS4_SYSV_ABI sceGnmIsCoredumpValid();
|
int PS4_SYSV_ABI sceGnmIsCoredumpValid();
|
||||||
int PS4_SYSV_ABI sceGnmIsUserPaEnabled();
|
int PS4_SYSV_ABI sceGnmIsUserPaEnabled();
|
||||||
int PS4_SYSV_ABI sceGnmLogicalCuIndexToPhysicalCuIndex();
|
int PS4_SYSV_ABI sceGnmLogicalCuIndexToPhysicalCuIndex();
|
||||||
|
@ -137,7 +140,7 @@ s32 PS4_SYSV_ABI sceGnmSetCsShader(u32* cmdbuf, u32 size, const u32* cs_regs);
|
||||||
s32 PS4_SYSV_ABI sceGnmSetCsShaderWithModifier(u32* cmdbuf, u32 size, const u32* cs_regs,
|
s32 PS4_SYSV_ABI sceGnmSetCsShaderWithModifier(u32* cmdbuf, u32 size, const u32* cs_regs,
|
||||||
u32 modifier);
|
u32 modifier);
|
||||||
int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader();
|
int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader();
|
||||||
int PS4_SYSV_ABI sceGnmSetEmbeddedVsShader();
|
s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 modifier);
|
||||||
int PS4_SYSV_ABI sceGnmSetEsShader();
|
int PS4_SYSV_ABI sceGnmSetEsShader();
|
||||||
int PS4_SYSV_ABI sceGnmSetGsRingSizes();
|
int PS4_SYSV_ABI sceGnmSetGsRingSizes();
|
||||||
int PS4_SYSV_ABI sceGnmSetGsShader();
|
int PS4_SYSV_ABI sceGnmSetGsShader();
|
||||||
|
@ -191,9 +194,12 @@ int PS4_SYSV_ABI sceGnmSqttStopTrace();
|
||||||
int PS4_SYSV_ABI sceGnmSqttSwitchTraceBuffer();
|
int PS4_SYSV_ABI sceGnmSqttSwitchTraceBuffer();
|
||||||
int PS4_SYSV_ABI sceGnmSqttSwitchTraceBuffer2();
|
int PS4_SYSV_ABI sceGnmSqttSwitchTraceBuffer2();
|
||||||
int PS4_SYSV_ABI sceGnmSqttWaitForEvent();
|
int PS4_SYSV_ABI sceGnmSqttWaitForEvent();
|
||||||
int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers();
|
s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, void* dcb_gpu_addrs[],
|
||||||
|
u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[],
|
||||||
|
u32* ccb_sizes_in_bytes, u32 vo_handle,
|
||||||
|
u32 buf_idx, u32 flip_mode, u32 flip_arg);
|
||||||
int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload();
|
int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload();
|
||||||
int PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[],
|
s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcb_gpu_addrs[],
|
||||||
u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[],
|
u32* dcb_sizes_in_bytes, void* ccb_gpu_addrs[],
|
||||||
u32* ccb_sizes_in_bytes);
|
u32* ccb_sizes_in_bytes);
|
||||||
int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload();
|
int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload();
|
||||||
|
|
|
@ -20,6 +20,14 @@ int EqueueInternal::addEvent(const EqueueEvent& event) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Removes the first registered event whose identifier equals `id`.
///
/// The event is expected to exist; a missing event trips the assertion.
/// @return Always 0.
int EqueueInternal::removeEvent(u64 id) {
    // `m_events` is shared with the waiting side, which accesses it under `m_mutex`,
    // so the list must not be modified without holding the same lock.
    std::scoped_lock lock{m_mutex};

    const auto event_q =
        std::ranges::find_if(m_events, [id](const auto& ev) { return ev.event.ident == id; });
    ASSERT(event_q != m_events.cend());
    m_events.erase(event_q);
    return 0;
}
|
||||||
|
|
||||||
int EqueueInternal::waitForEvents(SceKernelEvent* ev, int num, u32 micros) {
|
int EqueueInternal::waitForEvents(SceKernelEvent* ev, int num, u32 micros) {
|
||||||
std::unique_lock lock{m_mutex};
|
std::unique_lock lock{m_mutex};
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
|
@ -42,11 +42,22 @@ using ResetFunc = void (*)(EqueueEvent* event);
|
||||||
using DeleteFunc = void (*)(EqueueInternal* eq, EqueueEvent* event);
|
using DeleteFunc = void (*)(EqueueInternal* eq, EqueueEvent* event);
|
||||||
|
|
||||||
struct SceKernelEvent {
|
struct SceKernelEvent {
|
||||||
|
enum Type : u64 {
|
||||||
|
Compute0RelMem = 0x00,
|
||||||
|
Compute1RelMem = 0x01,
|
||||||
|
Compute2RelMem = 0x02,
|
||||||
|
Compute3RelMem = 0x03,
|
||||||
|
Compute4RelMem = 0x04,
|
||||||
|
Compute5RelMem = 0x05,
|
||||||
|
Compute6RelMem = 0x06,
|
||||||
|
GfxEop = 0x40
|
||||||
|
};
|
||||||
|
|
||||||
u64 ident = 0; /* identifier for this event */
|
u64 ident = 0; /* identifier for this event */
|
||||||
s16 filter = 0; /* filter for event */
|
s16 filter = 0; /* filter for event */
|
||||||
u16 flags = 0;
|
u16 flags = 0;
|
||||||
u32 fflags = 0;
|
u32 fflags = 0;
|
||||||
s64 data = 0;
|
u64 data = 0;
|
||||||
void* udata = nullptr; /* opaque user data identifier */
|
void* udata = nullptr; /* opaque user data identifier */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -80,6 +91,7 @@ public:
|
||||||
this->m_name = m_name;
|
this->m_name = m_name;
|
||||||
}
|
}
|
||||||
int addEvent(const EqueueEvent& event);
|
int addEvent(const EqueueEvent& event);
|
||||||
|
int removeEvent(u64 id);
|
||||||
int waitForEvents(SceKernelEvent* ev, int num, u32 micros);
|
int waitForEvents(SceKernelEvent* ev, int num, u32 micros);
|
||||||
bool triggerEvent(u64 ident, s16 filter, void* trigger_data);
|
bool triggerEvent(u64 ident, s16 filter, void* trigger_data);
|
||||||
int getTriggeredEvents(SceKernelEvent* ev, int num);
|
int getTriggeredEvents(SceKernelEvent* ev, int num);
|
||||||
|
|
|
@ -11,29 +11,34 @@ namespace Libraries::Kernel {
|
||||||
/// Creates a new event queue and stores its handle in `*eq`.
///
/// @param eq   Receives the created queue; must not be null.
/// @param name Queue name; must not be null and must fit into 32 bytes including the
///             terminating null character.
/// @return ORBIS_OK on success, ORBIS_KERNEL_ERROR_EINVAL for a null argument,
///         ORBIS_KERNEL_ERROR_ENAMETOOLONG when the name is too long.
int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name) {
    if (eq == nullptr) {
        LOG_ERROR(Kernel_Event, "Event queue is null!");
        return ORBIS_KERNEL_ERROR_EINVAL;
    }
    if (name == nullptr) {
        LOG_ERROR(Kernel_Event, "Event queue name is null!");
        return ORBIS_KERNEL_ERROR_EINVAL;
    }

    // Maximum is 32 including null terminator, i.e. at most 31 visible characters
    static constexpr size_t MaxEventQueueNameSize = 32;
    if (std::strlen(name) >= MaxEventQueueNameSize) {
        LOG_ERROR(Kernel_Event, "Event queue name exceeds 32 bytes!");
        return ORBIS_KERNEL_ERROR_ENAMETOOLONG;
    }

    LOG_INFO(Kernel_Event, "name = {}", name);

    // The queue is owned by the returned handle and released by sceKernelDeleteEqueue
    *eq = new EqueueInternal;
    (*eq)->setName(std::string(name));
    return ORBIS_OK;
}
|
||||||
|
|
||||||
|
/// Destroys an event queue created by sceKernelCreateEqueue.
///
/// @param eq Queue handle to delete.
/// @return ORBIS_KERNEL_ERROR_EBADF when `eq` is null, ORBIS_OK otherwise.
int PS4_SYSV_ABI sceKernelDeleteEqueue(SceKernelEqueue eq) {
    if (eq == nullptr) {
        // Use the ORBIS_ error code family like the rest of this file
        return ORBIS_KERNEL_ERROR_EBADF;
    }

    delete eq;
    return ORBIS_OK;
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int num, int* out,
|
int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int num, int* out,
|
||||||
|
|
|
@ -11,6 +11,7 @@ using SceKernelUseconds = u32;
|
||||||
using SceKernelEqueue = EqueueInternal*;
|
using SceKernelEqueue = EqueueInternal*;
|
||||||
|
|
||||||
int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name);
|
int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name);
|
||||||
|
int PS4_SYSV_ABI sceKernelDeleteEqueue(SceKernelEqueue eq);
|
||||||
int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int num, int* out,
|
int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int num, int* out,
|
||||||
SceKernelUseconds* timo);
|
SceKernelUseconds* timo);
|
||||||
|
|
||||||
|
|
|
@ -169,6 +169,7 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
|
||||||
LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap);
|
LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap);
|
||||||
// equeue
|
// equeue
|
||||||
LIB_FUNCTION("D0OdFMjp46I", "libkernel", 1, "libkernel", 1, 1, sceKernelCreateEqueue);
|
LIB_FUNCTION("D0OdFMjp46I", "libkernel", 1, "libkernel", 1, 1, sceKernelCreateEqueue);
|
||||||
|
LIB_FUNCTION("jpFjmgAC5AE", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteEqueue);
|
||||||
LIB_FUNCTION("fzyMKs9kim0", "libkernel", 1, "libkernel", 1, 1, sceKernelWaitEqueue);
|
LIB_FUNCTION("fzyMKs9kim0", "libkernel", 1, "libkernel", 1, 1, sceKernelWaitEqueue);
|
||||||
// misc
|
// misc
|
||||||
LIB_FUNCTION("WslcK1FQcGI", "libkernel", 1, "libkernel", 1, 1, sceKernelIsNeoMode);
|
LIB_FUNCTION("WslcK1FQcGI", "libkernel", 1, "libkernel", 1, 1, sceKernelIsNeoMode);
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include "core/libraries/error_codes.h"
|
#include "core/libraries/error_codes.h"
|
||||||
#include "core/libraries/kernel/time_management.h"
|
#include "core/libraries/kernel/time_management.h"
|
||||||
#include "core/libraries/videoout/driver.h"
|
#include "core/libraries/videoout/driver.h"
|
||||||
|
#include "core/platform.h"
|
||||||
|
|
||||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||||
|
|
||||||
|
@ -196,16 +197,22 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) {
|
||||||
reinterpret_cast<void*>(req.flip_arg));
|
reinterpret_cast<void*>(req.flip_arg));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reset flip label
|
||||||
|
req.port->buffer_labels[req.index] = 0;
|
||||||
|
LOG_INFO(Lib_VideoOut, "Flip done [buf = {}]", req.index);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) {
|
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
|
||||||
|
bool is_eop /*= false*/) {
|
||||||
const auto& buffer = port->buffer_slots[index];
|
const auto& buffer = port->buffer_slots[index];
|
||||||
const auto& group = port->groups[buffer.group_index];
|
const auto& group = port->groups[buffer.group_index];
|
||||||
auto* frame = renderer->PrepareFrame(group, buffer.address_left);
|
auto* frame = renderer->PrepareFrame(group, buffer.address_left);
|
||||||
|
|
||||||
std::scoped_lock lock{mutex};
|
std::scoped_lock lock{mutex};
|
||||||
|
|
||||||
if (requests.size() >= 2) {
|
if (requests.size() >= port->NumRegisteredBuffers()) {
|
||||||
|
LOG_ERROR(Lib_VideoOut, "Flip queue is full");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -215,6 +222,7 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) {
|
||||||
.index = index,
|
.index = index,
|
||||||
.flip_arg = flip_arg,
|
.flip_arg = flip_arg,
|
||||||
.submit_tsc = Libraries::Kernel::sceKernelReadTsc(),
|
.submit_tsc = Libraries::Kernel::sceKernelReadTsc(),
|
||||||
|
.eop = is_eop,
|
||||||
});
|
});
|
||||||
|
|
||||||
port->flip_status.flipPendingNum = static_cast<int>(requests.size());
|
port->flip_status.flipPendingNum = static_cast<int>(requests.size());
|
||||||
|
|
|
@ -19,6 +19,8 @@ struct VideoOutPort {
|
||||||
bool is_open = false;
|
bool is_open = false;
|
||||||
SceVideoOutResolutionStatus resolution;
|
SceVideoOutResolutionStatus resolution;
|
||||||
std::array<VideoOutBuffer, MaxDisplayBuffers> buffer_slots;
|
std::array<VideoOutBuffer, MaxDisplayBuffers> buffer_slots;
|
||||||
|
std::array<uintptr_t, MaxDisplayBuffers> buffer_labels; // should be contiguous in memory
|
||||||
|
static_assert(sizeof(buffer_labels[0]) == 8u);
|
||||||
std::array<BufferAttributeGroup, MaxDisplayBufferGroups> groups;
|
std::array<BufferAttributeGroup, MaxDisplayBufferGroups> groups;
|
||||||
FlipStatus flip_status;
|
FlipStatus flip_status;
|
||||||
SceVideoOutVblankStatus vblank_status;
|
SceVideoOutVblankStatus vblank_status;
|
||||||
|
@ -32,6 +34,11 @@ struct VideoOutPort {
|
||||||
}
|
}
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] int NumRegisteredBuffers() const {
|
||||||
|
return std::count_if(buffer_slots.cbegin(), buffer_slots.cend(),
|
||||||
|
[](auto& buffer) { return buffer.group_index != -1; });
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ServiceThreadParams {
|
struct ServiceThreadParams {
|
||||||
|
@ -57,7 +64,7 @@ public:
|
||||||
int UnregisterBuffers(VideoOutPort* port, s32 attributeIndex);
|
int UnregisterBuffers(VideoOutPort* port, s32 attributeIndex);
|
||||||
|
|
||||||
void Flip(std::chrono::microseconds timeout);
|
void Flip(std::chrono::microseconds timeout);
|
||||||
bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg);
|
bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false);
|
||||||
|
|
||||||
void Vblank();
|
void Vblank();
|
||||||
|
|
||||||
|
@ -68,6 +75,7 @@ private:
|
||||||
s32 index;
|
s32 index;
|
||||||
s64 flip_arg;
|
s64 flip_arg;
|
||||||
u64 submit_tsc;
|
u64 submit_tsc;
|
||||||
|
bool eop;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::mutex mutex;
|
std::mutex mutex;
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
#include "core/libraries/videoout/driver.h"
|
#include "core/libraries/videoout/driver.h"
|
||||||
#include "core/libraries/videoout/video_out.h"
|
#include "core/libraries/videoout/video_out.h"
|
||||||
#include "core/loader/symbols_resolver.h"
|
#include "core/loader/symbols_resolver.h"
|
||||||
|
#include "core/platform.h"
|
||||||
|
|
||||||
namespace Libraries::VideoOut {
|
namespace Libraries::VideoOut {
|
||||||
|
|
||||||
|
@ -210,6 +211,27 @@ void Vblank() {
|
||||||
return driver->Vblank();
|
return driver->Vblank();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Stores the address of the port's `buffer_labels` array into `*label_addr`.
///
/// @param handle     Video-out handle used to look up the port; the port must exist (asserted).
/// @param label_addr Output location receiving the address of the first label.
void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr) {
    auto* const port = driver->GetPort(handle);
    ASSERT(port);

    const auto labels_base = reinterpret_cast<uintptr_t>(port->buffer_labels.data());
    *label_addr = labels_base;
}
|
||||||
|
|
||||||
|
/// Schedules a flip of buffer `buf_id` that is submitted when the next interrupt is signalled.
///
/// The flip is not submitted here: a one-shot handler is registered on the IRQ controller and
/// performs the actual `SubmitFlip` call (flagged as an EOP flip) when it runs.
///
/// @param handle Video-out handle used to look up the port.
/// @param buf_id Index of the buffer to flip to.
/// @param mode   Not used by this implementation.
/// @param arg    Flip argument forwarded to the driver.
/// @param unk    Not used by this implementation.
/// @return ORBIS_OK once the handler is registered, 0x8029000b when the port lookup fails.
s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk) {
    auto* const port = driver->GetPort(handle);
    if (port == nullptr) {
        return 0x8029000b;
    }

    // Everything the handler needs is captured by value; it runs after this function returns.
    const auto flip_on_eop = [port, buf_id, arg](Platform::InterruptId irq) {
        ASSERT_MSG(irq == Platform::InterruptId::GfxEop, "An unexpected IRQ occured");
        const auto submitted = driver->SubmitFlip(port, buf_id, arg, true);
        ASSERT_MSG(submitted, "EOP flip submission failed");
    };
    Platform::IrqC::Instance()->RegisterOnce(flip_on_eop);

    return ORBIS_OK;
}
|
||||||
|
|
||||||
void RegisterLib(Core::Loader::SymbolsResolver* sym) {
|
void RegisterLib(Core::Loader::SymbolsResolver* sym) {
|
||||||
driver = std::make_unique<VideoOutDriver>(Config::getScreenWidth(), Config::getScreenHeight());
|
driver = std::make_unique<VideoOutDriver>(Config::getScreenWidth(), Config::getScreenHeight());
|
||||||
|
|
||||||
|
|
|
@ -102,6 +102,10 @@ s32 PS4_SYSV_ABI sceVideoOutClose(s32 handle);
|
||||||
void Flip(std::chrono::microseconds micros);
|
void Flip(std::chrono::microseconds micros);
|
||||||
void Vblank();
|
void Vblank();
|
||||||
|
|
||||||
|
// Internal system functions
|
||||||
|
void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr);
|
||||||
|
s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk);
|
||||||
|
|
||||||
void RegisterLib(Core::Loader::SymbolsResolver* sym);
|
void RegisterLib(Core::Loader::SymbolsResolver* sym);
|
||||||
|
|
||||||
} // namespace Libraries::VideoOut
|
} // namespace Libraries::VideoOut
|
||||||
|
|
76
src/core/platform.h
Normal file
76
src/core/platform.h
Normal file
|
@ -0,0 +1,76 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/logging/log.h"
|
||||||
|
#include "common/singleton.h"
|
||||||
|
#include "common/types.h"
|
||||||
|
#include "magic_enum.hpp"
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
#include <mutex>
|
||||||
|
#include <optional>
|
||||||
|
#include <queue>
|
||||||
|
|
||||||
|
namespace Platform {
|
||||||
|
|
||||||
|
/// Identifiers of the interrupts that can be passed to IrqController::Signal and that
/// registered handlers receive as their argument.
enum class InterruptId : u32 {
    Compute0RelMem = 0x00u,
    Compute1RelMem = 0x01u,
    Compute2RelMem = 0x02u,
    Compute3RelMem = 0x03u,
    Compute4RelMem = 0x04u,
    Compute5RelMem = 0x05u,
    Compute6RelMem = 0x06u,
    GfxEop = 0x40u,
};
|
||||||
|
|
||||||
|
using IrqHandler = std::function<void(InterruptId)>;
|
||||||
|
|
||||||
|
struct IrqController {
|
||||||
|
void RegisterOnce(IrqHandler handler) {
|
||||||
|
std::unique_lock lock{m_lock};
|
||||||
|
one_time_subscribers.emplace(handler);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Register(IrqHandler handler) {
|
||||||
|
ASSERT_MSG(!persistent_handler.has_value(),
|
||||||
|
"Too many persistent handlers"); // Add a slot map if so
|
||||||
|
|
||||||
|
std::unique_lock lock{m_lock};
|
||||||
|
persistent_handler.emplace(handler);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Unregister() {
|
||||||
|
std::unique_lock lock{m_lock};
|
||||||
|
persistent_handler.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Signal(InterruptId irq) {
|
||||||
|
std::unique_lock lock{m_lock};
|
||||||
|
|
||||||
|
LOG_TRACE(Core, "IRQ signaled: {}", magic_enum::enum_name(irq));
|
||||||
|
|
||||||
|
if (persistent_handler) {
|
||||||
|
persistent_handler.value()(irq);
|
||||||
|
}
|
||||||
|
|
||||||
|
while (!one_time_subscribers.empty()) {
|
||||||
|
const auto& h = one_time_subscribers.front();
|
||||||
|
h(irq);
|
||||||
|
|
||||||
|
one_time_subscribers.pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::optional<IrqHandler> persistent_handler{};
|
||||||
|
std::queue<IrqHandler> one_time_subscribers{};
|
||||||
|
std::mutex m_lock{};
|
||||||
|
};
|
||||||
|
|
||||||
|
using IrqC = Common::Singleton<IrqController>;
|
||||||
|
|
||||||
|
} // namespace Platform
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/io_file.h"
|
#include "common/io_file.h"
|
||||||
|
#include "common/thread.h"
|
||||||
#include "video_core/amdgpu/liverpool.h"
|
#include "video_core/amdgpu/liverpool.h"
|
||||||
#include "video_core/amdgpu/pm4_cmds.h"
|
#include "video_core/amdgpu/pm4_cmds.h"
|
||||||
|
|
||||||
|
@ -11,6 +12,8 @@ namespace AmdGpu {
|
||||||
Liverpool::Liverpool() = default;
|
Liverpool::Liverpool() = default;
|
||||||
|
|
||||||
void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
|
void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
|
||||||
|
Common::SetCurrentThreadName("CommandProcessor_Gfx");
|
||||||
|
|
||||||
auto* header = reinterpret_cast<PM4Header*>(cmdbuf);
|
auto* header = reinterpret_cast<PM4Header*>(cmdbuf);
|
||||||
u32 processed_cmd_size = 0;
|
u32 processed_cmd_size = 0;
|
||||||
|
|
||||||
|
@ -25,30 +28,30 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
|
||||||
case PM4ItOpcode::Nop:
|
case PM4ItOpcode::Nop:
|
||||||
break;
|
break;
|
||||||
case PM4ItOpcode::SetContextReg: {
|
case PM4ItOpcode::SetContextReg: {
|
||||||
auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
|
const auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
|
||||||
std::memcpy(®s.reg_array[ContextRegWordOffset + set_data->reg_offset],
|
std::memcpy(®s.reg_array[ContextRegWordOffset + set_data->reg_offset],
|
||||||
header + 2, (count - 1) * sizeof(u32));
|
header + 2, (count - 1) * sizeof(u32));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::SetShReg: {
|
case PM4ItOpcode::SetShReg: {
|
||||||
auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
|
const auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
|
||||||
std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
|
std::memcpy(®s.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
|
||||||
(count - 1) * sizeof(u32));
|
(count - 1) * sizeof(u32));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::SetUconfigReg: {
|
case PM4ItOpcode::SetUconfigReg: {
|
||||||
auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
|
const auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
|
||||||
std::memcpy(®s.reg_array[UconfigRegWordOffset + set_data->reg_offset],
|
std::memcpy(®s.reg_array[UconfigRegWordOffset + set_data->reg_offset],
|
||||||
header + 2, (count - 1) * sizeof(u32));
|
header + 2, (count - 1) * sizeof(u32));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::IndexType: {
|
case PM4ItOpcode::IndexType: {
|
||||||
auto* index_type = reinterpret_cast<PM4CmdDrawIndexType*>(header);
|
const auto* index_type = reinterpret_cast<PM4CmdDrawIndexType*>(header);
|
||||||
regs.index_buffer_type.raw = index_type->raw;
|
regs.index_buffer_type.raw = index_type->raw;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::DrawIndex2: {
|
case PM4ItOpcode::DrawIndex2: {
|
||||||
auto* draw_index = reinterpret_cast<PM4CmdDrawIndex2*>(header);
|
const auto* draw_index = reinterpret_cast<PM4CmdDrawIndex2*>(header);
|
||||||
regs.max_index_size = draw_index->max_size;
|
regs.max_index_size = draw_index->max_size;
|
||||||
regs.index_base_address.base_addr_lo = draw_index->index_base_lo;
|
regs.index_base_address.base_addr_lo = draw_index->index_base_lo;
|
||||||
regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi);
|
regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi);
|
||||||
|
@ -58,22 +61,52 @@ void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::DrawIndexAuto: {
|
case PM4ItOpcode::DrawIndexAuto: {
|
||||||
auto* draw_index = reinterpret_cast<PM4CmdDrawIndexAuto*>(header);
|
const auto* draw_index = reinterpret_cast<PM4CmdDrawIndexAuto*>(header);
|
||||||
regs.num_indices = draw_index->index_count;
|
regs.num_indices = draw_index->index_count;
|
||||||
regs.draw_initiator = draw_index->draw_initiator;
|
regs.draw_initiator = draw_index->draw_initiator;
|
||||||
// rasterizer->DrawIndex();
|
// rasterizer->DrawIndex();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case PM4ItOpcode::DispatchDirect: {
|
||||||
|
// const auto* dispatch_direct = reinterpret_cast<PM4CmdDispatchDirect*>(header);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PM4ItOpcode::EventWriteEos: {
|
||||||
|
const auto* event_eos = reinterpret_cast<PM4CmdEventWriteEos*>(header);
|
||||||
|
event_eos->SignalFence();
|
||||||
|
break;
|
||||||
|
}
|
||||||
case PM4ItOpcode::EventWriteEop: {
|
case PM4ItOpcode::EventWriteEop: {
|
||||||
auto* event_write = reinterpret_cast<PM4CmdEventWriteEop*>(header);
|
const auto* event_eop = reinterpret_cast<PM4CmdEventWriteEop*>(header);
|
||||||
const InterruptSelect irq_sel = event_write->int_sel;
|
event_eop->SignalFence();
|
||||||
const DataSelect data_sel = event_write->data_sel;
|
|
||||||
ASSERT(irq_sel == InterruptSelect::None && data_sel == DataSelect::Data64);
|
|
||||||
*event_write->Address() = event_write->DataQWord();
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PM4ItOpcode::DmaData: {
|
case PM4ItOpcode::DmaData: {
|
||||||
auto* dma_data = reinterpret_cast<PM4DmaData*>(header);
|
const auto* dma_data = reinterpret_cast<PM4DmaData*>(header);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PM4ItOpcode::WriteData: {
|
||||||
|
const auto* write_data = reinterpret_cast<PM4CmdWriteData*>(header);
|
||||||
|
ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5);
|
||||||
|
const u32 data_size = (header->type3.count.Value() - 2) * 4;
|
||||||
|
if (!write_data->wr_one_addr.Value()) {
|
||||||
|
std::memcpy(write_data->Address<void*>(), write_data->data, data_size);
|
||||||
|
} else {
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PM4ItOpcode::AcquireMem: {
|
||||||
|
// const auto* acquire_mem = reinterpret_cast<PM4CmdAcquireMem*>(header);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PM4ItOpcode::WaitRegMem: {
|
||||||
|
const auto* wait_reg_mem = reinterpret_cast<PM4CmdWaitRegMem*>(header);
|
||||||
|
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
||||||
|
while (!wait_reg_mem->Test()) {
|
||||||
|
using namespace std::chrono_literals;
|
||||||
|
std::this_thread::sleep_for(1ms);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -3,10 +3,15 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <array>
|
#include "common/assert.h"
|
||||||
#include "common/bit_field.h"
|
#include "common/bit_field.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <condition_variable>
|
||||||
|
#include <functional>
|
||||||
|
#include <future>
|
||||||
|
|
||||||
namespace AmdGpu {
|
namespace AmdGpu {
|
||||||
|
|
||||||
#define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32))
|
#define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32))
|
||||||
|
@ -610,7 +615,20 @@ struct Liverpool {
|
||||||
public:
|
public:
|
||||||
Liverpool();
|
Liverpool();
|
||||||
|
|
||||||
|
/// Starts processing a command buffer on a separate thread.
///
/// Asserts that no earlier submission is still tracked by `cp`; the result is collected by
/// SubmitDone().
///
/// @param cmdbuf        Pointer to the first dword of the command buffer.
/// @param size_in_bytes Size of the command buffer in bytes.
void Submit(u32* cmdbuf, u32 size_in_bytes) {
    ASSERT_MSG(!cp.valid(), "Trying to submit while previous submission is pending");

    // Request the async policy explicitly: with the default policy the implementation may defer
    // the task, in which case the command list would only run inside SubmitDone()'s `get()`.
    cp = std::async(std::launch::async, &Liverpool::ProcessCmdList, this, cmdbuf,
                    size_in_bytes);
}
|
||||||
|
/// Waits for the submission started by Submit() to finish. Does nothing when no submission is
/// pending.
void SubmitDone() {
    // This is wrong as `submitDone()` should never be blocking. The behavior will be
    // reworked with multiple queues introduction
    if (cp.valid()) {
        // `get()` on a future without shared state is undefined behavior, hence the guard.
        // It also releases the shared state, so the next Submit() passes its assertion.
        cp.get();
    }
}
|
||||||
|
|
||||||
|
private:
|
||||||
void ProcessCmdList(u32* cmdbuf, u32 size_in_bytes);
|
void ProcessCmdList(u32* cmdbuf, u32 size_in_bytes);
|
||||||
|
|
||||||
|
std::future<void> cp{};
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
|
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
|
||||||
|
|
|
@ -5,7 +5,9 @@
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include "common/bit_field.h"
|
#include "common/bit_field.h"
|
||||||
|
#include "common/rdtsc.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
#include "core/platform.h"
|
||||||
#include "video_core/amdgpu/pm4_opcodes.h"
|
#include "video_core/amdgpu/pm4_opcodes.h"
|
||||||
|
|
||||||
namespace AmdGpu {
|
namespace AmdGpu {
|
||||||
|
@ -201,13 +203,18 @@ struct PM4CmdNop {
|
||||||
PM4Type3Header header;
|
PM4Type3Header header;
|
||||||
u32 data_block[0];
|
u32 data_block[0];
|
||||||
|
|
||||||
enum class PayloadType : u32 {
|
enum PayloadType : u32 {
|
||||||
DebugMarkerPush = 0x68750001, ///< Begin of GPU event scope
|
DebugMarkerPush = 0x68750001u, ///< Begin of GPU event scope
|
||||||
DebugMarkerPop = 0x68750002, ///< End of GPU event scope
|
DebugMarkerPop = 0x68750002u, ///< End of GPU event scope
|
||||||
SetVsharpInUdata = 0x68750004, ///< Indicates that V# will be set in the next packet
|
SetVsharpInUdata = 0x68750004u, ///< Indicates that V# will be set in the next packet
|
||||||
SetTsharpInUdata = 0x68750005, ///< Indicates that T# will be set in the next packet
|
SetTsharpInUdata = 0x68750005u, ///< Indicates that T# will be set in the next packet
|
||||||
SetSsharpInUdata = 0x68750006, ///< Indicates that S# will be set in the next packet
|
SetSsharpInUdata = 0x68750006u, ///< Indicates that S# will be set in the next packet
|
||||||
DebugColorMarkerPush = 0x6875000e, ///< Begin of GPU event scope with color
|
DebugColorMarkerPush = 0x6875000eu, ///< Begin of GPU event scope with color
|
||||||
|
PatchedFlip = 0x68750776u, ///< Patched flip marker
|
||||||
|
PrepareFlip = 0x68750777u, ///< Flip marker
|
||||||
|
PrepareFlipLabel = 0x68750778u, ///< Flip marker with label address
|
||||||
|
PrepareFlipInterrupt = 0x68750780u, ///< Flip marker with interrupt
|
||||||
|
PrepareFlipInterruptLabel = 0x68750781u, ///< Flip marker with interrupt and label
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -277,13 +284,52 @@ struct PM4CmdEventWriteEop {
|
||||||
u32 data_lo; ///< Value that will be written to memory when event occurs
|
u32 data_lo; ///< Value that will be written to memory when event occurs
|
||||||
u32 data_hi; ///< Value that will be written to memory when event occurs
|
u32 data_hi; ///< Value that will be written to memory when event occurs
|
||||||
|
|
||||||
u64* Address() const {
|
template <typename T>
|
||||||
return reinterpret_cast<u64*>(address_lo | u64(address_hi) << 32);
|
T* Address() const {
|
||||||
|
return reinterpret_cast<T*>(address_lo | u64(address_hi) << 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 DataDWord() const {
|
||||||
|
return data_lo;
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 DataQWord() const {
|
u64 DataQWord() const {
|
||||||
return data_lo | u64(data_hi) << 32;
|
return data_lo | u64(data_hi) << 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SignalFence() const {
|
||||||
|
switch (data_sel.Value()) {
|
||||||
|
case DataSelect::Data32Low: {
|
||||||
|
*Address<u32>() = DataDWord();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case DataSelect::Data64: {
|
||||||
|
*Address<u64>() = DataQWord();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case DataSelect::PerfCounter: {
|
||||||
|
*Address<u64>() = Common::FencedRDTSC();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default: {
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (int_sel.Value()) {
|
||||||
|
case InterruptSelect::None: {
|
||||||
|
// No interrupt
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case InterruptSelect::IrqWhenWriteConfirm: {
|
||||||
|
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxEop);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default: {
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct PM4DmaData {
|
struct PM4DmaData {
|
||||||
|
@ -311,11 +357,24 @@ struct PM4DmaData {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct PM4CmdWaitRegMem {
|
struct PM4CmdWaitRegMem {
|
||||||
|
enum class Engine : u32 { Me = 0u, Pfp = 1u };
|
||||||
|
enum class MemSpace : u32 { Register = 0u, Memory = 1u };
|
||||||
|
enum class Function : u32 {
|
||||||
|
Always = 0u,
|
||||||
|
LessThan = 1u,
|
||||||
|
LessThanEqual = 2u,
|
||||||
|
Equal = 3u,
|
||||||
|
NotEqual = 4u,
|
||||||
|
GreaterThanEqual = 5u,
|
||||||
|
GreaterThan = 6u,
|
||||||
|
Reserved = 7u
|
||||||
|
};
|
||||||
|
|
||||||
PM4Type3Header header;
|
PM4Type3Header header;
|
||||||
union {
|
union {
|
||||||
BitField<0, 3, u32> function;
|
BitField<0, 3, Function> function;
|
||||||
BitField<4, 1, u32> mem_space;
|
BitField<4, 1, MemSpace> mem_space;
|
||||||
BitField<8, 1, u32> engine;
|
BitField<8, 1, Engine> engine;
|
||||||
u32 raw;
|
u32 raw;
|
||||||
};
|
};
|
||||||
u32 poll_addr_lo;
|
u32 poll_addr_lo;
|
||||||
|
@ -323,6 +382,116 @@ struct PM4CmdWaitRegMem {
|
||||||
u32 ref;
|
u32 ref;
|
||||||
u32 mask;
|
u32 mask;
|
||||||
u32 poll_interval;
|
u32 poll_interval;
|
||||||
|
|
||||||
|
/// Builds the polled address from `poll_addr_hi` (upper 32 bits) and `poll_addr_lo`.
u32* Address() const {
    const uintptr_t upper = uintptr_t(poll_addr_hi) << 32;
    return reinterpret_cast<u32*>(upper | poll_addr_lo);
}
|
||||||
|
|
||||||
|
bool Test() const {
|
||||||
|
switch (function.Value()) {
|
||||||
|
case Function::Always: {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
case Function::LessThan: {
|
||||||
|
return (*Address() & mask) < ref;
|
||||||
|
}
|
||||||
|
case Function::LessThanEqual: {
|
||||||
|
return (*Address() & mask) <= ref;
|
||||||
|
}
|
||||||
|
case Function::Equal: {
|
||||||
|
return (*Address() & mask) == ref;
|
||||||
|
}
|
||||||
|
case Function::NotEqual: {
|
||||||
|
return (*Address() & mask) != ref;
|
||||||
|
}
|
||||||
|
case Function::GreaterThanEqual: {
|
||||||
|
return (*Address() & mask) >= ref;
|
||||||
|
}
|
||||||
|
case Function::GreaterThan: {
|
||||||
|
return (*Address() & mask) > ref;
|
||||||
|
}
|
||||||
|
case Function::Reserved:
|
||||||
|
[[fallthrough]];
|
||||||
|
default: {
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PM4CmdWriteData {
|
||||||
|
PM4Type3Header header;
|
||||||
|
union {
|
||||||
|
BitField<8, 11, u32> dst_sel;
|
||||||
|
BitField<16, 1, u32> wr_one_addr;
|
||||||
|
BitField<20, 1, u32> wr_confirm;
|
||||||
|
BitField<30, 1, u32> engine_sel;
|
||||||
|
u32 raw;
|
||||||
|
};
|
||||||
|
union {
|
||||||
|
struct {
|
||||||
|
u32 dst_addr_lo;
|
||||||
|
u32 dst_addr_hi;
|
||||||
|
};
|
||||||
|
u64 addr64;
|
||||||
|
};
|
||||||
|
u32 data[0];
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void Address(T addr) {
|
||||||
|
addr64 = reinterpret_cast<u64>(addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T* Address() const {
|
||||||
|
return reinterpret_cast<T*>(addr64);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PM4CmdEventWriteEos {
|
||||||
|
enum class Command : u32 {
|
||||||
|
GdsStore = 1u,
|
||||||
|
SingalFence = 2u,
|
||||||
|
};
|
||||||
|
|
||||||
|
PM4Type3Header header;
|
||||||
|
union {
|
||||||
|
u32 event_control;
|
||||||
|
BitField<0, 6, u32> event_type; ///< Event type written to VGT_EVENT_INITIATOR
|
||||||
|
BitField<8, 4, u32> event_index; ///< Event index
|
||||||
|
};
|
||||||
|
u32 address_lo;
|
||||||
|
union {
|
||||||
|
u32 cmd_info;
|
||||||
|
BitField<0, 16, u32> address_hi; ///< High bits of address
|
||||||
|
BitField<29, 3, Command> command; ///< Command
|
||||||
|
};
|
||||||
|
union {
|
||||||
|
u32 data; ///< Fence value that will be written to memory when event occurs
|
||||||
|
BitField<0, 16, u32>
|
||||||
|
gds_index; ///< Indexed offset from the start of the segment within the partition
|
||||||
|
BitField<16, 16, u32> size; ///< Number of DWs to read from the GDS
|
||||||
|
};
|
||||||
|
|
||||||
|
u32* Address() const {
|
||||||
|
return reinterpret_cast<u32*>(address_lo | u64(address_hi) << 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 DataDWord() const {
|
||||||
|
return this->data;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SignalFence() const {
|
||||||
|
switch (command.Value()) {
|
||||||
|
case Command::SingalFence: {
|
||||||
|
*Address() = DataDWord();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default: {
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace AmdGpu
|
} // namespace AmdGpu
|
||||||
|
|
|
@ -49,7 +49,7 @@ enum class PM4ItOpcode : u32 {
|
||||||
PremableCntl = 0x4A,
|
PremableCntl = 0x4A,
|
||||||
DmaData = 0x50,
|
DmaData = 0x50,
|
||||||
ContextRegRmw = 0x51,
|
ContextRegRmw = 0x51,
|
||||||
Unknown58 = 0x58,
|
AcquireMem = 0x58,
|
||||||
LoadShReg = 0x5F,
|
LoadShReg = 0x5F,
|
||||||
LoadConfigReg = 0x60,
|
LoadConfigReg = 0x60,
|
||||||
LoadContextReg = 0x61,
|
LoadContextReg = 0x61,
|
||||||
|
|
Loading…
Reference in a new issue