From 508d034263b25dd0985138b2b0e8c4e4bed3ef00 Mon Sep 17 00:00:00 2001 From: psucien Date: Mon, 24 Jun 2024 14:51:17 +0200 Subject: [PATCH 1/2] libraries: gnm_driver: added `sceGnmSetEmbeddedPsShader` --- src/core/libraries/gnmdriver/gnmdriver.cpp | 84 +++++++++++++++++++--- src/core/libraries/gnmdriver/gnmdriver.h | 3 +- 2 files changed, 77 insertions(+), 10 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 3da41cca..74769e8c 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -972,8 +972,73 @@ s32 PS4_SYSV_ABI sceGnmSetCsShaderWithModifier(u32* cmdbuf, u32 size, const u32* return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmSetEmbeddedPsShader(u32* cmdbuf, u32 size, u32 shader_id, + u32 shader_modifier) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (shader_id > 1) { + LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id); + return 0x8eee00ff; + } + + // clang-format off + constexpr static std::array ps0_code alignas(256) = { + 0xbeeb03ffu, 0x00000003u, // s_mov_b32 vcc_hi, $0x00000003 + 0x7e000280u, // v_mov_b32 v0, 0 + 0x5e000100u, // v_cvt_pkrtz_f16_f32 v0, v0, v0 + 0xbf800000u, // s_nop + 0xf8001c0fu, 0x00000000u, // exp mrt0, v0, v0 compr vm done + 0xbf810000u, // s_endpgm + + // Binary header + 0x5362724fu, 0x07726468u, 0x00002043u, 0u, 0xb0a45b2bu, 0x1d39766du, 0x72044b7bu, 0x0000000fu, + // PS regs + 0x0fe000f0u, 0u, 0xc0000u, 4u, 0u, 4u, 2u, 2u, 0u, 0u, 0x10u, 0xfu, 0xcu, 0u, 0u, 0u, + }; + + const auto shader0_addr = uintptr_t(ps0_code.data()); // Original address is 0xfe000f00 + const static u32 ps0_regs[] = { + u32(shader0_addr >> 8), u32(shader0_addr >> 40), 0xc0000u, 4u, 0u, 4u, 2u, 2u, 0u, 0u, 0x10u, 0xfu, 0xcu}; + + constexpr static std::array ps1_code alignas(256) = { + 0xbeeb03ffu, 0x00000003u, // s_mov_b32 vcc_hi, $0x00000003 + 0x7e040280u, // v_mov_b32 v2, 0 + 0xf8001803u, 0x02020202u, // exp mrt0, v2, v2, off, off vm done + 0xbf810000u, // s_endpgm + + // Binary header + 0x5362724fu, 0x07726468u, 0x00001841u, 0x04080002u, 0x98b9cb94u, 0u, 0x6f130734u, 0x0000000fu, + // PS regs + 0x0fe000f2u, 0u, 0x2000u, 0u, 0u, 2u, 2u, 2u, 0u, 0u, 0x10u, 3u, 0xcu, + }; + + const auto shader1_addr = uintptr_t(ps1_code.data()); // Original address is 0xfe000f20 + const static u32 ps1_regs[] = { + u32(shader1_addr >> 8), u32(shader1_addr >> 40), 0x2000u, 0u, 0u, 2u, 2u, 2u, 0u, 0u, 0x10u, 3u, 0xcu}; + // clang-format on + + const auto ps_regs = shader_id == 0 ? ps0_regs : ps1_regs; + + // Normally the driver will do a call to `sceGnmSetPsShader350()`, but this function has + // a check for zero in the upper part of shader address. In our case, the address is a + // pointer to a stack memory, so the check will likely fail. To workaround it we will + // repeat set shader functionality here as it is trivial. + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, ps_regs[0], + 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS + cmdbuf = + PM4CmdSetData::SetShReg(cmdbuf, 10u, ps_regs[2], + ps_regs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5 + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c4u, ps_regs[4], + ps_regs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b3u, ps_regs[6], + ps_regs[7]); // SPI_PS_INPUT_ENA/SPI_PS_INPUT_ADDR + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b6u, ps_regs[8]); // SPI_PS_IN_CONTROL + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b8u, ps_regs[9]); // SPI_BARYC_CNTL + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x203u, ps_regs[10]); // DB_SHADER_CONTROL + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x8fu, ps_regs[11]); // CB_SHADER_MASK + + WriteTrailingNop<11>(cmdbuf); + return ORBIS_OK; } @@ -981,10 +1046,15 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 shader_modifier) { LOG_TRACE(Lib_GnmDriver, "called"); + if (shader_id != 0) { + LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id); + return 0x8eee00ff; + } + // A fullscreen triangle with one uv set // clang-format off constexpr static std::array shader_code alignas(256) = { - 0xbeeb03ffu, 00000007u, // s_mov_b32 vcc_hi, $0x00000007 + 0xbeeb03ffu, 0x00000007u, // s_mov_b32 vcc_hi, $0x00000007 0x36020081u, // v_and_b32 v1, 1, v0 0x34020281u, // v_lshlrev_b32 v1, 1, v1 0x360000c2u, // v_and_b32 v0, -2, v0 @@ -999,9 +1069,9 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, 0xbf810000u, // s_endpgm // Binary header - 0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du, + 0x5362724fu, 0x07726468u, 0x00004047u, 0u, 0x47f8c29fu, 0x9b2da5cfu, 0xff7c5b7du, 0x00000017u, // VS regs - 0x00000017u, 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u, + 0x0fe000f1u, 0u, 0x000c0000u, 4u, 0u, 4u, 0u, 7u, }; // clang-format on @@ -1009,10 +1079,6 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, const static u32 vs_regs[] = { u32(shader_addr >> 8), u32(shader_addr >> 40), 0xc0000u, 4, 0, 4, 0, 7}; - if (shader_id != 0) { - return 0x8eee00ff; - } - // Normally the driver will do a call to `sceGnmSetVsShader()`, but this function has // a check for zero in the upper part of shader address. In our case, the address is a // pointer to a stack memory, so the check will likely fail. To workaround it we will diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index e37ab81b..d55aab49 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -143,7 +143,8 @@ int PS4_SYSV_ABI sceGnmSdmaOpen(); s32 PS4_SYSV_ABI sceGnmSetCsShader(u32* cmdbuf, u32 size, const u32* cs_regs); s32 PS4_SYSV_ABI sceGnmSetCsShaderWithModifier(u32* cmdbuf, u32 size, const u32* cs_regs, u32 modifier); -int PS4_SYSV_ABI sceGnmSetEmbeddedPsShader(); +s32 PS4_SYSV_ABI sceGnmSetEmbeddedPsShader(u32* cmdbuf, u32 size, u32 shader_id, + u32 shader_modifier); s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, u32 modifier); int PS4_SYSV_ABI sceGnmSetEsShader(); int PS4_SYSV_ABI sceGnmSetGsRingSizes(); From c04fbb75d86cb04ad58169915b3141bb01e44298 Mon Sep 17 00:00:00 2001 From: psucien Date: Mon, 24 Jun 2024 22:53:59 +0200 Subject: [PATCH 2/2] libraries: gnm_driver: added `sceGnmDrawIndexIndirect` and `sceGnmDrawIndirect` --- src/core/libraries/gnmdriver/gnmdriver.cpp | 65 ++++++++++++++++++++-- src/core/libraries/gnmdriver/gnmdriver.h | 7 ++- src/video_core/amdgpu/pm4_cmds.h | 14 +++++ 3 files changed, 78 insertions(+), 8 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 74769e8c..3b4d310f 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -24,6 +24,20 @@ using namespace AmdGpu; static std::unique_ptr liverpool; +enum ShaderStages : u32 { + Cs, + Ps, + Vs, + Gs, + Es, + Hs, + Ls, + + Max +}; + +static constexpr std::array indirect_sgpr_offsets{0u, 0u, 0x4cu, 0u, 0xccu, 0u, 0x14cu}; + // In case of precise gnm driver emulation we need to send a bunch of HW-specific // initialization commands. It may slowdown development at early stage as their // support is not important and can be ignored for a while. @@ -347,9 +361,29 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32 return -1; } -int PS4_SYSV_ABI sceGnmDrawIndexIndirect() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +s32 PS4_SYSV_ABI sceGnmDrawIndexIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 shader_stage, + u32 vertex_sgpr_offset, u32 instance_vgpr_offset, + u32 flags) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (cmdbuf && (size == 9) && (shader_stage < ShaderStages::Max) && + (vertex_sgpr_offset < 0x10u) && (instance_vgpr_offset < 0x10u)) { + + const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable; + cmdbuf = WriteHeader( + cmdbuf, 4, PM4ShaderType::ShaderGraphics, predicate); + + const auto sgpr_offset = indirect_sgpr_offsets[shader_stage]; + + cmdbuf[0] = data_offset; + cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset; + cmdbuf[2] = instance_vgpr_offset == 0 ? 0 : (instance_vgpr_offset & 0xffffu) + sgpr_offset; + cmdbuf[3] = 0; + + WriteTrailingNop<3>(cmdbuf); + return ORBIS_OK; + } + return -1; } int PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti() { @@ -383,9 +417,28 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset, return -1; } -int PS4_SYSV_ABI sceGnmDrawIndirect() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +s32 PS4_SYSV_ABI sceGnmDrawIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 shader_stage, + u32 vertex_sgpr_offset, u32 instance_vgpr_offset, u32 flags) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (cmdbuf && (size == 9) && (shader_stage < ShaderStages::Max) && + (vertex_sgpr_offset < 0x10u) && (instance_vgpr_offset < 0x10u)) { + + const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable; + cmdbuf = WriteHeader(cmdbuf, 4, PM4ShaderType::ShaderGraphics, + predicate); + + const auto sgpr_offset = indirect_sgpr_offsets[shader_stage]; + + cmdbuf[0] = data_offset; + cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset; + cmdbuf[2] = instance_vgpr_offset == 0 ? 0 : (instance_vgpr_offset & 0xffffu) + sgpr_offset; + cmdbuf[3] = 2; // auto index + + WriteTrailingNop<3>(cmdbuf); + return ORBIS_OK; + } + return -1; } int PS4_SYSV_ABI sceGnmDrawIndirectCountMulti() { diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index d55aab49..b05d15f9 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -44,13 +44,16 @@ u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size); s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u32 size, u32 index_count, uintptr_t index_addr, u32 flags, u32 type); s32 PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32 flags); -int PS4_SYSV_ABI sceGnmDrawIndexIndirect(); +s32 PS4_SYSV_ABI sceGnmDrawIndexIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 shader_stage, + u32 vertex_sgpr_offset, u32 instance_vgpr_offset, + u32 flags); int PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(); int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti(); int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced(); s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset, u32 index_count, u32 flags); -int PS4_SYSV_ABI sceGnmDrawIndirect(); +s32 PS4_SYSV_ABI sceGnmDrawIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 shader_stage, + u32 vertex_sgpr_offset, u32 instance_vgpr_offset, u32 flags); int PS4_SYSV_ABI sceGnmDrawIndirectCountMulti(); int PS4_SYSV_ABI sceGnmDrawIndirectMulti(); int PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState(); diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index a3f6cf63..88697d31 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -253,6 +253,20 @@ struct PM4CmdDrawIndexAuto { u32 draw_initiator; }; +struct PM4CmdDrawIndirect { + PM4Type3Header header; ///< header + u32 data_offset; ///< DWORD aligned offset + union { + u32 dw2; + BitField<0, 16, u32> base_vtx_loc; ///< base vertex location + }; + union { + u32 dw3; + BitField<0, 16, u32> start_inst_loc; ///< start instance location + }; + u32 draw_initiator; ///< Draw Initiator Register +}; + enum class DataSelect : u32 { None = 0, Data32Low = 1,