diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 9c62c129b..8abcbbf74 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -1384,9 +1384,8 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedPsShader(u32* cmdbuf, u32 size, u32 shader_id, // repeat set shader functionality here as it is trivial. cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, ps_regs[0], 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS - cmdbuf = - PM4CmdSetData::SetShReg(cmdbuf, 10u, ps_regs[2], - ps_regs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5 + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 10u, ps_regs[2], + ps_regs[3]); // SPI_SHADER_PGM_RSRC1_PS/SPI_SHADER_PGM_RSRC2_PS cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c4u, ps_regs[4], ps_regs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b3u, ps_regs[6], @@ -1443,11 +1442,11 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, // pointer to a stack memory, so the check will likely fail. To workaround it we will // repeat set shader functionality here as it is trivial. cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0], vs_regs[1]); // SPI_SHADER_PGM_LO_VS - cmdbuf = - PM4CmdSetData::SetShReg(cmdbuf, 0x4au, vs_regs[2], vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS - cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]); // PA_CL_VS_OUT_CNTL - cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b1u, vs_regs[4]); // SPI_VS_OUT_CONFIG - cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x4au, vs_regs[2], + vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS/SPI_SHADER_PGM_RSRC2_VS + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]); // PA_CL_VS_OUT_CNTL + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b1u, vs_regs[4]); // SPI_VS_OUT_CONFIG + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]); // SPI_SHADER_POS_FORMAT WriteTrailingNop<11>(cmdbuf); return ORBIS_OK; @@ -1475,7 +1474,8 @@ s32 PS4_SYSV_ABI sceGnmSetEsShader(u32* cmdbuf, u32 size, const u32* es_regs, u3 return -1; } - const u32 var = shader_modifier == 0 ? es_regs[2] : (es_regs[2] & 0xfcfffc3f | shader_modifier); + const u32 var = + shader_modifier == 0 ? es_regs[2] : ((es_regs[2] & 0xfcfffc3f) | shader_modifier); cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0xc8u, es_regs[0], 0u); // SPI_SHADER_PGM_LO_ES cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0xcau, var, es_regs[3]); // SPI_SHADER_PGM_RSRC1_ES @@ -1506,9 +1506,8 @@ s32 PS4_SYSV_ABI sceGnmSetGsShader(u32* cmdbuf, u32 size, const u32* gs_regs) { } cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x88u, gs_regs[0], 0u); // SPI_SHADER_PGM_LO_GS - cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x8au, gs_regs[1], - gs_regs[1]); // SPI_SHADER_PGM_RSRC1_GS - + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x8au, gs_regs[2], + gs_regs[3]); // SPI_SHADER_PGM_RSRC1_GS/SPI_SHADER_PGM_RSRC2_GS cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2e5u, gs_regs[4]); // VGT_STRMOUT_CONFIG cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x29bu, gs_regs[5]); // VGT_GS_OUT_PRIM_TYPE cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2e4u, gs_regs[6]); // VGT_GS_INSTANCE_CNT @@ -1535,9 +1534,8 @@ s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u3 } cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x108u, hs_regs[0], 0u); // SPI_SHADER_PGM_LO_HS - cmdbuf = - PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[1], hs_regs[1]); // SPI_SHADER_PGM_RSRC1_HS - + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[2], + hs_regs[3]); // SPI_SHADER_PGM_RSRC1_HS/SPI_SHADER_PGM_RSRC2_HS cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, hs_regs[5], hs_regs[5]); // VGT_HOS_MAX_TESS_LEVEL cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2dbu, hs_regs[4]); // VGT_TF_PARAM @@ -1559,7 +1557,8 @@ s32 PS4_SYSV_ABI sceGnmSetLsShader(u32* cmdbuf, u32 size, const u32* ls_regs, u3 return -1; } - if (shader_modifier & 0xfcfffc3f) { + const auto modifier_mask = ((shader_modifier & 0xfffffc3f) == 0) ? 0xfffffc3f : 0xfcfffc3f; + if (shader_modifier & modifier_mask) { LOG_ERROR(Lib_GnmDriver, "Invalid modifier mask"); return -1; } @@ -1569,7 +1568,8 @@ s32 PS4_SYSV_ABI sceGnmSetLsShader(u32* cmdbuf, u32 size, const u32* ls_regs, u3 return -1; } - const u32 var = shader_modifier == 0 ? ls_regs[2] : (ls_regs[2] & 0xfcfffc3f | shader_modifier); + const u32 var = + shader_modifier == 0 ? ls_regs[2] : ((ls_regs[2] & modifier_mask) | shader_modifier); cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x148u, ls_regs[0], 0u); // SPI_SHADER_PGM_LO_LS cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x14bu, ls_regs[3]); // SPI_SHADER_PGM_RSRC2_LS cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x14au, var, ls_regs[3]); // SPI_SHADER_PGM_RSRC1_LS @@ -1598,9 +1598,9 @@ s32 PS4_SYSV_ABI sceGnmSetPsShader(u32* cmdbuf, u32 size, const u32* ps_regs) { cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, ps_regs[0], 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS - cmdbuf = PM4CmdSetData::SetShReg( - cmdbuf, 10u, ps_regs[2], - ps_regs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5 + cmdbuf = + PM4CmdSetData::SetShReg(cmdbuf, 10u, ps_regs[2], + ps_regs[3]); // SPI_SHADER_PGM_RSRC1_PS/SPI_SHADER_PGM_RSRC2_PS cmdbuf = PM4CmdSetData::SetContextReg( cmdbuf, 0x1c4u, ps_regs[4], ps_regs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b3u, ps_regs[6], @@ -1636,9 +1636,9 @@ s32 PS4_SYSV_ABI sceGnmSetPsShader350(u32* cmdbuf, u32 size, const u32* ps_regs) cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, ps_regs[0], 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS - cmdbuf = PM4CmdSetData::SetShReg( - cmdbuf, 10u, ps_regs[2], - ps_regs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5 + cmdbuf = + PM4CmdSetData::SetShReg(cmdbuf, 10u, ps_regs[2], + ps_regs[3]); // SPI_SHADER_PGM_RSRC1_PS/SPI_SHADER_PGM_RSRC2_PS cmdbuf = PM4CmdSetData::SetContextReg( cmdbuf, 0x1c4u, ps_regs[4], ps_regs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b3u, ps_regs[6], @@ -2145,9 +2145,8 @@ s32 PS4_SYSV_ABI sceGnmUpdateGsShader(u32* cmdbuf, u32 size, const u32* gs_regs) } cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x88u, gs_regs[0], 0u); // SPI_SHADER_PGM_LO_GS - cmdbuf = - PM4CmdSetData::SetShReg(cmdbuf, 0x8au, gs_regs[1], gs_regs[1]); // SPI_SHADER_PGM_RSRC1_GS - + cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x8au, gs_regs[2], + gs_regs[3]); // SPI_SHADER_PGM_RSRC1_GS/SPI_SHADER_PGM_RSRC2_GS cmdbuf = WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e02e5u, gs_regs[4]); cmdbuf = WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e029bu, @@ -2184,9 +2183,9 @@ s32 PS4_SYSV_ABI sceGnmUpdatePsShader(u32* cmdbuf, u32 size, const u32* ps_regs) cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, ps_regs[0], 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS - cmdbuf = PM4CmdSetData::SetShReg( - cmdbuf, 10u, ps_regs[2], - ps_regs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5 + cmdbuf = + PM4CmdSetData::SetShReg(cmdbuf, 10u, ps_regs[2], + ps_regs[3]); // SPI_SHADER_PGM_RSRC1_PS/SPI_SHADER_PGM_RSRC2_PS cmdbuf = WritePacket( cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e01c4u, ps_regs[4], ps_regs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT update @@ -2229,9 +2228,9 @@ s32 PS4_SYSV_ABI sceGnmUpdatePsShader350(u32* cmdbuf, u32 size, const u32* ps_re cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 8u, ps_regs[0], 0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS - cmdbuf = PM4CmdSetData::SetShReg( - cmdbuf, 10u, ps_regs[2], - ps_regs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5 + cmdbuf = + PM4CmdSetData::SetShReg(cmdbuf, 10u, ps_regs[2], + ps_regs[3]); // SPI_SHADER_PGM_RSRC1_PS/SPI_SHADER_PGM_RSRC2_PS cmdbuf = WritePacket( cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e01c4u, ps_regs[4], ps_regs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT update @@ -2275,7 +2274,8 @@ s32 PS4_SYSV_ABI sceGnmUpdateVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, return -1; } - const u32 var = shader_modifier == 0 ? vs_regs[2] : (vs_regs[2] & 0xfcfffc3f | shader_modifier); + const u32 var = + shader_modifier == 0 ? vs_regs[2] : ((vs_regs[2] & 0xfcfffc3f) | shader_modifier); cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0], 0u); // SPI_SHADER_PGM_LO_VS cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x4au, var, vs_regs[3]); // SPI_SHADER_PGM_RSRC1_VS cmdbuf = WritePacket(cmdbuf, PM4ShaderType::ShaderGraphics, 0xc01e0207u, diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 61ed29d5a..98f9d1c7f 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -13,10 +13,12 @@ std::string_view StageName(Stage stage) { switch (stage) { case Stage::Vertex: return "vs"; - case Stage::TessellationControl: - return "tcs"; - case Stage::TessellationEval: - return "tes"; + case Stage::Local: + return "ls"; + case Stage::Export: + return "es"; + case Stage::Hull: + return "hs"; case Stage::Geometry: return "gs"; case Stage::Fragment: diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index ce3b64fc6..67b961850 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -17,11 +17,12 @@ namespace Shader { static constexpr size_t NumUserDataRegs = 16; enum class Stage : u32 { - Vertex, - TessellationControl, - TessellationEval, - Geometry, Fragment, + Vertex, + Geometry, + Export, + Hull, + Local, Compute, }; constexpr u32 MaxStageTypes = 6; @@ -203,7 +204,7 @@ struct fmt::formatter { return ctx.begin(); } auto format(const Shader::Stage& stage, format_context& ctx) const { - constexpr static std::array names = {"vs", "tc", "te", "gs", "fs", "cs"}; + constexpr static std::array names = {"fs", "vs", "gs", "es", "hs", "ls", "cs"}; return fmt::format_to(ctx.out(), "{}", names[static_cast(stage)]); } }; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 536167ff1..54b6c5869 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -828,7 +828,15 @@ struct Liverpool { ShaderProgram ps_program; INSERT_PADDING_WORDS(0x2C); ShaderProgram vs_program; - INSERT_PADDING_WORDS(0x2E00 - 0x2C4C - 16); + INSERT_PADDING_WORDS(0x2C); + ShaderProgram gs_program; + INSERT_PADDING_WORDS(0x2C); + ShaderProgram es_program; + INSERT_PADDING_WORDS(0x2C); + ShaderProgram hs_program; + INSERT_PADDING_WORDS(0x2C); + ShaderProgram ls_program; + INSERT_PADDING_WORDS(0xA4); ComputeProgram cs_program; INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5); DepthRenderControl depth_render_control; @@ -907,12 +915,19 @@ struct Liverpool { const ShaderProgram* ProgramForStage(u32 index) const { switch (index) { case 0: - return &vs_program; - case 4: return &ps_program; - default: - return nullptr; + case 1: + return &vs_program; + case 2: + return &gs_program; + case 3: + return &es_program; + case 4: + return &hs_program; + case 5: + return &ls_program; } + return nullptr; } }; @@ -1017,6 +1032,10 @@ private: static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48); static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C); +static_assert(GFX6_3D_REG_INDEX(gs_program) == 0x2C88); +static_assert(GFX6_3D_REG_INDEX(es_program) == 0x2CC8); +static_assert(GFX6_3D_REG_INDEX(hs_program) == 0x2D08); +static_assert(GFX6_3D_REG_INDEX(ls_program) == 0x2D48); static_assert(GFX6_3D_REG_INDEX(cs_program) == 0x2E00); static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03); static_assert(GFX6_3D_REG_INDEX(cs_program.address_lo) == 0x2E0C); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 019018369..a3ba2f77b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -48,7 +48,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector bindings; boost::container::static_vector attributes; - const auto& vs_info = stages[0]; + const auto& vs_info = stages[u32(Shader::Stage::Vertex)]; for (const auto& input : vs_info.vs_inputs) { if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) { @@ -179,20 +179,21 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .maxDepthBounds = key.depth_bounds_max, }; - u32 shader_count = 1; + u32 shader_count{}; + auto stage = u32(Shader::Stage::Vertex); std::array shader_stages; - shader_stages[0] = vk::PipelineShaderStageCreateInfo{ + shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eVertex, - .module = modules[0], + .module = modules[stage], .pName = "main", }; - if (modules[4]) { - shader_stages[1] = vk::PipelineShaderStageCreateInfo{ + stage = u32(Shader::Stage::Fragment); + if (modules[stage]) { + shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eFragment, - .module = modules[4], + .module = modules[stage], .pName = "main", }; - ++shader_count; } const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined); @@ -411,7 +412,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& } void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const { - const auto& vs_info = stages[0]; + const auto& vs_info = stages[u32(Shader::Stage::Vertex)]; if (vs_info.vs_inputs.empty()) { return; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index ab8be78fb..e1564f8fd 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -77,7 +77,7 @@ public: bool IsEmbeddedVs() const noexcept { static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f; - return key.stage_hashes[0] == EmbeddedVsHash; + return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash; } auto GetWriteMasks() const { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index bf4bbc103..d601e525e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -255,6 +255,12 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { block_pool.ReleaseContents(); inst_pool.ReleaseContents(); + if (stage != Shader::Stage::Compute && stage != Shader::Stage::Fragment && + stage != Shader::Stage::Vertex) { + LOG_ERROR(Render_Vulkan, "Unsupported shader stage {}. PL creation skipped.", stage); + return {}; + } + // Recompile shader to IR. try { LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);