diff --git a/CMakeLists.txt b/CMakeLists.txt index 22d0e0a2..59f15add 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -518,6 +518,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/frontend/translate/data_share.cpp src/shader_recompiler/frontend/translate/export.cpp src/shader_recompiler/frontend/translate/scalar_alu.cpp + src/shader_recompiler/frontend/translate/scalar_flow.cpp src/shader_recompiler/frontend/translate/scalar_memory.cpp src/shader_recompiler/frontend/translate/translate.cpp src/shader_recompiler/frontend/translate/translate.h @@ -526,6 +527,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/frontend/translate/vector_memory.cpp src/shader_recompiler/frontend/control_flow_graph.cpp src/shader_recompiler/frontend/control_flow_graph.h + src/shader_recompiler/frontend/copy_shader.cpp + src/shader_recompiler/frontend/copy_shader.h src/shader_recompiler/frontend/decode.cpp src/shader_recompiler/frontend/decode.h src/shader_recompiler/frontend/fetch_shader.cpp @@ -542,6 +545,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/ir_passes.h src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp src/shader_recompiler/ir/passes/resource_tracking_pass.cpp + src/shader_recompiler/ir/passes/ring_access_elimination.cpp src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp src/shader_recompiler/ir/abstract_syntax_list.h @@ -574,6 +578,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/amdgpu/pm4_cmds.h src/video_core/amdgpu/pm4_opcodes.h src/video_core/amdgpu/resource.h + src/video_core/amdgpu/types.h src/video_core/amdgpu/default_context.cpp src/video_core/buffer_cache/buffer.cpp src/video_core/buffer_cache/buffer.h diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 891b2608..a585f328 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -5,6 +5,7 @@ #include #include #include +#include "common/assert.h" #include "common/func_traits.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" @@ -12,10 +13,38 @@ #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/program.h" +#include "video_core/amdgpu/types.h" namespace Shader::Backend::SPIRV { namespace { +static constexpr spv::ExecutionMode GetInputPrimitiveType(AmdGpu::PrimitiveType type) { + switch (type) { + case AmdGpu::PrimitiveType::PointList: + return spv::ExecutionMode::InputPoints; + case AmdGpu::PrimitiveType::LineList: + return spv::ExecutionMode::InputLines; + case AmdGpu::PrimitiveType::TriangleList: + case AmdGpu::PrimitiveType::TriangleStrip: + return spv::ExecutionMode::Triangles; + default: + UNREACHABLE(); + } +} + +static constexpr spv::ExecutionMode GetOutputPrimitiveType(AmdGpu::GsOutputPrimitiveType type) { + switch (type) { + case AmdGpu::GsOutputPrimitiveType::PointList: + return spv::ExecutionMode::OutputVertices; + case AmdGpu::GsOutputPrimitiveType::LineStrip: + return spv::ExecutionMode::OutputLineStrip; + case AmdGpu::GsOutputPrimitiveType::TriangleStrip: + return spv::ExecutionMode::OutputTriangleStrip; + default: + UNREACHABLE(); + } +} + template void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { inst->SetDefinition(func(ctx, std::forward(args)...)); @@ -222,6 +251,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { workgroup_size[1], workgroup_size[2]); break; } + case Stage::Export: case Stage::Vertex: execution_model = spv::ExecutionModel::Vertex; break; @@ -240,6 +270,16 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); } break; + case Stage::Geometry: + execution_model = spv::ExecutionModel::Geometry; + ctx.AddExecutionMode(main, GetInputPrimitiveType(ctx.runtime_info.gs_info.in_primitive)); + ctx.AddExecutionMode(main, + GetOutputPrimitiveType(ctx.runtime_info.gs_info.out_primitive[0])); + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, + ctx.runtime_info.gs_info.output_vertices); + ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, + ctx.runtime_info.gs_info.num_invocations); + break; default: throw NotImplementedException("Stage {}", u32(program.info.stage)); } @@ -270,11 +310,20 @@ std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in EmitContext ctx{profile, runtime_info, program.info, binding}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(program, ctx, main); - if (program.info.stage == Stage::Vertex) { + switch (program.info.stage) { + case Stage::Export: + case Stage::Vertex: ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); + break; + case Stage::Geometry: + ctx.AddCapability(spv::Capability::Geometry); + break; + default: + break; } PatchPhiNodes(program, ctx); + binding.user_data += program.info.ud_mask.NumRegs(); return ctx.Assemble(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 605fd6fe..2d48999c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -46,6 +46,7 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { if (IR::IsParam(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; const auto& info{ctx.output_params.at(index)}; + ASSERT(info.num_components > 0); if (info.num_components == 1) { return info.id; } else { @@ -164,7 +165,30 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) { rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value)); } -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) { +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { + if (ctx.info.stage == Stage::Geometry) { + if (IR::IsPosition(attr)) { + ASSERT(attr == IR::Attribute::Position0); + const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); + const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), + ctx.ConstU32(0u))}; + const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); + } + + if (IR::IsParam(attr)) { + const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; + const auto param = ctx.input_params.at(param_id).id; + const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); + const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))}; + const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); + } + UNREACHABLE(); + } + if (IR::IsParam(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; const auto& param{ctx.input_params.at(index)}; @@ -232,6 +256,9 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { case IR::Attribute::IsFrontFace: return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value, ctx.u32_zero_value); + case IR::Attribute::PrimitiveId: + ASSERT(ctx.info.stage == Stage::Geometry); + return ctx.OpLoad(ctx.U32[1], ctx.primitive_id); default: throw NotImplementedException("Read U32 attribute {}", attr); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 3bdea9c1..ec86e5cc 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -27,7 +27,6 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value); void EmitReference(EmitContext&); void EmitPhiMove(EmitContext&); void EmitJoin(EmitContext& ctx); -void EmitBarrier(EmitContext& ctx); void EmitWorkgroupMemoryBarrier(EmitContext& ctx); void EmitDeviceMemoryBarrier(EmitContext& ctx); void EmitGetScc(EmitContext& ctx); @@ -85,7 +84,7 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); @@ -409,4 +408,7 @@ Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane); Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding); Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding); +void EmitEmitVertex(EmitContext& ctx); +void EmitEmitPrimitive(EmitContext& ctx); + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index 283c9b16..c12e4997 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -41,6 +41,14 @@ void EmitDiscardCond(EmitContext& ctx, Id condition) { ctx.AddLabel(merge_label); } +void EmitEmitVertex(EmitContext& ctx) { + ctx.OpEmitVertex(); +} + +void EmitEmitPrimitive(EmitContext& ctx) { + ctx.OpEndPrimitive(); +} + void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { throw NotImplementedException("Geometry streams"); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 32abd8e7..f5b60d51 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -1,8 +1,10 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/assert.h" #include "common/div_ceil.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" +#include "video_core/amdgpu/types.h" #include #include @@ -32,6 +34,19 @@ std::string_view StageName(Stage stage) { throw InvalidArgument("Invalid stage {}", u32(stage)); } +static constexpr u32 NumVertices(AmdGpu::GsOutputPrimitiveType type) { + switch (type) { + case AmdGpu::GsOutputPrimitiveType::PointList: + return 1u; + case AmdGpu::GsOutputPrimitiveType::LineStrip: + return 2u; + case AmdGpu::GsOutputPrimitiveType::TriangleStrip: + return 3u; + default: + UNREACHABLE(); + } +} + template void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... args) { ctx.Name(object, fmt::format(fmt::runtime(format_str), StageName(ctx.stage), @@ -222,6 +237,7 @@ void EmitContext::DefineInputs() { Decorate(subgroup_local_invocation_id, spv::Decoration::Flat); } switch (stage) { + case Stage::Export: case Stage::Vertex: { vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input); base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); @@ -290,6 +306,38 @@ void EmitContext::DefineInputs() { local_invocation_id = DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input); break; + case Stage::Geometry: { + primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); + const auto gl_per_vertex = + Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))), + "gl_PerVertex"); + MemberName(gl_per_vertex, 0, "gl_Position"); + MemberName(gl_per_vertex, 1, "gl_PointSize"); + MemberName(gl_per_vertex, 2, "gl_ClipDistance"); + MemberDecorate(gl_per_vertex, 0, spv::Decoration::BuiltIn, + static_cast(spv::BuiltIn::Position)); + MemberDecorate(gl_per_vertex, 1, spv::Decoration::BuiltIn, + static_cast(spv::BuiltIn::PointSize)); + MemberDecorate(gl_per_vertex, 2, spv::Decoration::BuiltIn, + static_cast(spv::BuiltIn::ClipDistance)); + Decorate(gl_per_vertex, spv::Decoration::Block); + const auto vertices_in = + TypeArray(gl_per_vertex, ConstU32(NumVertices(runtime_info.gs_info.out_primitive[0]))); + gl_in = Name(DefineVar(vertices_in, spv::StorageClass::Input), "gl_in"); + interfaces.push_back(gl_in); + + const auto num_params = runtime_info.gs_info.in_vertex_data_size / 4 - 1u; + for (int param_id = 0; param_id < num_params; ++param_id) { + const IR::Attribute param{IR::Attribute::Param0 + param_id}; + const Id type{ + TypeArray(F32[4], ConstU32(NumVertices(runtime_info.gs_info.out_primitive[0])))}; + const Id id{DefineInput(type, param_id)}; + Name(id, fmt::format("in_attr{}", param_id)); + input_params[param_id] = {id, input_f32, F32[1], 4}; + interfaces.push_back(id); + } + break; + } default: break; } @@ -297,6 +345,7 @@ void EmitContext::DefineInputs() { void EmitContext::DefineOutputs() { switch (stage) { + case Stage::Export: case Stage::Vertex: { output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) || @@ -338,6 +387,18 @@ void EmitContext::DefineOutputs() { interfaces.push_back(id); } break; + case Stage::Geometry: { + output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); + + for (u32 attr_id = 0; attr_id < runtime_info.gs_info.copy_data.num_attrs; attr_id++) { + const IR::Attribute param{IR::Attribute::Param0 + attr_id}; + const Id id{DefineOutput(F32[4], attr_id)}; + Name(id, fmt::format("out_attr{}", attr_id)); + output_params[attr_id] = {id, output_f32, F32[1], 4u}; + interfaces.push_back(id); + } + break; + } default: break; } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 9db01994..147b4c84 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -168,9 +168,12 @@ public: Id output_f32{}; Id output_s32{}; + Id gl_in{}; + boost::container::small_vector interfaces; Id output_position{}; + Id primitive_id{}; Id vertex_index{}; Id instance_id{}; Id push_data_block{}; diff --git a/src/shader_recompiler/frontend/copy_shader.cpp b/src/shader_recompiler/frontend/copy_shader.cpp new file mode 100644 index 00000000..a194aec9 --- /dev/null +++ b/src/shader_recompiler/frontend/copy_shader.cpp @@ -0,0 +1,65 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/frontend/copy_shader.h" +#include "shader_recompiler/frontend/decode.h" +#include "shader_recompiler/ir/attribute.h" + +namespace Shader { + +CopyShaderData ParseCopyShader(const std::span& code) { + Gcn::GcnCodeSlice code_slice{code.data(), code.data() + code.size()}; + Gcn::GcnDecodeContext decoder; + + constexpr u32 token_mov_vcchi = 0xBEEB03FF; + ASSERT_MSG(code[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm"); + + std::array offsets{}; + std::fill(offsets.begin(), offsets.end(), -1); + + CopyShaderData data{}; + Gcn::OperandField sgpr{}; + auto last_attr{IR::Attribute::Position0}; + s32 soffset{0}; + while (!code_slice.atEnd()) { + auto inst = decoder.decodeInstruction(code_slice); + switch (inst.opcode) { + case Gcn::Opcode::S_MOVK_I32: { + sgpr = inst.dst[0].field; + soffset = inst.control.sopk.simm; + break; + } + case Gcn::Opcode::EXP: { + const auto& exp = inst.control.exp; + const IR::Attribute semantic = static_cast(exp.target); + for (int i = 0; i < inst.src_count; ++i) { + const auto ofs = offsets[inst.src[i].code]; + if (ofs != -1) { + data.attr_map[ofs] = {semantic, i}; + if (semantic > last_attr) { + last_attr = semantic; + } + } + } + break; + } + case Gcn::Opcode::BUFFER_LOAD_DWORD: { + offsets[inst.src[1].code] = inst.control.mubuf.offset; + if (inst.src[3].field != Gcn::OperandField::ConstZero) { + ASSERT(inst.src[3].field == sgpr); + offsets[inst.src[1].code] += soffset; + } + break; + } + default: + break; + } + } + + if (last_attr != IR::Attribute::Position0) { + data.num_attrs = static_cast(last_attr) - static_cast(IR::Attribute::Param0) + 1; + } + return data; +} + +} // namespace Shader diff --git a/src/shader_recompiler/frontend/copy_shader.h b/src/shader_recompiler/frontend/copy_shader.h new file mode 100644 index 00000000..ca3e1ac3 --- /dev/null +++ b/src/shader_recompiler/frontend/copy_shader.h @@ -0,0 +1,21 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "common/types.h" +#include "shader_recompiler/ir/attribute.h" + +namespace Shader { + +struct CopyShaderData { + std::unordered_map> attr_map; + u32 num_attrs{0}; +}; + +CopyShaderData ParseCopyShader(const std::span& code); + +} // namespace Shader diff --git a/src/shader_recompiler/frontend/opcodes.h b/src/shader_recompiler/frontend/opcodes.h index cdc1e474..7390a394 100644 --- a/src/shader_recompiler/frontend/opcodes.h +++ b/src/shader_recompiler/frontend/opcodes.h @@ -2491,4 +2491,23 @@ enum class ImageAddrComponent : u32 { Clamp, }; +struct SendMsgSimm { + enum class Message : u32 { + Interrupt = 1, + Gs = 2, + GsDone = 3, + System = 15, + }; + + enum class GsOp : u32 { + Nop = 0, + Cut = 1, + Emit = 2, + EmitCut = 3, + }; + + Message msg : 4; + GsOp op : 2; +}; + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index f5fce311..a453023f 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -55,12 +55,6 @@ void Translator::EmitDataShare(const GcnInst& inst) { } } -// SOPP - -void Translator::S_BARRIER() { - ir.Barrier(); -} - // VOP2 void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) { diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 1e572a97..36c1ec85 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -540,14 +540,6 @@ void Translator::S_BREV_B32(const GcnInst& inst) { SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0]))); } -void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) { - // This only really exists to let resource tracking pass know - // there is an inline cbuf. - const IR::ScalarReg dst{inst.dst[0].code}; - ir.SetScalarReg(dst, ir.Imm32(pc)); - ir.SetScalarReg(dst + 1, ir.Imm32(0)); -} - void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) { // This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs) // However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination diff --git a/src/shader_recompiler/frontend/translate/scalar_flow.cpp b/src/shader_recompiler/frontend/translate/scalar_flow.cpp new file mode 100644 index 00000000..ef8bab78 --- /dev/null +++ b/src/shader_recompiler/frontend/translate/scalar_flow.cpp @@ -0,0 +1,75 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/frontend/opcodes.h" +#include "shader_recompiler/frontend/translate/translate.h" + +namespace Shader::Gcn { + +void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) { + switch (inst.opcode) { + case Opcode::S_BARRIER: + return S_BARRIER(); + case Opcode::S_TTRACEDATA: + LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!"); + return; + case Opcode::S_GETPC_B64: + return S_GETPC_B64(pc, inst); + case Opcode::S_WAITCNT: + case Opcode::S_NOP: + case Opcode::S_ENDPGM: + case Opcode::S_CBRANCH_EXECZ: + case Opcode::S_CBRANCH_SCC0: + case Opcode::S_CBRANCH_SCC1: + case Opcode::S_CBRANCH_VCCNZ: + case Opcode::S_CBRANCH_VCCZ: + case Opcode::S_CBRANCH_EXECNZ: + case Opcode::S_BRANCH: + return; + case Opcode::S_SENDMSG: + S_SENDMSG(inst); + return; + default: + UNREACHABLE(); + } +} + +void Translator::S_BARRIER() { + ir.Barrier(); +} + +void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) { + // This only really exists to let resource tracking pass know + // there is an inline cbuf. + const IR::ScalarReg dst{inst.dst[0].code}; + ir.SetScalarReg(dst, ir.Imm32(pc)); + ir.SetScalarReg(dst + 1, ir.Imm32(0)); +} + +void Translator::S_SENDMSG(const GcnInst& inst) { + const auto& simm = reinterpret_cast(inst.control.sopp.simm); + switch (simm.msg) { + case SendMsgSimm::Message::Gs: { + switch (simm.op) { + case SendMsgSimm::GsOp::Nop: + break; + case SendMsgSimm::GsOp::Cut: + ir.EmitPrimitive(); + break; + case SendMsgSimm::GsOp::Emit: + ir.EmitVertex(); + break; + default: + UNREACHABLE(); + } + break; + } + case SendMsgSimm::Message::GsDone: { + break; + } + default: + UNREACHABLE(); + } +} + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index cfef5858..bae6681c 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -10,6 +10,7 @@ #include "shader_recompiler/info.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" +#include "video_core/amdgpu/types.h" #define MAGIC_ENUM_RANGE_MIN 0 #define MAGIC_ENUM_RANGE_MAX 1515 @@ -35,6 +36,7 @@ void Translator::EmitPrologue() { IR::VectorReg dst_vreg = IR::VectorReg::V0; switch (info.stage) { case Stage::Vertex: + case Stage::Export: // v0: vertex ID, always present ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId)); // v1: instance ID, step rate 0 @@ -76,6 +78,20 @@ void Translator::EmitPrologue() { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2)); } break; + case Stage::Geometry: + switch (runtime_info.gs_info.out_primitive[0]) { + case AmdGpu::GsOutputPrimitiveType::TriangleStrip: + ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2 + [[fallthrough]]; + case AmdGpu::GsOutputPrimitiveType::LineStrip: + ir.SetVectorReg(IR::VectorReg::V1, ir.Imm32(1u)); // vertex 1 + [[fallthrough]]; + default: + ir.SetVectorReg(IR::VectorReg::V0, ir.Imm32(0u)); // vertex 0 + break; + } + ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); + break; default: throw NotImplementedException("Unknown shader stage"); } @@ -359,7 +375,7 @@ void Translator::EmitFetch(const GcnInst& inst) { if (!std::filesystem::exists(dump_dir)) { std::filesystem::create_directories(dump_dir); } - const auto filename = fmt::format("vs_{:#018x}_fetch.bin", info.pgm_hash); + const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash); const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; file.WriteRaw(code, fetch_size); } @@ -424,31 +440,6 @@ void Translator::EmitFetch(const GcnInst& inst) { } } -void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) { - switch (inst.opcode) { - case Opcode::S_BARRIER: - return S_BARRIER(); - case Opcode::S_TTRACEDATA: - LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!"); - return; - case Opcode::S_GETPC_B64: - return S_GETPC_B64(pc, inst); - case Opcode::S_WAITCNT: - case Opcode::S_NOP: - case Opcode::S_ENDPGM: - case Opcode::S_CBRANCH_EXECZ: - case Opcode::S_CBRANCH_SCC0: - case Opcode::S_CBRANCH_SCC1: - case Opcode::S_CBRANCH_VCCNZ: - case Opcode::S_CBRANCH_VCCZ: - case Opcode::S_CBRANCH_EXECNZ: - case Opcode::S_BRANCH: - return; - default: - UNREACHABLE(); - } -} - void Translator::LogMissingOpcode(const GcnInst& inst) { LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({}, category = {})", magic_enum::enum_name(inst.opcode), u32(inst.opcode), @@ -467,7 +458,7 @@ void Translate(IR::Block* block, u32 pc, std::span inst_list, Inf // Special case for emitting fetch shader. if (inst.opcode == Opcode::S_SWAPPC_B64) { - ASSERT(info.stage == Stage::Vertex); + ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export); translator.EmitFetch(inst); continue; } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 6ce9ef10..c7758828 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -116,6 +116,7 @@ public: // SOPP void S_BARRIER(); + void S_SENDMSG(const GcnInst& inst); // Scalar Memory // SMRD diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index c0957c3d..35de7595 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -160,9 +160,19 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) { const auto& mtbuf = inst.control.mtbuf; + const bool is_ring = mtbuf.glc && mtbuf.slc; const IR::VectorReg vaddr{inst.src[0].code}; const IR::ScalarReg sharp{inst.src[2].code * 4}; + const IR::Value soffset{GetSrc(inst.src[3])}; + if (info.stage != Stage::Geometry) { + ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, + "Non immediate offset not supported"); + } + const IR::Value address = [&] -> IR::Value { + if (is_ring) { + return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset); + } if (mtbuf.idxen && mtbuf.offen) { return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1)); } @@ -171,13 +181,12 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) } return {}; }(); - const IR::Value soffset{GetSrc(inst.src[3])}; - ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); - IR::BufferInstInfo info{}; - info.index_enable.Assign(mtbuf.idxen); - info.offset_enable.Assign(mtbuf.offen); - info.inst_offset.Assign(mtbuf.offset); + IR::BufferInstInfo buffer_info{}; + buffer_info.index_enable.Assign(mtbuf.idxen); + buffer_info.offset_enable.Assign(mtbuf.offen); + buffer_info.inst_offset.Assign(mtbuf.offset); + buffer_info.ring_access.Assign(is_ring); if (is_typed) { const auto dmft = static_cast(mtbuf.dfmt); const auto nfmt = static_cast(mtbuf.nfmt); @@ -190,7 +199,7 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); - const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info); + const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, buffer_info); const IR::VectorReg dst_reg{inst.src[1].code}; if (num_dwords == 1) { ir.SetVectorReg(dst_reg, IR::U32{value}); @@ -230,9 +239,20 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst) { void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst) { const auto& mtbuf = inst.control.mtbuf; + const bool is_ring = mtbuf.glc && mtbuf.slc; const IR::VectorReg vaddr{inst.src[0].code}; const IR::ScalarReg sharp{inst.src[2].code * 4}; - const IR::Value address = [&] -> IR::Value { + const IR::Value soffset{GetSrc(inst.src[3])}; + + if (info.stage != Stage::Export && info.stage != Stage::Geometry) { + ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, + "Non immediate offset not supported"); + } + + IR::Value address = [&] -> IR::Value { + if (is_ring) { + return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset); + } if (mtbuf.idxen && mtbuf.offen) { return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1)); } @@ -241,13 +261,12 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst } return {}; }(); - const IR::Value soffset{GetSrc(inst.src[3])}; - ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); - IR::BufferInstInfo info{}; - info.index_enable.Assign(mtbuf.idxen); - info.offset_enable.Assign(mtbuf.offen); - info.inst_offset.Assign(mtbuf.offset); + IR::BufferInstInfo buffer_info{}; + buffer_info.index_enable.Assign(mtbuf.idxen); + buffer_info.offset_enable.Assign(mtbuf.offen); + buffer_info.inst_offset.Assign(mtbuf.offset); + buffer_info.ring_access.Assign(is_ring); if (is_typed) { const auto dmft = static_cast(mtbuf.dfmt); const auto nfmt = static_cast(mtbuf.nfmt); @@ -278,7 +297,7 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); - ir.StoreBuffer(num_dwords, handle, address, value, info); + ir.StoreBuffer(num_dwords, handle, address, value, buffer_info); } void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) { diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 04268f6a..7fc7be75 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -225,8 +225,9 @@ struct Info { } void AddBindings(Backend::Bindings& bnd) const { - bnd.buffer += buffers.size() + texture_buffers.size(); - bnd.unified += bnd.buffer + images.size() + samplers.size(); + const auto total_buffers = buffers.size() + texture_buffers.size(); + bnd.buffer += total_buffers; + bnd.unified += total_buffers + images.size() + samplers.size(); bnd.user_data += ud_mask.NumRegs(); } diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index 540cb8af..e219dfb6 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -6,14 +6,6 @@ namespace Shader::IR { -bool IsParam(Attribute attribute) noexcept { - return attribute >= Attribute::Param0 && attribute <= Attribute::Param31; -} - -bool IsMrt(Attribute attribute) noexcept { - return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7; -} - std::string NameOf(Attribute attribute) { switch (attribute) { case Attribute::RenderTarget0: diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index 2c67411f..0890e88f 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -81,9 +81,17 @@ constexpr size_t NumAttributes = static_cast(Attribute::Max); constexpr size_t NumRenderTargets = 8; constexpr size_t NumParams = 32; -[[nodiscard]] bool IsParam(Attribute attribute) noexcept; +constexpr bool IsPosition(Attribute attribute) noexcept { + return attribute >= Attribute::Position0 && attribute <= Attribute::Position3; +} -[[nodiscard]] bool IsMrt(Attribute attribute) noexcept; +constexpr bool IsParam(Attribute attribute) noexcept { + return attribute >= Attribute::Param0 && attribute <= Attribute::Param31; +} + +constexpr bool IsMrt(Attribute attribute) noexcept { + return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7; +} [[nodiscard]] std::string NameOf(Attribute attribute); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 412c9581..01336c56 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -249,8 +249,8 @@ void IREmitter::SetM0(const U32& value) { Inst(Opcode::SetM0, value); } -F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) { - return Inst(Opcode::GetAttribute, attribute, Imm32(comp)); +F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) { + return Inst(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index)); } U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) { @@ -1553,4 +1553,12 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value Inst(Opcode::ImageWrite, Flags{info}, handle, coords, color); } +void IREmitter::EmitVertex() { + Inst(Opcode::EmitVertex); +} + +void IREmitter::EmitPrimitive() { + Inst(Opcode::EmitPrimitive); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 958f2e88..8657c430 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -78,7 +78,7 @@ public: [[nodiscard]] U1 Condition(IR::Condition cond); - [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0); + [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0); [[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0); void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0); @@ -310,6 +310,9 @@ public: void ImageWrite(const Value& handle, const Value& coords, const Value& color, TextureInstInfo info); + void EmitVertex(); + void EmitPrimitive(); + private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index 601c453d..8d606a6c 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -89,6 +89,8 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::ImageAtomicOr32: case Opcode::ImageAtomicXor32: case Opcode::ImageAtomicExchange32: + case Opcode::EmitVertex: + case Opcode::EmitPrimitive: return true; default: return false; diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 41cc553f..c69dc90a 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -24,6 +24,10 @@ OPCODE(Barrier, Void, OPCODE(WorkgroupMemoryBarrier, Void, ) OPCODE(DeviceMemoryBarrier, Void, ) +// Geometry shader specific +OPCODE(EmitVertex, Void, ) +OPCODE(EmitPrimitive, Void, ) + // Shared memory operations OPCODE(LoadSharedU32, U32, U32, ) OPCODE(LoadSharedU64, U32x2, U32, ) @@ -49,7 +53,7 @@ OPCODE(GetVectorRegister, U32, Vect OPCODE(SetVectorRegister, Void, VectorReg, U32, ) OPCODE(GetGotoVariable, U1, U32, ) OPCODE(SetGotoVariable, Void, U32, U1, ) -OPCODE(GetAttribute, F32, Attribute, U32, ) +OPCODE(GetAttribute, F32, Attribute, U32, U32, ) OPCODE(GetAttributeU32, U32, Attribute, U32, ) OPCODE(SetAttribute, Void, Attribute, F32, U32, ) diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 7e2b962b..e6e389d1 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -15,5 +15,7 @@ void ConstantPropagationPass(IR::BlockList& program); void ResourceTrackingPass(IR::Program& program); void CollectShaderInfoPass(IR::Program& program); void LowerSharedMemToRegisters(IR::Program& program); +void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info, + Stage stage); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp new file mode 100644 index 00000000..857921b1 --- /dev/null +++ b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp @@ -0,0 +1,110 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/frontend/translate/translate.h" +#include "shader_recompiler/ir/opcodes.h" +#include "shader_recompiler/ir/program.h" +#include "shader_recompiler/ir/reg.h" +#include "shader_recompiler/recompiler.h" + +namespace Shader::Optimization { + +void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info, + Stage stage) { + const auto& ForEachInstruction = [&](auto func) { + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + func(ir, inst); + } + } + }; + + switch (stage) { + case Stage::Export: { + ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) { + const auto opcode = inst.GetOpcode(); + switch (opcode) { + case IR::Opcode::StoreBufferU32: { + if (!inst.Flags().ring_access) { + break; + } + + const auto offset = inst.Flags().inst_offset.Value(); + ASSERT(offset < runtime_info.es_info.vertex_data_size * 4); + const auto data = ir.BitCast(IR::U32{inst.Arg(2)}); + const auto attrib = + IR::Value{offset < 16 ? IR::Attribute::Position0 + : IR::Attribute::Param0 + (offset / 16 - 1)}; + const auto comp = (offset / 4) % 4; + + inst.ReplaceOpcode(IR::Opcode::SetAttribute); + inst.ClearArgs(); + inst.SetArg(0, attrib); + inst.SetArg(1, data); + inst.SetArg(2, ir.Imm32(comp)); + break; + } + default: + break; + } + }); + break; + } + case Stage::Geometry: { + ForEachInstruction([&](IR::IREmitter& ir, IR::Inst& inst) { + const auto opcode = inst.GetOpcode(); + switch (opcode) { + case IR::Opcode::LoadBufferU32: { + if (!inst.Flags().ring_access) { + break; + } + + const auto shl_inst = inst.Arg(1).TryInstRecursive(); + const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2; + const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1); + const auto bucket = offset.Resolve().U32() / 256u; + const auto attrib = bucket < 4 ? IR::Attribute::Position0 + : IR::Attribute::Param0 + (bucket / 4 - 1); + const auto comp = bucket % 4; + + auto attr_value = ir.GetAttribute(attrib, comp, vertex_id); + inst.ReplaceOpcode(IR::Opcode::BitCastU32F32); + inst.ClearArgs(); + inst.SetArg(0, attr_value); + break; + } + case IR::Opcode::StoreBufferU32: { + if (!inst.Flags().ring_access) { + break; + } + + const auto offset = inst.Flags().inst_offset.Value(); + const auto data = ir.BitCast(IR::U32{inst.Arg(2)}); + const auto comp_ofs = runtime_info.gs_info.output_vertices * 4u; + const auto output_size = comp_ofs * runtime_info.gs_info.out_vertex_data_size; + + const auto vc_read_ofs = (((offset / comp_ofs) * comp_ofs) % output_size) * 16u; + const auto& it = runtime_info.gs_info.copy_data.attr_map.find(vc_read_ofs); + ASSERT(it != runtime_info.gs_info.copy_data.attr_map.cend()); + const auto& [attr, comp] = it->second; + + inst.ReplaceOpcode(IR::Opcode::SetAttribute); + inst.ClearArgs(); + inst.SetArg(0, IR::Value{attr}); + inst.SetArg(1, data); + inst.SetArg(2, ir.Imm32(comp)); + break; + } + default: + break; + } + }); + break; + } + default: + break; + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index 4783d08e..9ec77e5f 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -7,7 +7,6 @@ #include "common/bit_field.h" #include "common/enum.h" #include "common/types.h" -#include "video_core/amdgpu/pixel_format.h" namespace Shader::IR { @@ -67,6 +66,7 @@ union BufferInstInfo { BitField<0, 1, u32> index_enable; BitField<1, 1, u32> offset_enable; BitField<2, 12, u32> inst_offset; + BitField<14, 1, u32> ring_access; // global + system coherency }; enum class ScalarReg : u32 { diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 12dbc6c1..e13e5d00 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -63,6 +63,7 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info if (program.info.stage != Stage::Compute) { Shader::Optimization::LowerSharedMemToRegisters(program); } + Shader::Optimization::RingAccessElimination(program, runtime_info, program.info.stage); Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::IdentityRemovalPass(program.blocks); Shader::Optimization::DeadCodeEliminationPass(program); diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 115bbe10..8c0838c9 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -8,6 +8,8 @@ #include "common/assert.h" #include "common/types.h" +#include "frontend/copy_shader.h" +#include "video_core/amdgpu/types.h" namespace Shader { @@ -26,13 +28,11 @@ constexpr u32 MaxStageTypes = 6; return static_cast(index); } -enum class MrtSwizzle : u8 { - Identity = 0, - Alt = 1, - Reverse = 2, - ReverseAlt = 3, +struct ExportRuntimeInfo { + u32 vertex_data_size; + + auto operator<=>(const ExportRuntimeInfo&) const noexcept = default; }; -static constexpr u32 MaxColorBuffers = 8; enum class VsOutput : u8 { None, @@ -70,6 +70,33 @@ struct VertexRuntimeInfo { } }; +static constexpr auto GsMaxOutputStreams = 4u; +using GsOutputPrimTypes = std::array; +struct GeometryRuntimeInfo { + u32 num_invocations{}; + u32 output_vertices{}; + u32 in_vertex_data_size{}; + u32 out_vertex_data_size{}; + AmdGpu::PrimitiveType in_primitive; + GsOutputPrimTypes out_primitive; + CopyShaderData copy_data; + + bool operator==(const GeometryRuntimeInfo& other) const noexcept { + return num_invocations && other.num_invocations && + output_vertices == other.output_vertices && in_primitive == other.in_primitive && + std::ranges::equal(out_primitive, other.out_primitive) && + std::ranges::equal(copy_data.attr_map, other.copy_data.attr_map); + } +}; + +enum class MrtSwizzle : u8 { + Identity = 0, + Alt = 1, + Reverse = 2, + ReverseAlt = 3, +}; +static constexpr u32 MaxColorBuffers = 8; + struct FragmentRuntimeInfo { struct PsInput { u8 param_index; @@ -114,7 +141,9 @@ struct RuntimeInfo { u32 num_user_data; u32 num_input_vgprs; u32 num_allocated_vgprs; + ExportRuntimeInfo es_info; VertexRuntimeInfo vs_info; + GeometryRuntimeInfo gs_info; FragmentRuntimeInfo fs_info; ComputeRuntimeInfo cs_info; @@ -128,6 +157,10 @@ struct RuntimeInfo { return vs_info == other.vs_info; case Stage::Compute: return cs_info == other.cs_info; + case Stage::Export: + return es_info == other.es_info; + case Stage::Geometry: + return gs_info == other.gs_info; default: return true; } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 7cc7e5a9..508420bc 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -19,6 +19,7 @@ #include "common/types.h" #include "common/unique_function.h" #include "shader_recompiler/params.h" +#include "types.h" #include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/resource.h" @@ -842,26 +843,6 @@ struct Liverpool { } }; - enum class PrimitiveType : u32 { - None = 0, - PointList = 1, - LineList = 2, - LineStrip = 3, - TriangleList = 4, - TriangleFan = 5, - TriangleStrip = 6, - PatchPrimitive = 9, - AdjLineList = 10, - AdjLineStrip = 11, - AdjTriangleList = 12, - AdjTriangleStrip = 13, - RectList = 17, - LineLoop = 18, - QuadList = 19, - QuadStrip = 20, - Polygon = 21, - }; - enum ContextRegs : u32 { DbZInfo = 0xA010, CbColor0Base = 0xA318, @@ -936,7 +917,12 @@ struct Liverpool { }; union ShaderStageEnable { - u32 raw; + enum VgtStages : u32 { + Vs = 0u, // always enabled + EsGs = 0xB0u, + }; + + VgtStages raw; BitField<0, 2, u32> ls_en; BitField<2, 1, u32> hs_en; BitField<3, 2, u32> es_en; @@ -962,6 +948,81 @@ struct Liverpool { } }; + union GsInstances { + u32 raw; + struct { + u32 enable : 2; + u32 count : 6; + }; + + bool IsEnabled() const { + return enable && count > 0; + } + }; + + union GsOutPrimitiveType { + u32 raw; + struct { + GsOutputPrimitiveType outprim_type : 6; + GsOutputPrimitiveType outprim_type1 : 6; + GsOutputPrimitiveType outprim_type2 : 6; + GsOutputPrimitiveType outprim_type3 : 6; + u32 reserved : 3; + u32 unique_type_per_stream : 1; + }; + + GsOutputPrimitiveType GetPrimitiveType(u32 stream) const { + if (unique_type_per_stream == 0) { + return outprim_type; + } + + switch (stream) { + case 0: + return outprim_type; + case 1: + return outprim_type1; + case 2: + return outprim_type2; + case 3: + return outprim_type3; + default: + UNREACHABLE(); + } + } + }; + + union GsMode { + u32 raw; + BitField<0, 3, u32> mode; + BitField<3, 2, u32> cut_mode; + BitField<22, 2, u32> onchip; + }; + + union StreamOutConfig { + u32 raw; + struct { + u32 streamout_0_en : 1; + u32 streamout_1_en : 1; + u32 streamout_2_en : 1; + u32 streamout_3_en : 1; + u32 rast_stream : 3; + u32 : 1; + u32 rast_stream_mask : 4; + u32 : 19; + u32 use_rast_stream_mask : 1; + }; + }; + + union StreamOutBufferConfig { + u32 raw; + struct { + u32 stream_0_buf_en : 4; + u32 stream_1_buf_en : 4; + u32 stream_2_buf_en : 4; + u32 stream_3_buf_en : 4; + }; + }; + union Eqaa { u32 raw; BitField<0, 1, u32> max_anchor_samples; @@ -1053,9 +1114,13 @@ struct Liverpool { PolygonControl polygon_control; ViewportControl viewport_control; VsOutputControl vs_output_control; - INSERT_PADDING_WORDS(0xA292 - 0xA207 - 1); + INSERT_PADDING_WORDS(0xA290 - 0xA207 - 1); + GsMode vgt_gs_mode; + INSERT_PADDING_WORDS(1); ModeControl mode_control; - INSERT_PADDING_WORDS(0xA29D - 0xA292 - 1); + INSERT_PADDING_WORDS(8); + GsOutPrimitiveType vgt_gs_out_prim_type; + INSERT_PADDING_WORDS(1); u32 index_size; u32 max_index_size; IndexBufferType index_buffer_type; @@ -1066,11 +1131,21 @@ struct Liverpool { INSERT_PADDING_WORDS(0xA2A8 - 0xA2A5 - 1); u32 vgt_instance_step_rate_0; u32 vgt_instance_step_rate_1; - INSERT_PADDING_WORDS(0xA2D5 - 0xA2A9 - 1); + INSERT_PADDING_WORDS(0xA2AB - 0xA2A9 - 1); + u32 vgt_esgs_ring_itemsize; + u32 vgt_gsvs_ring_itemsize; + INSERT_PADDING_WORDS(0xA2CE - 0xA2AC - 1); + BitField<0, 11, u32> vgt_gs_max_vert_out; + INSERT_PADDING_WORDS(0xA2D5 - 0xA2CE - 1); ShaderStageEnable stage_enable; - INSERT_PADDING_WORDS(9); + INSERT_PADDING_WORDS(1); + u32 vgt_gs_vert_itemsize[4]; + INSERT_PADDING_WORDS(4); PolygonOffset poly_offset; - INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5); + GsInstances vgt_gs_instance_cnt; + StreamOutConfig vgt_strmout_config; + StreamOutBufferConfig vgt_strmout_buffer_config; + INSERT_PADDING_WORDS(0xA2F8 - 0xA2E6 - 1); AaConfig aa_config; INSERT_PADDING_WORDS(0xA318 - 0xA2F8 - 1); ColorBuffer color_buffers[NumColorBuffers]; @@ -1291,15 +1366,24 @@ static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202); static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204); static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206); static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207); +static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290); static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292); +static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B); static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D); static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F); static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1); static_assert(GFX6_3D_REG_INDEX(enable_primitive_restart) == 0xA2A5); static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8); static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9); +static_assert(GFX6_3D_REG_INDEX(vgt_esgs_ring_itemsize) == 0xA2AB); +static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC); +static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE); static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5); +static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7); static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF); +static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4); +static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5); +static_assert(GFX6_3D_REG_INDEX(vgt_strmout_buffer_config) == 0xA2E6); static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319); diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h index 88da4963..e83313ea 100644 --- a/src/video_core/amdgpu/pixel_format.h +++ b/src/video_core/amdgpu/pixel_format.h @@ -6,78 +6,10 @@ #include #include #include "common/types.h" +#include "video_core/amdgpu/types.h" namespace AmdGpu { -// Table 8.13 Data and Image Formats [Sea Islands Series Instruction Set Architecture] -enum class DataFormat : u32 { - FormatInvalid = 0, - Format8 = 1, - Format16 = 2, - Format8_8 = 3, - Format32 = 4, - Format16_16 = 5, - Format10_11_11 = 6, - Format11_11_10 = 7, - Format10_10_10_2 = 8, - Format2_10_10_10 = 9, - Format8_8_8_8 = 10, - Format32_32 = 11, - Format16_16_16_16 = 12, - Format32_32_32 = 13, - Format32_32_32_32 = 14, - Format5_6_5 = 16, - Format1_5_5_5 = 17, - Format5_5_5_1 = 18, - Format4_4_4_4 = 19, - Format8_24 = 20, - Format24_8 = 21, - FormatX24_8_32 = 22, - FormatGB_GR = 32, - FormatBG_RG = 33, - Format5_9_9_9 = 34, - FormatBc1 = 35, - FormatBc2 = 36, - FormatBc3 = 37, - FormatBc4 = 38, - FormatBc5 = 39, - FormatBc6 = 40, - FormatBc7 = 41, - FormatFmask8_1 = 47, - FormatFmask8_2 = 48, - FormatFmask8_4 = 49, - FormatFmask16_1 = 50, - FormatFmask16_2 = 51, - FormatFmask32_2 = 52, - FormatFmask32_4 = 53, - FormatFmask32_8 = 54, - FormatFmask64_4 = 55, - FormatFmask64_8 = 56, - Format4_4 = 57, - Format6_5_5 = 58, - Format1 = 59, - Format1_Reversed = 60, - Format32_As_8 = 61, - Format32_As_8_8 = 62, - Format32_As_32_32_32_32 = 63, -}; - -enum class NumberFormat : u32 { - Unorm = 0, - Snorm = 1, - Uscaled = 2, - Sscaled = 3, - Uint = 4, - Sint = 5, - SnormNz = 6, - Float = 7, - Srgb = 9, - Ubnorm = 10, - UbnromNz = 11, - Ubint = 12, - Ubscaled = 13, -}; - [[nodiscard]] constexpr bool IsInteger(NumberFormat nfmt) { return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint; } diff --git a/src/video_core/amdgpu/types.h b/src/video_core/amdgpu/types.h new file mode 100644 index 00000000..8cc023a7 --- /dev/null +++ b/src/video_core/amdgpu/types.h @@ -0,0 +1,106 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +namespace AmdGpu { + +// See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide] +enum class PrimitiveType : u32 { + None = 0, + PointList = 1, + LineList = 2, + LineStrip = 3, + TriangleList = 4, + TriangleFan = 5, + TriangleStrip = 6, + PatchPrimitive = 9, + AdjLineList = 10, + AdjLineStrip = 11, + AdjTriangleList = 12, + AdjTriangleStrip = 13, + RectList = 17, + LineLoop = 18, + QuadList = 19, + QuadStrip = 20, + Polygon = 21, +}; + +enum class GsOutputPrimitiveType : u32 { + PointList = 0, + LineStrip = 1, + TriangleStrip = 2, +}; + +// Table 8.13 Data and Image Formats [Sea Islands Series Instruction Set Architecture] +enum class DataFormat : u32 { + FormatInvalid = 0, + Format8 = 1, + Format16 = 2, + Format8_8 = 3, + Format32 = 4, + Format16_16 = 5, + Format10_11_11 = 6, + Format11_11_10 = 7, + Format10_10_10_2 = 8, + Format2_10_10_10 = 9, + Format8_8_8_8 = 10, + Format32_32 = 11, + Format16_16_16_16 = 12, + Format32_32_32 = 13, + Format32_32_32_32 = 14, + Format5_6_5 = 16, + Format1_5_5_5 = 17, + Format5_5_5_1 = 18, + Format4_4_4_4 = 19, + Format8_24 = 20, + Format24_8 = 21, + FormatX24_8_32 = 22, + FormatGB_GR = 32, + FormatBG_RG = 33, + Format5_9_9_9 = 34, + FormatBc1 = 35, + FormatBc2 = 36, + FormatBc3 = 37, + FormatBc4 = 38, + FormatBc5 = 39, + FormatBc6 = 40, + FormatBc7 = 41, + FormatFmask8_1 = 47, + FormatFmask8_2 = 48, + FormatFmask8_4 = 49, + FormatFmask16_1 = 50, + FormatFmask16_2 = 51, + FormatFmask32_2 = 52, + FormatFmask32_4 = 53, + FormatFmask32_8 = 54, + FormatFmask64_4 = 55, + FormatFmask64_8 = 56, + Format4_4 = 57, + Format6_5_5 = 58, + Format1 = 59, + Format1_Reversed = 60, + Format32_As_8 = 61, + Format32_As_8_8 = 62, + Format32_As_32_32_32_32 = 63, +}; + +enum class NumberFormat : u32 { + Unorm = 0, + Snorm = 1, + Uscaled = 2, + Sscaled = 3, + Uint = 4, + Sint = 5, + SnormNz = 6, + Float = 7, + Srgb = 9, + Ubnorm = 10, + UbnromNz = 11, + Ubint = 12, + Ubscaled = 13, +}; + +} // namespace AmdGpu \ No newline at end of file diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 43dc2fdc..e806705d 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -224,7 +224,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) { // Emulate QuadList primitive type with CPU made index buffer. const auto& regs = liverpool->regs; - if (regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::QuadList) { + if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList) { is_indexed = true; // Emit indices. diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index cd527c62..a68ec1e7 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -61,34 +61,34 @@ vk::CompareOp CompareOp(Liverpool::CompareFunc func) { } } -vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) { +vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) { switch (type) { - case Liverpool::PrimitiveType::PointList: + case AmdGpu::PrimitiveType::PointList: return vk::PrimitiveTopology::ePointList; - case Liverpool::PrimitiveType::LineList: + case AmdGpu::PrimitiveType::LineList: return vk::PrimitiveTopology::eLineList; - case Liverpool::PrimitiveType::LineStrip: + case AmdGpu::PrimitiveType::LineStrip: return vk::PrimitiveTopology::eLineStrip; - case Liverpool::PrimitiveType::TriangleList: + case AmdGpu::PrimitiveType::TriangleList: return vk::PrimitiveTopology::eTriangleList; - case Liverpool::PrimitiveType::TriangleFan: + case AmdGpu::PrimitiveType::TriangleFan: return vk::PrimitiveTopology::eTriangleFan; - case Liverpool::PrimitiveType::TriangleStrip: + case AmdGpu::PrimitiveType::TriangleStrip: return vk::PrimitiveTopology::eTriangleStrip; - case Liverpool::PrimitiveType::AdjLineList: + case AmdGpu::PrimitiveType::AdjLineList: return vk::PrimitiveTopology::eLineListWithAdjacency; - case Liverpool::PrimitiveType::AdjLineStrip: + case AmdGpu::PrimitiveType::AdjLineStrip: return vk::PrimitiveTopology::eLineStripWithAdjacency; - case Liverpool::PrimitiveType::AdjTriangleList: + case AmdGpu::PrimitiveType::AdjTriangleList: return vk::PrimitiveTopology::eTriangleListWithAdjacency; - case Liverpool::PrimitiveType::AdjTriangleStrip: + case AmdGpu::PrimitiveType::AdjTriangleStrip: return vk::PrimitiveTopology::eTriangleStripWithAdjacency; - case Liverpool::PrimitiveType::PatchPrimitive: + case AmdGpu::PrimitiveType::PatchPrimitive: return vk::PrimitiveTopology::ePatchList; - case Liverpool::PrimitiveType::QuadList: + case AmdGpu::PrimitiveType::QuadList: // Needs to generate index buffer on the fly. return vk::PrimitiveTopology::eTriangleList; - case Liverpool::PrimitiveType::RectList: + case AmdGpu::PrimitiveType::RectList: return vk::PrimitiveTopology::eTriangleStrip; default: UNREACHABLE(); diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 6a7825ee..5fb04e5f 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -18,7 +18,7 @@ vk::StencilOp StencilOp(Liverpool::StencilFunc op); vk::CompareOp CompareOp(Liverpool::CompareFunc func); -vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type); +vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type); vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index c10cac6c..9de551ba 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -16,6 +16,10 @@ namespace Vulkan { +static constexpr auto gp_stage_flags = vk::ShaderStageFlagBits::eVertex | + vk::ShaderStageFlagBits::eGeometry | + vk::ShaderStageFlagBits::eFragment; + GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_, vk::PipelineCache pipeline_cache, @@ -27,7 +31,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul BuildDescSetLayout(); const vk::PushConstantRange push_constants = { - .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, + .stageFlags = gp_stage_flags, .offset = 0, .size = sizeof(Shader::PushData), }; @@ -83,7 +87,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pVertexAttributeDescriptions = vertex_attributes.data(), }; - if (key.prim_type == Liverpool::PrimitiveType::RectList && !IsEmbeddedVs()) { + if (key.prim_type == AmdGpu::PrimitiveType::RectList && !IsEmbeddedVs()) { LOG_WARNING(Render_Vulkan, "Rectangle List primitive type is only supported for embedded VS"); } @@ -196,9 +200,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul }, }; - auto stage = u32(Shader::Stage::Vertex); boost::container::static_vector shader_stages; + auto stage = u32(Shader::Stage::Vertex); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eVertex, @@ -206,6 +210,14 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pName = "main", }); } + stage = u32(Shader::Stage::Geometry); + if (infos[stage]) { + shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eGeometry, + .module = modules[stage], + .pName = "main", + }); + } stage = u32(Shader::Stage::Fragment); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ @@ -322,7 +334,7 @@ void GraphicsPipeline::BuildDescSetLayout() { .descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer : vk::DescriptorType::eUniformBuffer, .descriptorCount = 1, - .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, + .stageFlags = gp_stage_flags, }); } for (const auto& tex_buffer : stage->texture_buffers) { @@ -331,7 +343,7 @@ void GraphicsPipeline::BuildDescSetLayout() { .descriptorType = tex_buffer.is_written ? vk::DescriptorType::eStorageTexelBuffer : vk::DescriptorType::eUniformTexelBuffer, .descriptorCount = 1, - .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, + .stageFlags = gp_stage_flags, }); } for (const auto& image : stage->images) { @@ -340,7 +352,7 @@ void GraphicsPipeline::BuildDescSetLayout() { .descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage, .descriptorCount = 1, - .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, + .stageFlags = gp_stage_flags, }); } for (const auto& sampler : stage->samplers) { @@ -348,7 +360,7 @@ void GraphicsPipeline::BuildDescSetLayout() { .binding = binding++, .descriptorType = vk::DescriptorType::eSampler, .descriptorCount = 1, - .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, + .stageFlags = gp_stage_flags, }); } } @@ -518,9 +530,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, desc_set, {}); } } - cmdbuf.pushConstants(*pipeline_layout, - vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, 0U, - sizeof(push_data), &push_data); + cmdbuf.pushConstants(*pipeline_layout, gp_stage_flags, 0U, sizeof(push_data), &push_data); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, Handle()); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index ba499674..f7762eb1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -36,7 +36,7 @@ struct GraphicsPipelineKey { u32 num_samples; u32 mrt_mask; Liverpool::StencilControl stencil; - Liverpool::PrimitiveType prim_type; + AmdGpu::PrimitiveType prim_type; u32 enable_primitive_restart; u32 primitive_restart_index; Liverpool::PolygonMode polygon_mode; @@ -86,13 +86,13 @@ public: } [[nodiscard]] bool IsPrimitiveListTopology() const { - return key.prim_type == Liverpool::PrimitiveType::PointList || - key.prim_type == Liverpool::PrimitiveType::LineList || - key.prim_type == Liverpool::PrimitiveType::TriangleList || - key.prim_type == Liverpool::PrimitiveType::AdjLineList || - key.prim_type == Liverpool::PrimitiveType::AdjTriangleList || - key.prim_type == Liverpool::PrimitiveType::RectList || - key.prim_type == Liverpool::PrimitiveType::QuadList; + return key.prim_type == AmdGpu::PrimitiveType::PointList || + key.prim_type == AmdGpu::PrimitiveType::LineList || + key.prim_type == AmdGpu::PrimitiveType::TriangleList || + key.prim_type == AmdGpu::PrimitiveType::AdjLineList || + key.prim_type == AmdGpu::PrimitiveType::AdjTriangleList || + key.prim_type == AmdGpu::PrimitiveType::RectList || + key.prim_type == AmdGpu::PrimitiveType::QuadList; } private: diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 3f874ff8..7a1d784f 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -322,6 +322,7 @@ bool Instance::CreateDevice() { .geometryShader = features.geometryShader, .logicOp = features.logicOp, .depthBiasClamp = features.depthBiasClamp, + .fillModeNonSolid = features.fillModeNonSolid, .multiViewport = features.multiViewport, .samplerAnisotropy = features.samplerAnisotropy, .vertexPipelineStoresAndAtomics = features.vertexPipelineStoresAndAtomics, diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 2d03b156..d77d0c20 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -147,6 +147,16 @@ public: return list_restart; } + /// Returns true when geometry shaders are supported by the device + bool IsGeometryStageSupported() const { + return features.geometryShader; + } + + /// Returns true when tessellation is supported by the device + bool IsTessellationSupported() const { + return features.tessellationShader; + } + /// Returns the vendor ID of the physical device u32 GetVendorID() const { return properties.vendorID; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 955393b5..929fa9cc 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -7,7 +7,10 @@ #include "common/io_file.h" #include "common/path_util.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/copy_shader.h" #include "shader_recompiler/info.h" +#include "shader_recompiler/recompiler.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" @@ -82,6 +85,13 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { auto info = Shader::RuntimeInfo{stage}; const auto& regs = liverpool->regs; switch (stage) { + case Shader::Stage::Export: { + info.num_user_data = regs.es_program.settings.num_user_regs; + info.num_input_vgprs = regs.es_program.settings.vgpr_comp_cnt; + info.num_allocated_vgprs = regs.es_program.settings.num_vgprs * 4; + info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize; + break; + } case Shader::Stage::Vertex: { info.num_user_data = regs.vs_program.settings.num_user_regs; info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt; @@ -92,6 +102,29 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW; break; } + case Shader::Stage::Geometry: { + info.num_user_data = regs.gs_program.settings.num_user_regs; + info.num_input_vgprs = regs.gs_program.settings.vgpr_comp_cnt; + info.num_allocated_vgprs = regs.gs_program.settings.num_vgprs * 4; + info.gs_info.output_vertices = regs.vgt_gs_max_vert_out; + info.gs_info.num_invocations = + regs.vgt_gs_instance_cnt.IsEnabled() ? regs.vgt_gs_instance_cnt.count : 1; + info.gs_info.in_primitive = regs.primitive_type; + for (u32 stream_id = 0; stream_id < Shader::GsMaxOutputStreams; ++stream_id) { + info.gs_info.out_primitive[stream_id] = + regs.vgt_gs_out_prim_type.GetPrimitiveType(stream_id); + } + info.gs_info.in_vertex_data_size = regs.vgt_esgs_ring_itemsize; + info.gs_info.out_vertex_data_size = regs.vgt_gs_vert_itemsize[0]; + + // Extract semantics offsets from a copy shader + const auto vc_stage = Shader::Stage::Vertex; + const auto* pgm_vc = regs.ProgramForStage(static_cast(vc_stage)); + const auto params_vc = Liverpool::GetParams(*pgm_vc); + DumpShader(params_vc.code, params_vc.hash, Shader::Stage::Vertex, 0, "copy.bin"); + info.gs_info.copy_data = Shader::ParseCopyShader(params_vc.code); + break; + } case Shader::Stage::Fragment: { info.num_user_data = regs.ps_program.settings.num_user_regs; info.num_allocated_vgprs = regs.ps_program.settings.num_vgprs * 4; @@ -149,7 +182,7 @@ PipelineCache::~PipelineCache() = default; const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() { const auto& regs = liverpool->regs; // Tessellation is unsupported so skip the draw to avoid locking up the driver. - if (regs.primitive_type == Liverpool::PrimitiveType::PatchPrimitive) { + if (regs.primitive_type == AmdGpu::PrimitiveType::PatchPrimitive) { return nullptr; } // There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an @@ -163,7 +196,7 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() { LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped"); return nullptr; } - if (regs.primitive_type == Liverpool::PrimitiveType::None) { + if (regs.primitive_type == AmdGpu::PrimitiveType::None) { LOG_TRACE(Render_Vulkan, "Primitive type 'None' skipped"); return nullptr; } @@ -190,15 +223,6 @@ const ComputePipeline* PipelineCache::GetComputePipeline() { return it->second; } -bool ShouldSkipShader(u64 shader_hash, const char* shader_type) { - static constexpr std::array skip_hashes = {}; - if (std::ranges::contains(skip_hashes, shader_hash)) { - LOG_WARNING(Render_Vulkan, "Skipped {} shader hash {:#x}.", shader_type, shader_hash); - return true; - } - return false; -} - bool PipelineCache::RefreshGraphicsKey() { std::memset(&graphics_key, 0, sizeof(GraphicsPipelineKey)); @@ -275,46 +299,66 @@ bool PipelineCache::RefreshGraphicsKey() { } Shader::Backend::Bindings binding{}; - for (u32 i = 0; i < MaxShaderStages; i++) { - if (!regs.stage_enable.IsStageEnabled(i)) { - key.stage_hashes[i] = 0; - infos[i] = nullptr; - continue; + const auto& TryBindStageRemap = [&](Shader::Stage stage_in, Shader::Stage stage_out) -> bool { + const auto stage_in_idx = static_cast(stage_in); + const auto stage_out_idx = static_cast(stage_out); + if (!regs.stage_enable.IsStageEnabled(stage_in_idx)) { + key.stage_hashes[stage_out_idx] = 0; + infos[stage_out_idx] = nullptr; + return false; } - auto* pgm = regs.ProgramForStage(i); + + const auto* pgm = regs.ProgramForStage(stage_in_idx); if (!pgm || !pgm->Address()) { - key.stage_hashes[i] = 0; - infos[i] = nullptr; - continue; + key.stage_hashes[stage_out_idx] = 0; + infos[stage_out_idx] = nullptr; + return false; } + const auto* bininfo = Liverpool::GetBinaryInfo(*pgm); if (!bininfo->Valid()) { LOG_WARNING(Render_Vulkan, "Invalid binary info structure!"); - key.stage_hashes[i] = 0; - infos[i] = nullptr; - continue; - } - if (ShouldSkipShader(bininfo->shader_hash, "graphics")) { - return false; - } - const auto stage = Shader::StageFromIndex(i); - const auto params = Liverpool::GetParams(*pgm); - - if (stage != Shader::Stage::Vertex && stage != Shader::Stage::Fragment) { + key.stage_hashes[stage_out_idx] = 0; + infos[stage_out_idx] = nullptr; return false; } - static bool TessMissingLogged = false; - if (auto* pgm = regs.ProgramForStage(3); - regs.stage_enable.IsStageEnabled(3) && pgm->Address() != 0) { - if (!TessMissingLogged) { - LOG_WARNING(Render_Vulkan, "Tess pipeline compilation skipped"); - TessMissingLogged = true; - } + auto params = Liverpool::GetParams(*pgm); + std::tie(infos[stage_out_idx], modules[stage_out_idx], key.stage_hashes[stage_out_idx]) = + GetProgram(stage_in, params, binding); + return true; + }; + + const auto& TryBindStage = [&](Shader::Stage stage) { return TryBindStageRemap(stage, stage); }; + + const auto& IsGsFeaturesSupported = [&]() -> bool { + // These checks are temporary until all functionality is implemented. + return !regs.vgt_gs_mode.onchip && !regs.vgt_strmout_config.raw; + }; + + TryBindStage(Shader::Stage::Fragment); + + const auto* fs_info = infos[static_cast(Shader::Stage::Fragment)]; + key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u; + + switch (regs.stage_enable.raw) { + case Liverpool::ShaderStageEnable::VgtStages::EsGs: { + if (!instance.IsGeometryStageSupported() || !IsGsFeaturesSupported()) { + break; + } + if (!TryBindStageRemap(Shader::Stage::Export, Shader::Stage::Vertex)) { return false; } - - std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(stage, params, binding); + if (!TryBindStage(Shader::Stage::Geometry)) { + return false; + } + break; + } + default: { + TryBindStage(Shader::Stage::Vertex); + infos[static_cast(Shader::Stage::Geometry)] = nullptr; + break; + } } const auto* vs_info = infos[static_cast(Shader::Stage::Vertex)]; @@ -336,9 +380,6 @@ bool PipelineCache::RefreshGraphicsKey() { } } - const auto* fs_info = infos[static_cast(Shader::Stage::Fragment)]; - key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u; - // Second pass to fill remain CB pipeline key data for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { auto const& col_buf = regs.color_buffers[cb]; @@ -364,9 +405,6 @@ bool PipelineCache::RefreshComputeKey() { Shader::Backend::Bindings binding{}; const auto* cs_pgm = &liverpool->regs.cs_program; const auto cs_params = Liverpool::GetParams(*cs_pgm); - if (ShouldSkipShader(cs_params.hash, "compute")) { - return false; - } std::tie(infos[0], modules[0], compute_key) = GetProgram(Shader::Stage::Compute, cs_params, binding); return true; @@ -378,15 +416,11 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::Backend::Bindings& binding) { LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, perm_idx != 0 ? "(permutation)" : ""); - if (Config::dumpShaders()) { - DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin"); - } + DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin"); const auto ir_program = Shader::TranslateProgram(code, pools, info, runtime_info, profile); const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, runtime_info, ir_program, binding); - if (Config::dumpShaders()) { - DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv"); - } + DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv"); const auto module = CompileSPV(spv, instance.GetDevice()); const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx); @@ -429,6 +463,10 @@ std::tuple PipelineCache::GetProgram void PipelineCache::DumpShader(std::span code, u64 hash, Shader::Stage stage, size_t perm_idx, std::string_view ext) { + if (!Config::dumpShaders()) { + return; + } + using namespace Common::FS; const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps"; if (!std::filesystem::exists(dump_dir)) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 159b489d..6088d99c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -70,9 +70,8 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, s32(vertex_offset), instance_offset); } else { - const u32 num_vertices = regs.primitive_type == AmdGpu::Liverpool::PrimitiveType::RectList - ? 4 - : regs.num_indices; + const u32 num_vertices = + regs.primitive_type == AmdGpu::PrimitiveType::RectList ? 4 : regs.num_indices; cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), vertex_offset, instance_offset); } @@ -88,7 +87,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr address, u32 offset, u32 si return; } - ASSERT_MSG(regs.primitive_type != AmdGpu::Liverpool::PrimitiveType::RectList, + ASSERT_MSG(regs.primitive_type != AmdGpu::PrimitiveType::RectList, "Unsupported primitive type for indirect draw"); try {