diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1980b368..697095f1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -474,6 +474,7 @@ set(SHADER_RECOMPILER
     src/shader_recompiler/exception.h
     src/shader_recompiler/params.h
     src/shader_recompiler/runtime_info.h
     src/shader_recompiler/specialization.h
+    src/shader_recompiler/backend/bindings.h
     src/shader_recompiler/backend/spirv/emit_spirv.cpp
     src/shader_recompiler/backend/spirv/emit_spirv.h
     src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h
index 1b53c74e..510b0c0e 100644
--- a/src/shader_recompiler/backend/bindings.h
+++ b/src/shader_recompiler/backend/bindings.h
@@ -9,10 +9,10 @@ namespace Shader::Backend {
 
 struct Bindings {
     u32 unified{};
-    u32 uniform_buffer{};
-    u32 storage_buffer{};
-    u32 texture{};
-    u32 image{};
+    u32 buffer{};
+    u32 user_data{};
+
+    auto operator<=>(const Bindings&) const = default;
 };
 
 } // namespace Shader::Backend
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index e671a37e..8aa292b1 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -265,7 +265,7 @@ void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) {
 } // Anonymous namespace
 
 std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
-                           const IR::Program& program, u32& binding) {
+                           const IR::Program& program, Bindings& binding) {
     EmitContext ctx{profile, runtime_info, program.info, binding};
     const Id main{DefineMain(ctx, program)};
     DefineEntryPoint(program, ctx, main);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index aada0ff6..5b8da449 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -4,12 +4,13 @@
 #pragma once
 
 #include <vector>
 
+#include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/ir/program.h"
 #include "shader_recompiler/profile.h"
 
 namespace Shader::Backend::SPIRV {
 
 [[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
-                                         const IR::Program& program, u32& binding);
+                                         const IR::Program& program, Bindings& binding);
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 5fed9b4d..92279c5f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -86,7 +86,14 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
 } // Anonymous namespace
 
 Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
-    return ctx.ConstU32(ctx.info.user_data[static_cast<size_t>(reg)]);
+    const u32 index = ctx.binding.user_data + ctx.info.ud_mask.Index(reg);
+    const u32 half = PushData::UdRegsIndex + (index >> 2);
+    const Id ud_ptr{ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
+                                      ctx.push_data_block, ctx.ConstU32(half),
+                                      ctx.ConstU32(index & 3))};
+    const Id ud_reg{ctx.OpLoad(ctx.U32[1], ud_ptr)};
+    ctx.Name(ud_reg, fmt::format("ud_{}", u32(reg)));
+    return ud_reg;
 }
 
 void EmitGetThreadBitScalarReg(EmitContext& ctx) {
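
EmitGetUserData previously froze the user-data SGPR into the module as a literal constant, forcing a recompile whenever its value changed; it now reads the value from the push-constant block, addressing the packed ud_regs arrays through the stage's usage mask. A minimal standalone sketch of the same index arithmetic, assuming the UdRegsIndex constant and the popcount compaction rule from the PushData/UserDataMask definitions later in this patch (the helper itself is hypothetical):

    // Sketch of the user-data addressing performed by EmitGetUserData.
    // 'mask' marks which SGPRs the stage reads; 'base' is binding.user_data.
    #include <bit>
    #include <cstdint>

    constexpr std::uint32_t UdRegsIndex = 4; // AuxData member of ud_regs0

    struct UdSlot {
        std::uint32_t member;    // UdRegsIndex + (index >> 2): ud_regs0 or ud_regs1
        std::uint32_t component; // index & 3: lane inside that uvec4
    };

    constexpr UdSlot UdSlotFor(std::uint32_t mask, std::uint32_t base,
                               std::uint32_t sgpr) {
        // Compacted position: number of used user-data registers below 'sgpr'.
        const std::uint32_t index = base + std::popcount(mask & ((1u << sgpr) - 1u));
        return {UdRegsIndex + (index >> 2), index & 3u};
    }

The SPIR-V above performs exactly this lookup: OpAccessChain selects member `half` and component `index & 3`, and OpLoad fetches the register value at draw time.
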
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 03cfbdcb..50d9cc8c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -181,6 +181,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod
     case AmdGpu::ImageType::Color1DArray:
     case AmdGpu::ImageType::Color2D:
     case AmdGpu::ImageType::Cube:
+    case AmdGpu::ImageType::Color2DMsaa:
         return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[2]), zero, mips());
     case AmdGpu::ImageType::Color2DArray:
     case AmdGpu::ImageType::Color3D:
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 164c30c5..06f033f1 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -42,7 +42,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
 } // Anonymous namespace
 
 EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
-                         const Info& info_, u32& binding_)
+                         const Info& info_, Bindings& binding_)
     : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
       profile{profile_}, stage{info.stage}, binding{binding_} {
     AddCapability(spv::Capability::Shader);
@@ -173,7 +173,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
 }
 
 void EmitContext::DefineBufferOffsets() {
-    for (auto& buffer : buffers) {
+    for (BufferDefinition& buffer : buffers) {
         const u32 binding = buffer.binding;
         const u32 half = PushData::BufOffsetIndex + (binding >> 4);
         const u32 comp = (binding & 0xf) >> 2;
@@ -182,9 +182,11 @@ void EmitContext::DefineBufferOffsets() {
                                    push_data_block, ConstU32(half), ConstU32(comp))};
         const Id value{OpLoad(U32[1], ptr)};
         buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
+        Name(buffer.offset, fmt::format("buf{}_off", binding));
         buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
+        Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding));
     }
-    for (auto& tex_buffer : texture_buffers) {
+    for (TextureBufferDefinition& tex_buffer : texture_buffers) {
         const u32 binding = tex_buffer.binding;
         const u32 half = PushData::BufOffsetIndex + (binding >> 4);
         const u32 comp = (binding & 0xf) >> 2;
@@ -192,7 +194,8 @@ void EmitContext::DefineBufferOffsets() {
         const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
                                    push_data_block, ConstU32(half), ConstU32(comp))};
         const Id value{OpLoad(U32[1], ptr)};
-        tex_buffer.coord_offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
+        tex_buffer.coord_offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(6U));
+        Name(tex_buffer.coord_offset, fmt::format("texbuf{}_off", binding));
     }
 }
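
DefineBufferOffsets is the consumer side of the buf_offsets bytes: `half` picks the uvec4 member, `comp` the u32 inside it, and the byte position comes from the `offset` local computed just above these hunks (not visible here; presumably `(binding & 3) * 8`). Texel buffers now extract only 6 bits, evidently bounding their element-count offsets to 63. A standalone sketch of the slot arithmetic under that assumption:

    // Sketch: locating buffer binding N's offset byte inside buf_offsets0/1.
    #include <cstdint>

    constexpr std::uint32_t BufOffsetIndex = 2; // AuxData member of buf_offsets0

    struct OffsetSlot {
        std::uint32_t member; // BufOffsetIndex + (binding >> 4)
        std::uint32_t comp;   // (binding & 0xf) >> 2: u32 inside the uvec4
        std::uint32_t bit;    // (binding & 3) * 8: inferred, not shown in the hunk
    };

    constexpr OffsetSlot OffsetSlotFor(std::uint32_t binding) {
        return {BufOffsetIndex + (binding >> 4), (binding & 0xfu) >> 2,
                (binding & 3u) * 8};
    }
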
@@ -330,18 +333,21 @@ void EmitContext::DefineOutputs() {
 
 void EmitContext::DefinePushDataBlock() {
     // Create push constants block for instance steps rates
-    const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4], U32[4]), "AuxData")};
+    const Id struct_type{
+        Name(TypeStruct(U32[1], U32[1], U32[4], U32[4], U32[4], U32[4]), "AuxData")};
     Decorate(struct_type, spv::Decoration::Block);
     MemberName(struct_type, 0, "sr0");
     MemberName(struct_type, 1, "sr1");
     MemberName(struct_type, 2, "buf_offsets0");
     MemberName(struct_type, 3, "buf_offsets1");
-    MemberName(struct_type, 4, "buf_offsets2");
+    MemberName(struct_type, 4, "ud_regs0");
+    MemberName(struct_type, 5, "ud_regs1");
     MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
     MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
     MemberDecorate(struct_type, 2, spv::Decoration::Offset, 8U);
     MemberDecorate(struct_type, 3, spv::Decoration::Offset, 24U);
     MemberDecorate(struct_type, 4, spv::Decoration::Offset, 40U);
+    MemberDecorate(struct_type, 5, spv::Decoration::Offset, 56U);
     push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
     Name(push_data_block, "push_data");
     interfaces.push_back(push_data_block);
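
The decorated offsets describe a 72-byte push-constant block: the old buf_offsets2 member is replaced by two uvec4s of user-data registers. A host-side mirror of the layout, as a sketch for illustration only (the patch never declares such a struct):

    #include <cstddef>
    #include <cstdint>

    // Mirrors the SPIR-V AuxData layout decorated above.
    struct AuxDataMirror {
        std::uint32_t sr0;            // offset 0:  instance step rate 0
        std::uint32_t sr1;            // offset 4:  instance step rate 1
        std::uint8_t buf_offsets[32]; // offset 8:  buf_offsets0/1, one byte per buffer
        std::uint32_t ud_regs[8];     // offset 40: ud_regs0/1 (member 5 sits at 40 + 16 = 56)
    };
    static_assert(offsetof(AuxDataMirror, buf_offsets) == 8);
    static_assert(offsetof(AuxDataMirror, ud_regs) == 40);
    static_assert(sizeof(AuxDataMirror) == 72);
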
@@ -379,7 +385,7 @@ void EmitContext::DefineBuffers() {
         const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
         const Id pointer_type = TypePointer(storage_class, data_type);
         const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
-        Decorate(id, spv::Decoration::Binding, binding);
+        Decorate(id, spv::Decoration::Binding, binding.unified++);
         Decorate(id, spv::Decoration::DescriptorSet, 0U);
         if (is_storage && !desc.is_written) {
             Decorate(id, spv::Decoration::NonWritable);
@@ -388,7 +394,7 @@ void EmitContext::DefineBuffers() {
 
         buffers.push_back({
             .id = id,
-            .binding = binding++,
+            .binding = binding.buffer++,
             .data_types = data_types,
             .pointer_type = pointer_type,
         });
@@ -406,12 +412,12 @@ void EmitContext::DefineTextureBuffers() {
                                        sampled, spv::ImageFormat::Unknown)};
         const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
         const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
-        Decorate(id, spv::Decoration::Binding, binding);
+        Decorate(id, spv::Decoration::Binding, binding.unified++);
         Decorate(id, spv::Decoration::DescriptorSet, 0U);
         Name(id, fmt::format("{}_{}", desc.is_written ? "imgbuf" : "texbuf", desc.sgpr_base));
         texture_buffers.push_back({
             .id = id,
-            .binding = binding++,
+            .binding = binding.buffer++,
             .image_type = image_type,
             .result_type = sampled_type[4],
             .is_integer = is_integer,
@@ -507,6 +513,8 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
         return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, sampled, format);
     case AmdGpu::ImageType::Color2DArray:
         return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, true, false, sampled, format);
+    case AmdGpu::ImageType::Color2DMsaa:
+        return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, true, sampled, format);
     case AmdGpu::ImageType::Color3D:
         return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format);
     case AmdGpu::ImageType::Cube:
@@ -525,7 +533,7 @@ void EmitContext::DefineImagesAndSamplers() {
         const Id image_type{ImageType(*this, image_desc, sampled_type)};
         const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
         const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
-        Decorate(id, spv::Decoration::Binding, binding);
+        Decorate(id, spv::Decoration::Binding, binding.unified++);
         Decorate(id, spv::Decoration::DescriptorSet, 0U);
         Name(id, fmt::format("{}_{}{}_{:02x}", stage, "img", image_desc.sgpr_base,
                              image_desc.dword_offset));
@@ -538,7 +546,6 @@ void EmitContext::DefineImagesAndSamplers() {
             .is_storage = image_desc.is_storage,
         });
         interfaces.push_back(id);
-        ++binding;
     }
     if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) {
         image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
@@ -550,13 +557,12 @@ void EmitContext::DefineImagesAndSamplers() {
     sampler_pointer_type = TypePointer(spv::StorageClass::UniformConstant, sampler_type);
     for (const auto& samp_desc : info.samplers) {
         const Id id{AddGlobalVariable(sampler_pointer_type, spv::StorageClass::UniformConstant)};
-        Decorate(id, spv::Decoration::Binding, binding);
+        Decorate(id, spv::Decoration::Binding, binding.unified++);
         Decorate(id, spv::Decoration::DescriptorSet, 0U);
         Name(id, fmt::format("{}_{}{}_{:02x}", stage, "samp", samp_desc.sgpr_base,
                              samp_desc.dword_offset));
         samplers.push_back(id);
         interfaces.push_back(id);
-        ++binding;
     }
 }
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 1a968390..9029866b 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -6,6 +6,7 @@
 #include <array>
 #include <sirit/sirit.h>
 
+#include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/info.h"
 #include "shader_recompiler/ir/program.h"
 #include "shader_recompiler/profile.h"
@@ -37,7 +38,7 @@ struct VectorIds {
 class EmitContext final : public Sirit::Module {
 public:
     explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, const Info& info,
-                         u32& binding);
+                         Bindings& binding);
     ~EmitContext();
 
     Id Def(const IR::Value& value);
@@ -221,7 +222,7 @@ public:
         bool is_storage = false;
     };
 
-    u32& binding;
+    Bindings& binding;
     boost::container::small_vector buffers;
     boost::container::small_vector texture_buffers;
     boost::container::small_vector images;
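
From here on, two counters advance independently while resources are declared: `unified` numbers every descriptor in set 0 and feeds spv::Decoration::Binding, while `buffer` counts only buffer-like resources and therefore indexes the buf_offsets bytes in push data; `user_data` plays the same role for ud_regs. A toy walkthrough with invented counts:

    #include <cstdint>
    #include <cstdio>

    struct Bindings {
        std::uint32_t unified{};   // descriptor binding index in set 0
        std::uint32_t buffer{};    // byte slot in push_data.buf_offsets
        std::uint32_t user_data{}; // slot in push_data.ud_regs
    };

    int main() {
        Bindings b{};
        // 2 buffers and 1 texel buffer: both counters advance.
        for (int i = 0; i < 3; ++i) {
            b.unified++;
            b.buffer++;
        }
        // 3 images and 1 sampler: only the unified counter advances.
        for (int i = 0; i < 4; ++i) {
            b.unified++;
        }
        std::printf("unified=%u buffer=%u\n", b.unified, b.buffer); // unified=7 buffer=3
        return 0;
    }
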
diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp
index 276bd9db..9d481d32 100644
--- a/src/shader_recompiler/frontend/control_flow_graph.cpp
+++ b/src/shader_recompiler/frontend/control_flow_graph.cpp
@@ -23,7 +23,6 @@ struct Compare {
 
 static IR::Condition MakeCondition(const GcnInst& inst) {
     if (inst.IsCmpx()) {
-        ASSERT(inst.opcode == Opcode::V_CMPX_NE_U32);
         return IR::Condition::Execnz;
     }
 
@@ -99,7 +98,7 @@ void CFG::EmitDivergenceLabels() {
             // with SAVEEXEC to mask the threads that didn't pass the condition
             // of initial branch.
             (inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo) ||
-            inst.opcode == Opcode::V_CMPX_NE_U32;
+            inst.IsCmpx();
     };
     const auto is_close_scope = [](const GcnInst& inst) {
         // Closing an EXEC scope can be either a branch instruction
@@ -109,7 +108,7 @@ void CFG::EmitDivergenceLabels() {
         // Sometimes compiler might insert instructions between the SAVEEXEC and the branch.
         // Those instructions need to be wrapped in the condition as well so allow branch
         // as end scope instruction.
-        inst.opcode == Opcode::S_CBRANCH_EXECZ ||
+        inst.opcode == Opcode::S_CBRANCH_EXECZ || inst.opcode == Opcode::S_ENDPGM ||
         (inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo);
     };
@@ -127,7 +126,8 @@ void CFG::EmitDivergenceLabels() {
     s32 curr_begin = -1;
     for (size_t index = GetIndex(start); index < end_index; index++) {
         const auto& inst = inst_list[index];
-        if (is_close_scope(inst) && curr_begin != -1) {
+        const bool is_close = is_close_scope(inst);
+        if ((is_close || index == end_index - 1) && curr_begin != -1) {
             // If there are no instructions inside scope don't do anything.
             if (index - curr_begin == 1) {
                 curr_begin = -1;
@@ -138,8 +138,16 @@ void CFG::EmitDivergenceLabels() {
             const auto& save_inst = inst_list[curr_begin];
             const Label label = index_to_pc[curr_begin] + save_inst.length;
             AddLabel(label);
-            // Add a label to the close scope instruction as well.
-            AddLabel(index_to_pc[index]);
+            // Add a label to the close scope instruction.
+            // There are 3 cases where we need to close a scope.
+            // * Close scope instruction inside the block
+            // * Close scope instruction at the end of the block (cbranch or endpgm)
+            // * Normal instruction at the end of the block
+            // For the last case we must NOT add a label as that would cause
+            // the instruction to be separated into its own basic block.
+            if (is_close) {
+                AddLabel(index_to_pc[index]);
+            }
             // Reset scope begin.
             curr_begin = -1;
         }
@@ -194,7 +202,7 @@ void CFG::LinkBlocks() {
         const auto end_inst{block.end_inst};
         // Handle divergence block inserted here.
         if (end_inst.opcode == Opcode::S_AND_SAVEEXEC_B64 ||
-            end_inst.opcode == Opcode::S_ANDN2_B64 || end_inst.opcode == Opcode::V_CMPX_NE_U32) {
+            end_inst.opcode == Opcode::S_ANDN2_B64 || end_inst.IsCmpx()) {
             // Blocks are stored ordered by address in the set
             auto next_it = std::next(it);
             auto* target_block = &(*next_it);
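
The reworked scan distinguishes three ways a divergence scope can end, and only the first two get a label at the closing instruction; labeling an ordinary block-final instruction would tear it into its own basic block. A toy model of just that decision (types simplified; not the emulator's code):

    #include <cstddef>
    #include <vector>

    // For each instruction, decide whether a label lands on it when a scope
    // closes there; 'curr_begin' models the index of an open SAVEEXEC scope.
    std::vector<bool> CloseLabels(const std::vector<bool>& is_close_scope,
                                  std::ptrdiff_t curr_begin) {
        std::vector<bool> labels(is_close_scope.size(), false);
        const std::size_t end_index = is_close_scope.size();
        for (std::size_t index = 0; index < end_index; ++index) {
            const bool is_close = is_close_scope[index];
            if ((is_close || index == end_index - 1) && curr_begin != -1) {
                if (is_close) {
                    labels[index] = true; // real close-scope op: split before it
                }
                // A plain instruction at block end closes the scope silently,
                // keeping it inside the current basic block.
                curr_begin = -1;
            }
        }
        return labels;
    }
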
diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp
index 0c9efdc4..1e572a97 100644
--- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp
@@ -281,6 +281,12 @@ void Translator::S_AND_B64(NegateMode negate, const GcnInst& inst) {
             return ir.GetExec();
         case OperandField::ScalarGPR:
             return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
+        case OperandField::ConstZero:
+            return ir.Imm1(false);
+        case OperandField::SignedConstIntNeg:
+            ASSERT_MSG(-s32(operand.code) + SignedConstIntNegMin - 1 == -1,
+                       "SignedConstIntNeg must be -1");
+            return ir.Imm1(true);
         default:
             UNREACHABLE();
         }
@@ -506,6 +512,8 @@ void Translator::S_NOT_B64(const GcnInst& inst) {
             return ir.GetExec();
         case OperandField::ScalarGPR:
             return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
+        case OperandField::ConstZero:
+            return ir.Imm1(false);
         default:
             UNREACHABLE();
         }
@@ -520,6 +528,9 @@ void Translator::S_NOT_B64(const GcnInst& inst) {
     case OperandField::ScalarGPR:
         ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
         break;
+    case OperandField::ExecLo:
+        ir.SetExec(result);
+        break;
     default:
         UNREACHABLE();
     }
diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp
index 6c7d51b5..cfef5858 100644
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -174,7 +174,7 @@ T Translator::GetSrc(const InstOperand& operand) {
             value = ir.IAbs(value);
         }
         if (operand.input_modifier.neg) {
-            UNREACHABLE();
+            value = ir.INeg(value);
         }
     }
     return value;
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index aeeb8e13..7559b853 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -155,6 +155,7 @@ public:
     void V_SUBREV_I32(const GcnInst& inst);
     void V_ADDC_U32(const GcnInst& inst);
     void V_LDEXP_F32(const GcnInst& inst);
+    void V_CVT_PKNORM_U16_F32(const GcnInst& inst);
     void V_CVT_PKRTZ_F16_F32(const GcnInst& inst);
 
     // VOP1
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index a9bd5a80..f497e260 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -89,6 +89,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_ADDC_U32(inst);
     case Opcode::V_LDEXP_F32:
         return V_LDEXP_F32(inst);
+    case Opcode::V_CVT_PKNORM_U16_F32:
+        return V_CVT_PKNORM_U16_F32(inst);
     case Opcode::V_CVT_PKRTZ_F16_F32:
         return V_CVT_PKRTZ_F16_F32(inst);
 
@@ -244,6 +246,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
     // V_CMPX_{OP8}_I32
     case Opcode::V_CMPX_LT_I32:
         return V_CMP_U32(ConditionOp::LT, true, true, inst);
+    case Opcode::V_CMPX_EQ_I32:
+        return V_CMP_U32(ConditionOp::EQ, true, true, inst);
    case Opcode::V_CMPX_GT_I32:
         return V_CMP_U32(ConditionOp::GT, true, true, inst);
     case Opcode::V_CMPX_LG_I32:
@@ -583,6 +587,15 @@ void Translator::V_LDEXP_F32(const GcnInst& inst) {
     SetDst(inst.dst[0], ir.FPLdexp(src0, src1));
 }
 
+void Translator::V_CVT_PKNORM_U16_F32(const GcnInst& inst) {
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
+    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
+    const IR::U32 dst0 = ir.ConvertFToU(32, ir.FPMul(src0, ir.Imm32(65535.f)));
+    const IR::U32 dst1 = ir.ConvertFToU(32, ir.FPMul(src1, ir.Imm32(65535.f)));
+    const IR::VectorReg dst_reg{inst.dst[0].code};
+    ir.SetVectorReg(dst_reg, ir.BitFieldInsert(dst0, dst1, ir.Imm32(16), ir.Imm32(16)));
+}
+
 void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
     const IR::Value vec_f32 =
         ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
@@ -1046,6 +1059,11 @@ void Translator::V_LSHL_B64(const GcnInst& inst) {
     const IR::U64 src0{GetSrc64(inst.src[0])};
     const IR::U64 src1{GetSrc64(inst.src[1])};
     const IR::VectorReg dst_reg{inst.dst[0].code};
+    if (src0.IsImmediate() && src0.U64() == -1) {
+        ir.SetVectorReg(dst_reg, ir.Imm32(0xFFFFFFFF));
+        ir.SetVectorReg(dst_reg + 1, ir.Imm32(0xFFFFFFFF));
+        return;
+    }
     ASSERT_MSG(src0.IsImmediate() && src0.U64() == 0 && src1.IsImmediate() && src1.U64() == 0,
                "V_LSHL_B64 with non-zero src0 or src1 is not supported");
     ir.SetVectorReg(dst_reg, ir.Imm32(0));
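
V_CVT_PKNORM_U16_F32 packs two floats into a pair of 16-bit unorm values. A scalar reference of what the emitted IR computes (note that, as translated, the sources are scaled and truncated without an explicit [0, 1] clamp, while the GCN instruction itself saturates; the sketch stays unclamped to match the code above):

    #include <cstdint>

    // Scalar reference for the packing emitted above.
    std::uint32_t PackUnorm16(float src0, float src1) {
        const std::uint32_t lo = static_cast<std::uint32_t>(src0 * 65535.f);
        const std::uint32_t hi = static_cast<std::uint32_t>(src1 * 65535.f);
        // BitFieldInsert(base = lo, insert = hi, offset = 16, count = 16)
        return (lo & 0xFFFFu) | (hi << 16);
    }
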
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index ac623253..2a9615fc 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -7,6 +7,7 @@
 #include
 #include "common/assert.h"
 #include "common/types.h"
+#include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/ir/attribute.h"
 #include "shader_recompiler/ir/reg.h"
 #include "shader_recompiler/ir/type.h"
@@ -85,11 +86,14 @@ struct SamplerResource {
 using SamplerResourceList = boost::container::small_vector;
 
 struct PushData {
-    static constexpr size_t BufOffsetIndex = 2;
+    static constexpr u32 BufOffsetIndex = 2;
+    static constexpr u32 UdRegsIndex = 4;
+    static constexpr u32 MaxUdRegs = 8;
 
     u32 step0;
     u32 step1;
-    std::array<u8, 48> buf_offsets;
+    std::array<u8, 32> buf_offsets;
+    std::array<u32, MaxUdRegs> ud_regs;
 
     void AddOffset(u32 binding, u32 offset) {
         ASSERT(offset < 256 && binding < buf_offsets.size());
@@ -145,6 +149,24 @@ struct Info {
     AttributeFlags loads{};
     AttributeFlags stores{};
 
+    struct UserDataMask {
+        void Set(IR::ScalarReg reg) noexcept {
+            mask |= 1 << static_cast<u32>(reg);
+        }
+
+        u32 Index(IR::ScalarReg reg) const noexcept {
+            const u32 reg_mask = (1 << static_cast<u32>(reg)) - 1;
+            return std::popcount(mask & reg_mask);
+        }
+
+        u32 NumRegs() const noexcept {
+            return std::popcount(mask);
+        }
+
+        u32 mask;
+    };
+    UserDataMask ud_mask{};
+
     s8 vertex_offset_sgpr = -1;
     s8 instance_offset_sgpr = -1;
@@ -190,11 +212,22 @@ struct Info {
         return data;
     }
 
-    size_t NumBindings() const noexcept {
-        return buffers.size() + texture_buffers.size() + images.size() + samplers.size();
+    void PushUd(Backend::Bindings& bnd, PushData& push) const {
+        u32 mask = ud_mask.mask;
+        while (mask) {
+            const u32 index = std::countr_zero(mask);
+            mask &= ~(1U << index);
+            push.ud_regs[bnd.user_data++] = user_data[index];
+        }
     }
 
-    [[nodiscard]] std::pair GetDrawOffsets() const noexcept {
+    void AddBindings(Backend::Bindings& bnd) const {
+        const auto total_buffers = buffers.size() + texture_buffers.size();
+        bnd.buffer += total_buffers;
+        bnd.unified += total_buffers + images.size() + samplers.size();
+        bnd.user_data += ud_mask.NumRegs();
+    }
+
+    [[nodiscard]] std::pair<u32, u32> GetDrawOffsets() const {
         u32 vertex_offset = 0;
         u32 instance_offset = 0;
         if (vertex_offset_sgpr != -1) {
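
UserDataMask compacts the sparse set of user-data SGPRs a stage actually reads: Set records usage, Index maps an SGPR to its rank among the used registers via popcount, and PushUd copies exactly those registers in ascending order, so the ranks and the copy order agree by construction. A small self-checking demonstration (register numbers invented):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
        std::uint32_t mask = 0;
        for (std::uint32_t reg : {0u, 4u, 9u}) { // stage reads s0, s4, s9
            mask |= 1u << reg;                   // UserDataMask::Set
        }
        // UserDataMask::Index: rank of a register among the used ones.
        auto index = [&](std::uint32_t reg) {
            return std::popcount(mask & ((1u << reg) - 1u));
        };
        assert(index(0) == 0 && index(4) == 1 && index(9) == 2);
        // PushUd walks set bits in ascending order, matching those ranks.
        std::uint32_t slot = 0, m = mask;
        while (m) {
            const std::uint32_t reg = std::countr_zero(m);
            m &= ~(1u << reg);
            assert(index(reg) == static_cast<int>(slot));
            ++slot; // push.ud_regs[bnd.user_data++] = user_data[reg];
        }
        return 0;
    }

Note that AddBindings is written here with a `total_buffers` local so the unified counter advances by this stage's own resource counts; adding the cumulative `bnd.buffer` would overcount once several stages share one Bindings instance.
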
diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
index 7251473d..e995852d 100644
--- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
@@ -8,14 +8,15 @@ namespace Shader::Optimization {
 void Visit(Info& info, IR::Inst& inst) {
     switch (inst.GetOpcode()) {
     case IR::Opcode::GetAttribute:
-    case IR::Opcode::GetAttributeU32: {
+    case IR::Opcode::GetAttributeU32:
         info.loads.Set(inst.Arg(0).Attribute(), inst.Arg(1).U32());
         break;
-    }
-    case IR::Opcode::SetAttribute: {
+    case IR::Opcode::SetAttribute:
         info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
         break;
-    }
+    case IR::Opcode::GetUserData:
+        info.ud_mask.Set(inst.Arg(0).ScalarReg());
+        break;
     case IR::Opcode::LoadSharedU32:
     case IR::Opcode::LoadSharedU64:
     case IR::Opcode::WriteSharedU32:
diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h
index e95559d0..0a3a696b 100644
--- a/src/shader_recompiler/specialization.h
+++ b/src/shader_recompiler/specialization.h
@@ -6,6 +6,7 @@
 #include
 
 #include "common/types.h"
+#include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/info.h"
 
 namespace Shader {
@@ -45,11 +46,11 @@ struct StageSpecialization {
     boost::container::small_vector buffers;
     boost::container::small_vector tex_buffers;
     boost::container::small_vector images;
-    u32 start_binding{};
+    Backend::Bindings start{};
 
     explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
-                                 u32 start_binding_)
-        : info{&info_}, runtime_info{runtime_info_}, start_binding{start_binding_} {
+                                 Backend::Bindings start_)
+        : info{&info_}, runtime_info{runtime_info_}, start{start_} {
         u32 binding{};
         ForEachSharp(binding, buffers, info->buffers,
                      [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
@@ -82,7 +83,7 @@ struct StageSpecialization {
     }
 
     bool operator==(const StageSpecialization& other) const {
-        if (start_binding != other.start_binding) {
+        if (start != other.start) {
            return false;
        }
        if (runtime_info != other.runtime_info) {
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index ac168f18..8cbc3b94 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -581,13 +581,16 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
         return false;
     }
     Image& image = texture_cache.GetImage(image_id);
+    if (False(image.flags & ImageFlagBits::GpuModified)) {
+        return false;
+    }
     ASSERT_MSG(device_addr == image.info.guest_address,
                "Texel buffer aliases image subresources {:x} : {:x}", device_addr,
                image.info.guest_address);
     boost::container::small_vector copies;
     u32 offset = buffer.Offset(image.cpu_addr);
     const u32 num_layers = image.info.resources.layers;
-    u32 total_size = 0;
+    const u32 max_offset = offset + size;
     for (u32 m = 0; m < image.info.resources.levels; m++) {
         const u32 width = std::max(image.info.size.width >> m, 1u);
         const u32 height = std::max(image.info.size.height >> m, 1u);
@@ -595,7 +598,7 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
             image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
         const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
         offset += mip_ofs * num_layers;
-        if (offset + (mip_size * num_layers) > buffer.SizeBytes()) {
+        if (offset + (mip_size * num_layers) > max_offset) {
             break;
         }
         copies.push_back({
@@ -611,7 +614,6 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
             .imageOffset = {0, 0, 0},
             .imageExtent = {width, height, depth},
         });
-        total_size += mip_size * num_layers;
     }
     if (!copies.empty()) {
         scheduler.EndRendering();
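
SynchronizeBufferFromImage gains two fixes: images never modified by the GPU are skipped outright, and the mip walk now stops at the end of the requested window instead of the whole buffer, which also makes the total_size accumulator dead. A toy version of the new bound (mip layout reduced to plain arrays):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Count how many mips fit entirely inside the synced window
    // [base, base + size), walking offsets the way the loop above does.
    std::uint32_t MipsInWindow(const std::vector<std::uint32_t>& mip_sizes,
                               const std::vector<std::uint32_t>& mip_offsets,
                               std::uint32_t base, std::uint32_t size,
                               std::uint32_t num_layers) {
        std::uint32_t offset = base;
        const std::uint32_t max_offset = base + size;
        std::uint32_t count = 0;
        for (std::size_t m = 0; m < mip_sizes.size(); ++m) {
            offset += mip_offsets[m] * num_layers;
            if (offset + mip_sizes[m] * num_layers > max_offset) {
                break; // previously compared against the whole buffer size
            }
            ++count;
        }
        return count;
    }
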
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index e10b7048..4fb445f6 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -112,10 +112,11 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
     boost::container::small_vector set_writes;
     boost::container::small_vector buffer_barriers;
     Shader::PushData push_data{};
-    u32 binding{};
+    Shader::Backend::Bindings binding{};
 
     image_infos.clear();
 
+    info->PushUd(binding, push_data);
     for (const auto& desc : info->buffers) {
         bool is_storage = true;
         if (desc.is_gds_buffer) {
@@ -147,21 +148,20 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
                 buffer_cache.ObtainBuffer(address, size, desc.is_written);
             const u32 offset_aligned = Common::AlignDown(offset, alignment);
             const u32 adjust = offset - offset_aligned;
-            if (adjust != 0) {
-                ASSERT(adjust % 4 == 0);
-                push_data.AddOffset(binding, adjust);
-            }
+            ASSERT(adjust % 4 == 0);
+            push_data.AddOffset(binding.buffer, adjust);
             buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
         }
         set_writes.push_back({
             .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding++,
+            .dstBinding = binding.unified++,
             .dstArrayElement = 0,
             .descriptorCount = 1,
             .descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer
                                          : vk::DescriptorType::eUniformBuffer,
             .pBufferInfo = &buffer_infos.back(),
         });
+        ++binding.buffer;
     }
 
     for (const auto& desc : info->texture_buffers) {
@@ -188,10 +188,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
                        "Texel buffer stride must match format stride");
             const u32 offset_aligned = Common::AlignDown(offset, alignment);
             const u32 adjust = offset - offset_aligned;
-            if (adjust != 0) {
-                ASSERT(adjust % fmt_stride == 0);
-                push_data.AddOffset(binding, adjust / fmt_stride);
-            }
+            ASSERT(adjust % fmt_stride == 0);
+            push_data.AddOffset(binding.buffer, adjust / fmt_stride);
             buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written,
                                           vsharp.GetDataFmt(), vsharp.GetNumberFmt());
             if (auto barrier =
@@ -206,13 +204,14 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
         }
         set_writes.push_back({
             .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding++,
+            .dstBinding = binding.unified++,
             .dstArrayElement = 0,
             .descriptorCount = 1,
             .descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
                                               : vk::DescriptorType::eUniformTexelBuffer,
             .pTexelBufferView = &buffer_view,
         });
+        ++binding.buffer;
     }
 
     BindTextures(texture_cache, *info, binding, set_writes);
@@ -226,7 +225,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
         image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
         set_writes.push_back({
             .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding++,
+            .dstBinding = binding.unified++,
             .dstArrayElement = 0,
             .descriptorCount = 1,
             .descriptorType = vk::DescriptorType::eSampler,
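
The offset handling here is the producer side of DefineBufferOffsets earlier in the patch: the descriptor binds at AlignDown(offset, alignment) and the remainder rides in push data for the shader to re-add. Writing the slot unconditionally, instead of only when non-zero, keeps stale bytes from a previous dispatch out of it. A sketch of the split (assumes a power-of-two alignment, like the real helper):

    #include <cassert>
    #include <cstdint>

    struct BoundRange {
        std::uint32_t aligned_offset; // what the descriptor binds
        std::uint32_t adjust;         // what goes into push_data.buf_offsets
    };

    BoundRange Split(std::uint32_t offset, std::uint32_t alignment) {
        const std::uint32_t aligned = offset & ~(alignment - 1); // Common::AlignDown
        const std::uint32_t adjust = offset - aligned;
        assert(adjust % 4 == 0); // dword-aligned, as the ASSERT above requires
        return {aligned, adjust};
    }
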
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 5fa995b4..29cdf761 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -83,8 +83,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
         .topology = LiverpoolToVK::PrimitiveType(key.prim_type),
         .primitiveRestartEnable = key.enable_primitive_restart != 0,
     };
-    ASSERT_MSG(!key.enable_primitive_restart || key.primitive_restart_index == 0xFFFF,
-               "Primitive restart index other than 0xFFFF is not supported yet");
+    ASSERT_MSG(!key.enable_primitive_restart || key.primitive_restart_index == 0xFFFF ||
+                   key.primitive_restart_index == 0xFFFFFFFF,
+               "Primitive restart index other than -1 is not supported yet");
 
     const vk::PipelineRasterizationStateCreateInfo raster_state = {
         .depthClampEnable = false,
@@ -356,7 +357,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
     boost::container::small_vector set_writes;
     boost::container::small_vector buffer_barriers;
     Shader::PushData push_data{};
-    u32 binding{};
+    Shader::Backend::Bindings binding{};
 
     image_infos.clear();
 
@@ -368,6 +369,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
         push_data.step0 = regs.vgt_instance_step_rate_0;
         push_data.step1 = regs.vgt_instance_step_rate_1;
     }
+    stage->PushUd(binding, push_data);
     for (const auto& buffer : stage->buffers) {
         const auto vsharp = buffer.GetSharp(*stage);
         const bool is_storage = buffer.IsStorage(vsharp);
@@ -383,10 +385,8 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
                 buffer_cache.ObtainBuffer(address, size, buffer.is_written);
             const u32 offset_aligned = Common::AlignDown(offset, alignment);
             const u32 adjust = offset - offset_aligned;
-            if (adjust != 0) {
-                ASSERT(adjust % 4 == 0);
-                push_data.AddOffset(binding, adjust);
-            }
+            ASSERT(adjust % 4 == 0);
+            push_data.AddOffset(binding.buffer, adjust);
             buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
         } else if (instance.IsNullDescriptorSupported()) {
             buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
@@ -396,13 +396,14 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
         }
         set_writes.push_back({
             .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding++,
+            .dstBinding = binding.unified++,
             .dstArrayElement = 0,
             .descriptorCount = 1,
             .descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer
                                          : vk::DescriptorType::eUniformBuffer,
             .pBufferInfo = &buffer_infos.back(),
         });
+        ++binding.buffer;
     }
 
     for (const auto& desc : stage->texture_buffers) {
@@ -419,10 +420,8 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
                        "Texel buffer stride must match format stride");
             const u32 offset_aligned = Common::AlignDown(offset, alignment);
             const u32 adjust = offset - offset_aligned;
-            if (adjust != 0) {
-                ASSERT(adjust % fmt_stride == 0);
-                push_data.AddOffset(binding, adjust / fmt_stride);
-            }
+            ASSERT(adjust % fmt_stride == 0);
+            push_data.AddOffset(binding.buffer, adjust / fmt_stride);
             buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written,
                                           vsharp.GetDataFmt(), vsharp.GetNumberFmt());
             const auto dst_access = desc.is_written ? vk::AccessFlagBits2::eShaderWrite
@@ -437,13 +436,14 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
         }
         set_writes.push_back({
             .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding++,
+            .dstBinding = binding.unified++,
             .dstArrayElement = 0,
             .descriptorCount = 1,
             .descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
                                               : vk::DescriptorType::eUniformTexelBuffer,
             .pTexelBufferView = &buffer_view,
         });
+        ++binding.buffer;
     }
 
     BindTextures(texture_cache, *stage, binding, set_writes);
@@ -463,7 +463,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
         image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
         set_writes.push_back({
             .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding++,
+            .dstBinding = binding.unified++,
             .dstArrayElement = 0,
             .descriptorCount = 1,
             .descriptorType = vk::DescriptorType::eSampler,
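
The graphics path mirrors the compute path: PushUd fills ud_regs before any buffer is walked, so the user_data slots are assigned first. Both pipelines ultimately hand the block to Vulkan as one push-constant range, roughly as below (hedged sketch; the actual call site, stage flags, and layout are outside the hunks shown here):

    #include <vulkan/vulkan.hpp>

    // How a filled PushData block would reach the AuxData push-constant
    // block; the PushData type is assumed from this patch.
    template <typename PushData>
    void PushAuxData(vk::CommandBuffer cmdbuf, vk::PipelineLayout layout,
                     const PushData& push_data) {
        cmdbuf.pushConstants(layout, vk::ShaderStageFlagBits::eAllGraphics, 0U,
                             sizeof(PushData), &push_data);
    }
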
"(permutation)" : ""); if (Config::dumpShaders()) { @@ -366,14 +366,14 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, } std::tuple PipelineCache::GetProgram( - Shader::Stage stage, Shader::ShaderParams params, u32& binding) { + Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding) { const auto runtime_info = BuildRuntimeInfo(stage); auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); if (new_program) { Program* program = program_pool.Create(stage, params); - u32 start_binding = binding; + auto start = binding; const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); - const auto spec = Shader::StageSpecialization(program->info, runtime_info, start_binding); + const auto spec = Shader::StageSpecialization(program->info, runtime_info, start); program->AddPermut(module, std::move(spec)); it_pgm.value() = program; return std::make_tuple(&program->info, module, HashCombine(params.hash, 0)); @@ -391,7 +391,7 @@ std::tuple PipelineCache::GetProgram module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding); program->AddPermut(module, std::move(spec)); } else { - binding += info.NumBindings(); + info.AddBindings(binding); module = it->module; perm_idx = std::distance(program->modules.begin(), it); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 7f0064fb..7e44bbf0 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -49,9 +49,8 @@ public: const ComputePipeline* GetComputePipeline(); - std::tuple GetProgram(Shader::Stage stage, - Shader::ShaderParams params, - u32& binding); + std::tuple GetProgram( + Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding); private: bool RefreshGraphicsKey(); @@ -60,7 +59,8 @@ private: void DumpShader(std::span code, u64 hash, Shader::Stage stage, size_t perm_idx, std::string_view ext); vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info, - std::span code, size_t perm_idx, u32& binding); + std::span code, size_t perm_idx, + Shader::Backend::Bindings& binding); Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage); private: diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp index 77029602..61e56415 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp @@ -20,7 +20,8 @@ Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorH Pipeline::~Pipeline() = default; void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage, - u32& binding, DescriptorWrites& set_writes) const { + Shader::Backend::Bindings& binding, + DescriptorWrites& set_writes) const { using ImageBindingInfo = std::tuple; boost::container::static_vector image_bindings; @@ -67,7 +68,7 @@ void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader set_writes.push_back({ .dstSet = VK_NULL_HANDLE, - .dstBinding = binding++, + .dstBinding = binding.unified++, .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = desc.is_storage ? 
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp
index 77029602..61e56415 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp
@@ -20,7 +20,8 @@ Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorH
 Pipeline::~Pipeline() = default;
 
 void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage,
-                            u32& binding, DescriptorWrites& set_writes) const {
+                            Shader::Backend::Bindings& binding,
+                            DescriptorWrites& set_writes) const {
     using ImageBindingInfo = std::tuple;
     boost::container::static_vector image_bindings;
@@ -67,7 +68,7 @@ void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader
 
         set_writes.push_back({
             .dstSet = VK_NULL_HANDLE,
-            .dstBinding = binding++,
+            .dstBinding = binding.unified++,
             .dstArrayElement = 0,
             .descriptorCount = 1,
             .descriptorType = desc.is_storage ? vk::DescriptorType::eStorageImage
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.h b/src/video_core/renderer_vulkan/vk_pipeline_common.h
index 627ce389..ab99e7b3 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_common.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_common.h
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/info.h"
 #include "video_core/renderer_vulkan/vk_common.h"
 
@@ -33,7 +34,7 @@ public:
     using DescriptorWrites = boost::container::small_vector;
 
     void BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage,
-                      u32& binding, DescriptorWrites& set_writes) const;
+                      Shader::Backend::Bindings& binding, DescriptorWrites& set_writes) const;
 
 protected:
     const Instance& instance;