From d91ad6174ebc48d15e28f2bc94c15bf8f2740f17 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Thu, 10 Oct 2024 09:27:34 -0700 Subject: [PATCH] shader_recompiler: Move sampling parameter resolution to tracking pass and support more derivative types. (#1290) * shader_recompiler: Move sampling parameter resolution to tracking pass and support more derivative types. * shader_recompiler: Only track sampler sharp on sample instructions. * shader_recompiler: Fix Inst args size. --- .../backend/spirv/emit_spirv_image.cpp | 27 +- .../backend/spirv/emit_spirv_instructions.h | 6 +- .../frontend/translate/vector_memory.cpp | 194 +++++------- src/shader_recompiler/ir/ir_emitter.cpp | 43 ++- src/shader_recompiler/ir/ir_emitter.h | 26 +- src/shader_recompiler/ir/opcodes.h | 2 +- src/shader_recompiler/ir/opcodes.inc | 11 +- .../ir/passes/resource_tracking_pass.cpp | 296 +++++++++++------- src/shader_recompiler/ir/reg.h | 3 +- src/shader_recompiler/ir/value.h | 2 +- 10 files changed, 338 insertions(+), 272 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 8f062d6e..fc99b892 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -59,19 +59,22 @@ struct ImageOperands { } } - void AddDerivatives(EmitContext& ctx, Id derivatives) { - if (!Sirit::ValidId(derivatives)) { + void AddDerivatives(EmitContext& ctx, Id derivatives_dx, Id derivatives_dy) { + if (!Sirit::ValidId(derivatives_dx) || !Sirit::ValidId(derivatives_dy)) { return; } - const Id dx{ctx.OpVectorShuffle(ctx.F32[2], derivatives, derivatives, 0, 1)}; - const Id dy{ctx.OpVectorShuffle(ctx.F32[2], derivatives, derivatives, 2, 3)}; - Add(spv::ImageOperandsMask::Grad, dx, dy); + Add(spv::ImageOperandsMask::Grad, derivatives_dx, derivatives_dy); } spv::ImageOperandsMask mask{}; boost::container::static_vector operands; }; +Id EmitImageSampleRaw(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address1, Id address2, + Id address3, Id address4) { + UNREACHABLE_MSG("Unreachable instruction"); +} + Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias, const IR::Value& offset) { const auto& texture = ctx.images[handle & 0xFFFF]; @@ -114,7 +117,9 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, operands.AddOffset(ctx, offset); const Id sample = ctx.OpImageSampleDrefImplicitLod(result_type, sampled_image, coords, dref, operands.mask, operands.operands); - return texture.is_integer ? ctx.OpBitcast(ctx.F32[1], sample) : sample; + const Id sample_typed = texture.is_integer ? ctx.OpBitcast(ctx.F32[1], sample) : sample; + return ctx.OpCompositeConstruct(ctx.F32[4], sample_typed, ctx.f32_zero_value, + ctx.f32_zero_value, ctx.f32_zero_value); } Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, @@ -129,7 +134,9 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, operands.Add(spv::ImageOperandsMask::Lod, lod); const Id sample = ctx.OpImageSampleDrefExplicitLod(result_type, sampled_image, coords, dref, operands.mask, operands.operands); - return texture.is_integer ? ctx.OpBitcast(ctx.F32[1], sample) : sample; + const Id sample_typed = texture.is_integer ? ctx.OpBitcast(ctx.F32[1], sample) : sample; + return ctx.OpCompositeConstruct(ctx.F32[4], sample_typed, ctx.f32_zero_value, + ctx.f32_zero_value, ctx.f32_zero_value); } Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, @@ -212,15 +219,15 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords) { return ctx.OpImageQueryLod(ctx.F32[2], sampled_image, coords); } -Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives, - const IR::Value& offset, Id lod_clamp) { +Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives_dx, + Id derivatives_dy, const IR::Value& offset, const IR::Value& lod_clamp) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id result_type = texture.data_types->Get(4); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); ImageOperands operands; - operands.AddDerivatives(ctx, derivatives); + operands.AddDerivatives(ctx, derivatives_dx, derivatives_dy); operands.AddOffset(ctx, offset); const Id sample = ctx.OpImageSampleExplicitLod(result_type, sampled_image, coords, operands.mask, operands.operands); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 6ae1ef24..02b98b34 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -368,6 +368,8 @@ Id EmitConvertF64U64(EmitContext& ctx, Id value); Id EmitConvertU16U32(EmitContext& ctx, Id value); Id EmitConvertU32U16(EmitContext& ctx, Id value); +Id EmitImageSampleRaw(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address1, Id address2, + Id address3, Id address4); Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias, const IR::Value& offset); Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, @@ -384,8 +386,8 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const Id lod, Id ms); Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips); Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords); -Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives, - const IR::Value& offset, Id lod_clamp); +Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives_dx, + Id derivatives_dy, const IR::Value& offset, const IR::Value& lod_clamp); Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index e76ba6d8..b7ad3b36 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -411,7 +411,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) { ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3)); IR::TextureInstInfo info{}; - info.explicit_lod.Assign(has_mip); + info.has_lod.Assign(has_mip); const IR::Value texel = ir.ImageFetch(handle, body, {}, {}, {}, info); for (u32 i = 0; i < 4; i++) { @@ -513,6 +513,76 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) { } } +IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::ScalarReg tsharp_reg, + const IR::ScalarReg sampler_reg, const IR::VectorReg addr_reg, + bool gather) { + const auto& mimg = inst.control.mimg; + const auto flags = MimgModifierFlags(mimg.mod); + + IR::TextureInstInfo info{}; + info.is_depth.Assign(flags.test(MimgModifier::Pcf)); + info.has_bias.Assign(flags.test(MimgModifier::LodBias)); + info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp)); + info.force_level0.Assign(flags.test(MimgModifier::Level0)); + info.has_offset.Assign(flags.test(MimgModifier::Offset)); + info.has_lod.Assign(flags.any(MimgModifier::Lod)); + info.is_array.Assign(mimg.da); + + if (gather) { + info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1); + info.is_gather.Assign(true); + } else { + info.has_derivatives.Assign(flags.test(MimgModifier::Derivative)); + } + + // Load first dword of T# and S#. We will use them as the handle that will guide resource + // tracking pass where to read the sharps. This will later also get patched to the SPIRV texture + // binding index. + const IR::Value handle = + ir.CompositeConstruct(ir.GetScalarReg(tsharp_reg), ir.GetScalarReg(sampler_reg)); + + // Determine how many address registers need to be passed. + // The image type is unknown, so add all 4 possible base registers and resolve later. + int num_addr_regs = 4; + if (info.has_offset) { + ++num_addr_regs; + } + if (info.has_bias) { + ++num_addr_regs; + } + if (info.is_depth) { + ++num_addr_regs; + } + if (info.has_derivatives) { + // The image type is unknown, so add all 6 possible derivative registers and resolve later. + num_addr_regs += 6; + } + + // Fetch all the address registers to pass in the IR instruction. There can be up to 13 + // registers. + const auto get_addr_reg = [&](int index) -> IR::F32 { + if (index >= num_addr_regs) { + return ir.Imm32(0.f); + } + return ir.GetVectorReg(addr_reg + index); + }; + const IR::Value address1 = + ir.CompositeConstruct(get_addr_reg(0), get_addr_reg(1), get_addr_reg(2), get_addr_reg(3)); + const IR::Value address2 = + ir.CompositeConstruct(get_addr_reg(4), get_addr_reg(5), get_addr_reg(6), get_addr_reg(7)); + const IR::Value address3 = + ir.CompositeConstruct(get_addr_reg(8), get_addr_reg(9), get_addr_reg(10), get_addr_reg(11)); + const IR::Value address4 = get_addr_reg(12); + + // Issue the placeholder IR instruction. + IR::Value texel = ir.ImageSampleRaw(handle, address1, address2, address3, address4, info); + if (info.is_depth && !gather) { + // For non-gather depth sampling, only return a single value. + texel = ir.CompositeExtract(texel, 0); + } + return texel; +} + void Translator::IMAGE_SAMPLE(const GcnInst& inst) { const auto& mimg = inst.control.mimg; IR::VectorReg addr_reg{inst.src[0].code}; @@ -521,72 +591,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) { const IR::ScalarReg sampler_reg{inst.src[3].code * 4}; const auto flags = MimgModifierFlags(mimg.mod); - // Load first dword of T# and S#. We will use them as the handle that will guide resource - // tracking pass where to read the sharps. This will later also get patched to the SPIRV texture - // binding index. - const IR::Value handle = - ir.CompositeConstruct(ir.GetScalarReg(tsharp_reg), ir.GetScalarReg(sampler_reg)); - - // Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction - // Set Architecture - const IR::U32 offset = - flags.test(MimgModifier::Offset) ? ir.GetVectorReg(addr_reg++) : IR::U32{}; - const IR::F32 bias = - flags.test(MimgModifier::LodBias) ? ir.GetVectorReg(addr_reg++) : IR::F32{}; - const IR::F32 dref = - flags.test(MimgModifier::Pcf) ? ir.GetVectorReg(addr_reg++) : IR::F32{}; - const IR::Value derivatives = [&] -> IR::Value { - if (!flags.test(MimgModifier::Derivative)) { - return {}; - } - addr_reg = addr_reg + 4; - return ir.CompositeConstruct( - ir.GetVectorReg(addr_reg - 4), ir.GetVectorReg(addr_reg - 3), - ir.GetVectorReg(addr_reg - 2), ir.GetVectorReg(addr_reg - 1)); - }(); - - // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler - // Since these are at most 4 dwords, we load them into a single uvec4 and place them - // in coords field of the instruction. Then the resource tracking pass will patch the - // IR instruction to fill in lod_clamp field. - const IR::Value body = ir.CompositeConstruct( - ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1), - ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3)); - - // Derivatives are tricky because their number depends on the texture type which is located in - // T#. We don't have access to T# though until resource tracking pass. For now assume if - // derivatives are present, that a 2D image is bound. - const bool has_derivatives = flags.test(MimgModifier::Derivative); - const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod); - - IR::TextureInstInfo info{}; - info.is_depth.Assign(flags.test(MimgModifier::Pcf)); - info.has_bias.Assign(flags.test(MimgModifier::LodBias)); - info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp)); - info.force_level0.Assign(flags.test(MimgModifier::Level0)); - info.has_offset.Assign(flags.test(MimgModifier::Offset)); - info.explicit_lod.Assign(explicit_lod); - info.has_derivatives.Assign(has_derivatives); - info.is_array.Assign(mimg.da); - - // Issue IR instruction, leaving unknown fields blank to patch later. - const IR::Value texel = [&]() -> IR::Value { - if (has_derivatives) { - return ir.ImageGradient(handle, body, derivatives, offset, {}, info); - } - if (!flags.test(MimgModifier::Pcf)) { - if (explicit_lod) { - return ir.ImageSampleExplicitLod(handle, body, offset, info); - } else { - return ir.ImageSampleImplicitLod(handle, body, bias, offset, info); - } - } - if (explicit_lod) { - return ir.ImageSampleDrefExplicitLod(handle, body, dref, offset, info); - } - return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, info); - }(); - + const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, false); for (u32 i = 0; i < 4; i++) { if (((mimg.dmask >> i) & 1) == 0) { continue; @@ -609,60 +614,13 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) { const IR::ScalarReg sampler_reg{inst.src[3].code * 4}; const auto flags = MimgModifierFlags(mimg.mod); - // Load first dword of T# and S#. We will use them as the handle that will guide resource - // tracking pass where to read the sharps. This will later also get patched to the SPIRV texture - // binding index. - const IR::Value handle = - ir.CompositeConstruct(ir.GetScalarReg(tsharp_reg), ir.GetScalarReg(sampler_reg)); - - // Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction - // Set Architecture - const IR::Value offset = - flags.test(MimgModifier::Offset) ? ir.GetVectorReg(addr_reg++) : IR::Value{}; - const IR::F32 bias = - flags.test(MimgModifier::LodBias) ? ir.GetVectorReg(addr_reg++) : IR::F32{}; - const IR::F32 dref = - flags.test(MimgModifier::Pcf) ? ir.GetVectorReg(addr_reg++) : IR::F32{}; - - // Derivatives are tricky because their number depends on the texture type which is located in - // T#. We don't have access to T# though until resource tracking pass. For now assume no - // derivatives are present, otherwise we don't know where coordinates are placed in the address - // stream. - ASSERT_MSG(!flags.test(MimgModifier::Derivative), "Derivative image instruction"); - - // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler - // Since these are at most 4 dwords, we load them into a single uvec4 and place them - // in coords field of the instruction. Then the resource tracking pass will patch the - // IR instruction to fill in lod_clamp field. - const IR::Value body = ir.CompositeConstruct( - ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1), - ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3)); - - const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod); - - IR::TextureInstInfo info{}; - info.is_depth.Assign(flags.test(MimgModifier::Pcf)); - info.has_bias.Assign(flags.test(MimgModifier::LodBias)); - info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp)); - info.force_level0.Assign(flags.test(MimgModifier::Level0)); - info.has_offset.Assign(flags.test(MimgModifier::Offset)); - // info.explicit_lod.Assign(explicit_lod); - info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1); - info.is_array.Assign(mimg.da); - - // Issue IR instruction, leaving unknown fields blank to patch later. - const IR::Value texel = [&]() -> IR::Value { - const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{}; - if (!flags.test(MimgModifier::Pcf)) { - return ir.ImageGather(handle, body, offset, info); - } - ASSERT(mimg.dmask & 1); // should be always 1st (R) component - return ir.ImageGatherDref(handle, body, offset, dref, info); - }(); - // For gather4 instructions dmask selects which component to read and must have // only one bit set to 1 ASSERT_MSG(std::popcount(mimg.dmask) == 1, "Unexpected bits in gather dmask"); + // should be always 1st (R) component for depth + ASSERT(!flags.test(MimgModifier::Pcf) || mimg.dmask & 1); + + const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, true); for (u32 i = 0; i < 4; i++) { const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)}; ir.SetVectorReg(dest_reg++, value); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 4f5eb5c3..e1b0eeed 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1492,27 +1492,34 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value); } -Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& body, const F32& bias, - const U32& offset, TextureInstInfo info) { - return Inst(Opcode::ImageSampleImplicitLod, Flags{info}, handle, body, bias, offset); +Value IREmitter::ImageSampleRaw(const Value& handle, const Value& address1, const Value& address2, + const Value& address3, const Value& address4, + TextureInstInfo info) { + return Inst(Opcode::ImageSampleRaw, Flags{info}, handle, address1, address2, address3, + address4); } -Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& body, const U32& offset, - TextureInstInfo info) { - return Inst(Opcode::ImageSampleExplicitLod, Flags{info}, handle, body, IR::F32{}, offset); +Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, + const Value& offset, TextureInstInfo info) { + return Inst(Opcode::ImageSampleImplicitLod, Flags{info}, handle, coords, bias, offset); } -F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& body, const F32& dref, - const F32& bias, const U32& offset, - TextureInstInfo info) { - return Inst(Opcode::ImageSampleDrefImplicitLod, Flags{info}, handle, body, dref, bias, - offset); +Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, + const Value& offset, TextureInstInfo info) { + return Inst(Opcode::ImageSampleExplicitLod, Flags{info}, handle, coords, lod, offset); } -F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& body, const F32& dref, - const U32& offset, TextureInstInfo info) { - return Inst(Opcode::ImageSampleDrefExplicitLod, Flags{info}, handle, body, dref, IR::F32{}, - offset); +Value IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, + const F32& dref, const F32& bias, const Value& offset, + TextureInstInfo info) { + return Inst(Opcode::ImageSampleDrefImplicitLod, Flags{info}, handle, coords, dref, bias, + offset); +} + +Value IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, + const F32& dref, const F32& lod, const Value& offset, + TextureInstInfo info) { + return Inst(Opcode::ImageSampleDrefExplicitLod, Flags{info}, handle, coords, dref, lod, offset); } Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset, @@ -1544,9 +1551,11 @@ Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, Texture return Inst(Opcode::ImageQueryLod, Flags{info}, handle, coords); } -Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivatives, +Value IREmitter::ImageGradient(const Value& handle, const Value& coords, + const Value& derivatives_dx, const Value& derivatives_dy, const Value& offset, const F32& lod_clamp, TextureInstInfo info) { - return Inst(Opcode::ImageGradient, Flags{info}, handle, coords, derivatives, offset, lod_clamp); + return Inst(Opcode::ImageGradient, Flags{info}, handle, coords, derivatives_dx, derivatives_dy, + offset, lod_clamp); } Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 2ebac037..b3f51308 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -277,20 +277,25 @@ public: [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value, TextureInstInfo info); + [[nodiscard]] Value ImageSampleRaw(const Value& handle, const Value& address1, + const Value& address2, const Value& address3, + const Value& address4, TextureInstInfo info); + [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& body, - const F32& bias, const U32& offset, + const F32& bias, const Value& offset, TextureInstInfo info); [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& body, - const U32& offset, TextureInstInfo info); + const F32& lod, const Value& offset, + TextureInstInfo info); - [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& body, - const F32& dref, const F32& bias, - const U32& offset, TextureInstInfo info); + [[nodiscard]] Value ImageSampleDrefImplicitLod(const Value& handle, const Value& body, + const F32& dref, const F32& bias, + const Value& offset, TextureInstInfo info); - [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& body, - const F32& dref, const U32& offset, - TextureInstInfo info); + [[nodiscard]] Value ImageSampleDrefExplicitLod(const Value& handle, const Value& body, + const F32& dref, const F32& lod, + const Value& offset, TextureInstInfo info); [[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod, const U1& skip_mips); @@ -306,8 +311,9 @@ public: [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, const U32& lod, const U32& multisampling, TextureInstInfo info); [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, - const Value& derivatives, const Value& offset, - const F32& lod_clamp, TextureInstInfo info); + const Value& derivatives_dx, const Value& derivatives_dy, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info); [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info); void ImageWrite(const Value& handle, const Value& coords, const Value& color, TextureInstInfo info); diff --git a/src/shader_recompiler/ir/opcodes.h b/src/shader_recompiler/ir/opcodes.h index 2cea7009..200d7f42 100644 --- a/src/shader_recompiler/ir/opcodes.h +++ b/src/shader_recompiler/ir/opcodes.h @@ -21,7 +21,7 @@ namespace Detail { struct OpcodeMeta { std::string_view name; Type type; - std::array arg_types; + std::array arg_types; }; // using enum Type; diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 41e94ab1..51e10fb3 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -317,16 +317,17 @@ OPCODE(ConvertU16U32, U16, U32, OPCODE(ConvertU32U16, U32, U16, ) // Image operations -OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, F32, Opaque, ) -OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, U32, Opaque, ) -OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, Opaque, F32, Opaque, ) -OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageSampleRaw, F32x4, Opaque, F32x4, F32x4, F32x4, F32, ) +OPCODE(ImageSampleImplicitLod, F32x4, Opaque, F32x4, F32, Opaque, ) +OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, F32, Opaque, ) +OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, ) +OPCODE(ImageSampleDrefExplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, ) OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) -OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageRead, U32x4, Opaque, Opaque, ) OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index db0d75f0..76ffec81 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -132,38 +132,16 @@ bool IsImageStorageInstruction(const IR::Inst& inst) { bool IsImageInstruction(const IR::Inst& inst) { switch (inst.GetOpcode()) { - case IR::Opcode::ImageSampleExplicitLod: - case IR::Opcode::ImageSampleImplicitLod: - case IR::Opcode::ImageSampleDrefExplicitLod: - case IR::Opcode::ImageSampleDrefImplicitLod: case IR::Opcode::ImageFetch: - case IR::Opcode::ImageGather: - case IR::Opcode::ImageGatherDref: case IR::Opcode::ImageQueryDimensions: case IR::Opcode::ImageQueryLod: - case IR::Opcode::ImageGradient: + case IR::Opcode::ImageSampleRaw: return true; default: return IsImageStorageInstruction(inst); } } -u32 ImageOffsetArgumentPosition(const IR::Inst& inst) { - switch (inst.GetOpcode()) { - case IR::Opcode::ImageGather: - case IR::Opcode::ImageGatherDref: - return 2; - case IR::Opcode::ImageSampleExplicitLod: - case IR::Opcode::ImageSampleImplicitLod: - return 3; - case IR::Opcode::ImageSampleDrefExplicitLod: - case IR::Opcode::ImageSampleDrefImplicitLod: - return 4; - default: - UNREACHABLE(); - } -} - class Descriptors { public: explicit Descriptors(Info& info_) @@ -467,6 +445,185 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& } } +void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, + Descriptors& descriptors, const IR::Inst* producer, + const u32 image_binding, const AmdGpu::Image& image) { + // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions + const u32 sampler_binding = [&] { + ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2); + const IR::Value& handle = producer->Arg(1); + // Inline sampler resource. + if (handle.IsImmediate()) { + LOG_WARNING(Render_Vulkan, "Inline sampler detected"); + return descriptors.Add(SamplerResource{ + .sgpr_base = std::numeric_limits::max(), + .dword_offset = 0, + .inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()}, + }); + } + // Normal sampler resource. + const auto ssharp_handle = handle.InstRecursive(); + const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle); + const auto ssharp = TrackSharp(ssharp_ud); + return descriptors.Add(SamplerResource{ + .sgpr_base = ssharp.sgpr_base, + .dword_offset = ssharp.dword_offset, + .associated_image = image_binding, + .disable_aniso = disable_aniso, + }); + }(); + + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + + const auto inst_info = inst.Flags(); + const IR::U32 handle = ir.Imm32(image_binding | sampler_binding << 16); + + IR::Inst* body1 = inst.Arg(1).InstRecursive(); + IR::Inst* body2 = inst.Arg(2).InstRecursive(); + IR::Inst* body3 = inst.Arg(3).InstRecursive(); + IR::Inst* body4 = inst.Arg(4).InstRecursive(); + const auto get_addr_reg = [&](u32 index) -> IR::F32 { + if (index <= 3) { + return IR::F32{body1->Arg(index)}; + } + if (index >= 4 && index <= 7) { + return IR::F32{body2->Arg(index - 4)}; + } + if (index >= 8 && index <= 11) { + return IR::F32{body3->Arg(index - 8)}; + } + if (index == 12) { + return IR::F32{body4}; + } + UNREACHABLE(); + }; + u32 addr_reg = 0; + + // Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction + // Set Architecture + const IR::Value offset = [&] -> IR::Value { + if (!inst_info.has_offset) { + return IR::U32{}; + } + + // The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16]. + const IR::Value arg = get_addr_reg(addr_reg++); + + const auto read = [&](u32 off) -> IR::U32 { + if (arg.IsImmediate()) { + const u16 comp = (arg.U32() >> off) & 0x3F; + return ir.Imm32(s32(comp << 26) >> 26); + } + return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(off), ir.Imm32(6), true); + }; + + switch (image.GetType()) { + case AmdGpu::ImageType::Color1D: + case AmdGpu::ImageType::Color1DArray: + return read(0); + case AmdGpu::ImageType::Color2D: + case AmdGpu::ImageType::Color2DArray: + case AmdGpu::ImageType::Color2DMsaa: + return ir.CompositeConstruct(read(0), read(8)); + case AmdGpu::ImageType::Color3D: + case AmdGpu::ImageType::Cube: + return ir.CompositeConstruct(read(0), read(8), read(16)); + default: + UNREACHABLE(); + } + }(); + const IR::F32 bias = inst_info.has_bias ? get_addr_reg(addr_reg++) : IR::F32{}; + const IR::F32 dref = inst_info.is_depth ? get_addr_reg(addr_reg++) : IR::F32{}; + const auto [derivatives_dx, derivatives_dy] = [&] -> std::pair { + if (!inst_info.has_derivatives) { + return {}; + } + switch (image.GetType()) { + case AmdGpu::ImageType::Color1D: + case AmdGpu::ImageType::Color1DArray: + // du/dx, du/dy + addr_reg = addr_reg + 2; + return {get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1)}; + case AmdGpu::ImageType::Color2D: + case AmdGpu::ImageType::Color2DArray: + case AmdGpu::ImageType::Color2DMsaa: + // (du/dx, dv/dx), (du/dy, dv/dy) + addr_reg = addr_reg + 4; + return {ir.CompositeConstruct(get_addr_reg(addr_reg - 4), get_addr_reg(addr_reg - 3)), + ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1))}; + case AmdGpu::ImageType::Color3D: + case AmdGpu::ImageType::Cube: + // (du/dx, dv/dx, dw/dx), (du/dy, dv/dy, dw/dy) + addr_reg = addr_reg + 6; + return {ir.CompositeConstruct(get_addr_reg(addr_reg - 6), get_addr_reg(addr_reg - 5), + get_addr_reg(addr_reg - 4)), + ir.CompositeConstruct(get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2), + get_addr_reg(addr_reg - 1))}; + default: + UNREACHABLE(); + } + }(); + + // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler + const IR::Value coords = [&] -> IR::Value { + switch (image.GetType()) { + case AmdGpu::ImageType::Color1D: // x + addr_reg = addr_reg + 1; + return get_addr_reg(addr_reg - 1); + case AmdGpu::ImageType::Color1DArray: // x, slice + [[fallthrough]]; + case AmdGpu::ImageType::Color2D: // x, y + addr_reg = addr_reg + 2; + return ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1)); + case AmdGpu::ImageType::Color2DArray: // x, y, slice + [[fallthrough]]; + case AmdGpu::ImageType::Color2DMsaa: // x, y, frag + [[fallthrough]]; + case AmdGpu::ImageType::Color3D: // x, y, z + addr_reg = addr_reg + 3; + return ir.CompositeConstruct(get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2), + get_addr_reg(addr_reg - 1)); + case AmdGpu::ImageType::Cube: // x, y, face + addr_reg = addr_reg + 3; + return PatchCubeCoord(ir, get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2), + get_addr_reg(addr_reg - 1), false, inst_info.is_array); + default: + UNREACHABLE(); + } + }(); + + ASSERT(!inst_info.has_lod || !inst_info.has_lod_clamp); + const bool explicit_lod = inst_info.has_lod || inst_info.force_level0; + const IR::F32 lod = inst_info.has_lod ? get_addr_reg(addr_reg++) + : inst_info.force_level0 ? ir.Imm32(0.0f) + : IR::F32{}; + const IR::F32 lod_clamp = inst_info.has_lod_clamp ? get_addr_reg(addr_reg++) : IR::F32{}; + + const auto new_inst = [&] -> IR::Value { + if (inst_info.is_gather) { + if (inst_info.is_depth) { + return ir.ImageGatherDref(handle, coords, offset, dref, inst_info); + } + return ir.ImageGather(handle, coords, offset, inst_info); + } + if (inst_info.has_derivatives) { + return ir.ImageGradient(handle, coords, derivatives_dx, derivatives_dy, offset, + lod_clamp, inst_info); + } + if (inst_info.is_depth) { + if (explicit_lod) { + return ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, inst_info); + } + return ir.ImageSampleDrefImplicitLod(handle, coords, dref, bias, offset, inst_info); + } + if (explicit_lod) { + return ir.ImageSampleExplicitLod(handle, coords, lod, offset, inst_info); + } + return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info); + }(); + inst.ReplaceUsesWith(new_inst); +} + void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { const auto pred = [](const IR::Inst* inst) -> std::optional { const auto opcode = inst->GetOpcode(); @@ -498,40 +655,18 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip .sgpr_base = tsharp.sgpr_base, .dword_offset = tsharp.dword_offset, .type = type, - .nfmt = static_cast(image.GetNumberFmt()), + .nfmt = image.GetNumberFmt(), .is_storage = is_storage, .is_depth = bool(inst_info.is_depth), .is_atomic = IsImageAtomicInstruction(inst), .is_array = bool(inst_info.is_array), }); - // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions - const u32 sampler_binding = [&] { - if (!has_sampler) { - return 0U; - } - const IR::Value& handle = producer->Arg(1); - // Inline sampler resource. - if (handle.IsImmediate()) { - LOG_WARNING(Render_Vulkan, "Inline sampler detected"); - return descriptors.Add(SamplerResource{ - .sgpr_base = std::numeric_limits::max(), - .dword_offset = 0, - .inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()}, - }); - } - // Normal sampler resource. - const auto ssharp_handle = handle.InstRecursive(); - const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle); - const auto ssharp = TrackSharp(ssharp_ud); - return descriptors.Add(SamplerResource{ - .sgpr_base = ssharp.sgpr_base, - .dword_offset = ssharp.dword_offset, - .associated_image = image_binding, - .disable_aniso = disable_aniso, - }); - }(); - image_binding |= (sampler_binding << 16); + // Sample instructions must be resolved into a new instruction using address register data. + if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) { + PatchImageSampleInstruction(block, inst, info, descriptors, producer, image_binding, image); + return; + } // Patch image handle IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; @@ -568,62 +703,9 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip }(); inst.SetArg(1, coords); - if (inst_info.has_offset) { - // The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16]. - const u32 arg_pos = ImageOffsetArgumentPosition(inst); - const IR::Value arg = inst.Arg(arg_pos); - ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type"); - - const auto read = [&](u32 offset) -> IR::U32 { - if (arg.IsImmediate()) { - const u16 comp = (arg.U32() >> offset) & 0x3F; - return ir.Imm32(s32(comp << 26) >> 26); - } - return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true); - }; - - switch (image.GetType()) { - case AmdGpu::ImageType::Color1D: - case AmdGpu::ImageType::Color1DArray: - inst.SetArg(arg_pos, read(0)); - break; - case AmdGpu::ImageType::Color2D: - case AmdGpu::ImageType::Color2DArray: - inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8))); - break; - case AmdGpu::ImageType::Color3D: - inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8), read(16))); - break; - default: - UNREACHABLE(); - } - } - if (inst_info.has_derivatives) { - ASSERT_MSG(image.GetType() == AmdGpu::ImageType::Color2D || - image.GetType() == AmdGpu::ImageType::Color2DArray, - "User derivatives only supported for 2D images"); - } - if (inst_info.has_lod_clamp) { - const u32 arg_pos = [&]() -> u32 { - switch (inst.GetOpcode()) { - case IR::Opcode::ImageSampleImplicitLod: - return 2; - case IR::Opcode::ImageSampleDrefImplicitLod: - return 3; - default: - break; - } - return inst_info.is_depth ? 5 : 4; - }(); - inst.SetArg(arg_pos, arg); - } - if (inst_info.explicit_lod) { - ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch || - inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod || - inst.GetOpcode() == IR::Opcode::ImageSampleDrefExplicitLod); - const u32 pos = inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod ? 2 : 3; - const IR::Value value = inst_info.force_level0 ? ir.Imm32(0.f) : arg; - inst.SetArg(pos, value); + if (inst_info.has_lod) { + ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch); + inst.SetArg(3, arg); } } diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index 5facaf5c..d7c0b1db 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -33,11 +33,12 @@ union TextureInstInfo { BitField<1, 1, u32> has_bias; BitField<2, 1, u32> has_lod_clamp; BitField<3, 1, u32> force_level0; - BitField<4, 1, u32> explicit_lod; + BitField<4, 1, u32> has_lod; BitField<5, 1, u32> has_offset; BitField<6, 2, u32> gather_comp; BitField<8, 1, u32> has_derivatives; BitField<9, 1, u32> is_array; + BitField<10, 1, u32> is_gather; }; union BufferInstInfo { diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h index 060b9d2b..a282b916 100644 --- a/src/shader_recompiler/ir/value.h +++ b/src/shader_recompiler/ir/value.h @@ -209,7 +209,7 @@ private: union { NonTriviallyDummy dummy{}; boost::container::small_vector, 2> phi_args; - std::array args; + std::array args; }; }; static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");