From 028be3ba5d7da1a0782c053f43cf606c78d9b71b Mon Sep 17 00:00:00 2001
From: squidbus <175574877+squidbus@users.noreply.github.com>
Date: Fri, 13 Dec 2024 11:49:07 -0800
Subject: [PATCH] shader_recompiler: Emulate unnormalized sampler coordinates
 in shader. (#1762)

* shader_recompiler: Emulate unnormalized sampler coordinates in shader.

* Address review comments.
---
 .../backend/spirv/emit_spirv_floating_point.cpp |  8 ++++
 .../backend/spirv/emit_spirv_instructions.h     |  2 +
 .../frontend/translate/vector_memory.cpp        |  1 +
 src/shader_recompiler/ir/ir_emitter.cpp         | 14 +++++++
 src/shader_recompiler/ir/ir_emitter.h           |  1 +
 src/shader_recompiler/ir/opcodes.inc            |  2 +
 .../ir/passes/resource_tracking_pass.cpp        | 41 ++++++++++++++-----
 src/shader_recompiler/ir/reg.h                  |  3 +-
 src/shader_recompiler/specialization.h          | 16 ++++++++
 src/video_core/texture_cache/sampler.cpp        |  2 +-
 10 files changed, 78 insertions(+), 12 deletions(-)

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
index e822eabe..1e8f31dd 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@@ -87,6 +87,14 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
     return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
 }
 
+Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+    return Decorate(ctx, inst, ctx.OpFDiv(ctx.F32[1], a, b));
+}
+
+Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+    return Decorate(ctx, inst, ctx.OpFDiv(ctx.F64[1], a, b));
+}
+
 Id EmitFPNeg16(EmitContext& ctx, Id value) {
     return ctx.OpFNegate(ctx.F16[1], value);
 }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index cc3db880..071b430d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -189,6 +189,8 @@ Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
 Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitFPNeg16(EmitContext& ctx, Id value);
 Id EmitFPNeg32(EmitContext& ctx, Id value);
 Id EmitFPNeg64(EmitContext& ctx, Id value);
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index b7ad3b36..74b9c905 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -527,6 +527,7 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
     info.has_offset.Assign(flags.test(MimgModifier::Offset));
     info.has_lod.Assign(flags.any(MimgModifier::Lod));
     info.is_array.Assign(mimg.da);
+    info.is_unnormalized.Assign(mimg.unrm);
 
     if (gather) {
         info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index 78e7f228..5fa20b74 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -692,6 +692,20 @@ F32F64 IREmitter::FPMul(const F32F64& a, const F32F64& b) {
     }
 }
 
+F32F64 IREmitter::FPDiv(const F32F64& a, const F32F64& b) {
+    if (a.Type() != b.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
+    }
+    switch (a.Type()) {
+    case Type::F32:
+        return Inst<F32>(Opcode::FPDiv32, a, b);
+    case Type::F64:
+        return Inst<F64>(Opcode::FPDiv64, a, b);
+    default:
+        ThrowInvalidType(a.Type());
+    }
+}
+
 F32F64 IREmitter::FPFma(const F32F64& a, const F32F64& b, const F32F64& c) {
     if (a.Type() != b.Type() || a.Type() != c.Type()) {
         UNREACHABLE_MSG("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index cbd3780d..e6608cba 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -158,6 +158,7 @@ public:
     [[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b);
     [[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b);
     [[nodiscard]] F32F64 FPMul(const F32F64& a, const F32F64& b);
+    [[nodiscard]] F32F64 FPDiv(const F32F64& a, const F32F64& b);
     [[nodiscard]] F32F64 FPFma(const F32F64& a, const F32F64& b, const F32F64& c);
 
     [[nodiscard]] F32F64 FPAbs(const F32F64& value);
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 0283ccd0..60232a3a 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -184,6 +184,8 @@ OPCODE(FPMin32,                                             F32,            F32,
 OPCODE(FPMin64,                                             F64,            F64,            F64,                                            )
 OPCODE(FPMul32,                                             F32,            F32,            F32,                                            )
 OPCODE(FPMul64,                                             F64,            F64,            F64,                                            )
+OPCODE(FPDiv32,                                             F32,            F32,            F32,                                            )
+OPCODE(FPDiv64,                                             F64,            F64,            F64,                                            )
 OPCODE(FPNeg32,                                             F32,            F32,                                                            )
 OPCODE(FPNeg64,                                             F64,            F64,                                                            )
 OPCODE(FPRecip32,                                           F32,            F32,                                                            )
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index 89c5c78a..99585104 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -420,26 +420,29 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
                                  Descriptors& descriptors, const IR::Inst* producer,
                                  const u32 image_binding, const AmdGpu::Image& image) {
     // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
-    const u32 sampler_binding = [&] {
+    const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
         ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
         const IR::Value& handle = producer->Arg(1);
         // Inline sampler resource.
         if (handle.IsImmediate()) {
             LOG_WARNING(Render_Vulkan, "Inline sampler detected");
-            return descriptors.Add(SamplerResource{
+            const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
+            const auto binding = descriptors.Add(SamplerResource{
                 .sharp_idx = std::numeric_limits<u32>::max(),
-                .inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()},
+                .inline_sampler = inline_sampler,
             });
+            return {binding, inline_sampler};
         }
         // Normal sampler resource.
         const auto ssharp_handle = handle.InstRecursive();
         const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
         const auto ssharp = TrackSharp(ssharp_ud, info);
-        return descriptors.Add(SamplerResource{
+        const auto binding = descriptors.Add(SamplerResource{
             .sharp_idx = ssharp,
             .associated_image = image_binding,
             .disable_aniso = disable_aniso,
         });
+        return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
     }();
 
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
@@ -539,28 +542,46 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
         }
     }();
 
+    const auto unnormalized = sampler.force_unnormalized || inst_info.is_unnormalized;
+    // Query dimensions of image if needed for normalization.
+    // We can't use the image sharp because it could be bound to a different image later.
+    const auto dimensions =
+        unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false))
+                     : IR::Value{};
+    const auto get_coord = [&](u32 idx, u32 dim_idx) -> IR::Value {
+        const auto coord = get_addr_reg(idx);
+        if (unnormalized) {
+            // Normalize the coordinate for sampling, dividing by its corresponding dimension.
+            return ir.FPDiv(coord,
+                            ir.BitCast<IR::F32>(IR::U32{ir.CompositeExtract(dimensions, dim_idx)}));
+        }
+        return coord;
+    };
+
     // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
     const IR::Value coords = [&] -> IR::Value {
         switch (image.GetType()) {
         case AmdGpu::ImageType::Color1D: // x
             addr_reg = addr_reg + 1;
-            return get_addr_reg(addr_reg - 1);
+            return get_coord(addr_reg - 1, 0);
         case AmdGpu::ImageType::Color1DArray: // x, slice
             [[fallthrough]];
         case AmdGpu::ImageType::Color2D: // x, y
             addr_reg = addr_reg + 2;
-            return ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1));
+            return ir.CompositeConstruct(get_coord(addr_reg - 2, 0), get_coord(addr_reg - 1, 1));
         case AmdGpu::ImageType::Color2DArray: // x, y, slice
             [[fallthrough]];
         case AmdGpu::ImageType::Color2DMsaa: // x, y, frag
-            [[fallthrough]];
+            addr_reg = addr_reg + 3;
+            return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
+                                         get_addr_reg(addr_reg - 1));
         case AmdGpu::ImageType::Color3D: // x, y, z
             addr_reg = addr_reg + 3;
-            return ir.CompositeConstruct(get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
-                                         get_addr_reg(addr_reg - 1));
+            return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
+                                         get_coord(addr_reg - 1, 2));
         case AmdGpu::ImageType::Cube: // x, y, face
             addr_reg = addr_reg + 3;
-            return PatchCubeCoord(ir, get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
+            return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
                                   get_addr_reg(addr_reg - 1), false, inst_info.is_array);
         default:
             UNREACHABLE();
diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h
index 3004d2b8..ca2e9ceb 100644
--- a/src/shader_recompiler/ir/reg.h
+++ b/src/shader_recompiler/ir/reg.h
@@ -40,7 +40,8 @@ union TextureInstInfo {
     BitField<6, 2, u32> gather_comp;
     BitField<8, 1, u32> has_derivatives;
     BitField<9, 1, u32> is_array;
-    BitField<10, 1, u32> is_gather;
+    BitField<10, 1, u32> is_unnormalized;
+    BitField<11, 1, u32> is_gather;
 };
 
 union BufferInstInfo {
diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h
index 2a3bd62f..bc8627c1 100644
--- a/src/shader_recompiler/specialization.h
+++ b/src/shader_recompiler/specialization.h
@@ -49,6 +49,12 @@ struct FMaskSpecialization {
     auto operator<=>(const FMaskSpecialization&) const = default;
 };
 
+struct SamplerSpecialization {
+    bool force_unnormalized = false;
+
+    auto operator<=>(const SamplerSpecialization&) const = default;
+};
+
 /**
  * Alongside runtime information, this structure also checks bound resources
  * for compatibility. Can be used as a key for storing shader permutations.
@@ -67,6 +73,7 @@ struct StageSpecialization {
     boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
     boost::container::small_vector<ImageSpecialization, 16> images;
    boost::container::small_vector<FMaskSpecialization, 8> fmasks;
+    boost::container::small_vector<SamplerSpecialization, 16> samplers;
     Backend::Bindings start{};
 
     explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
@@ -107,6 +114,10 @@ struct StageSpecialization {
             spec.width = sharp.width;
             spec.height = sharp.height;
         });
+        ForEachSharp(samplers, info->samplers,
+                     [](auto& spec, const auto& desc, AmdGpu::Sampler sharp) {
+                         spec.force_unnormalized = sharp.force_unnormalized;
+                     });
     }
 
     void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
@@ -175,6 +186,11 @@ struct StageSpecialization {
                 return false;
             }
         }
+        for (u32 i = 0; i < samplers.size(); i++) {
+            if (samplers[i] != other.samplers[i]) {
+                return false;
+            }
+        }
         return true;
     }
 };
diff --git a/src/video_core/texture_cache/sampler.cpp b/src/video_core/texture_cache/sampler.cpp
index e47f53ab..9f4bc7a7 100644
--- a/src/video_core/texture_cache/sampler.cpp
+++ b/src/video_core/texture_cache/sampler.cpp
@@ -25,7 +25,7 @@ Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sample
         .minLod = sampler.MinLod(),
         .maxLod = sampler.MaxLod(),
         .borderColor = LiverpoolToVK::BorderColor(sampler.border_color_type),
-        .unnormalizedCoordinates = bool(sampler.force_unnormalized),
+        .unnormalizedCoordinates = false, // Handled in shader due to Vulkan limitations.
     };
     auto [sampler_result, smplr] = instance.GetDevice().createSamplerUnique(sampler_ci);
     ASSERT_MSG(sampler_result == vk::Result::eSuccess, "Failed to create sampler: {}",