From 028be3ba5d7da1a0782c053f43cf606c78d9b71b Mon Sep 17 00:00:00 2001
From: squidbus <175574877+squidbus@users.noreply.github.com>
Date: Fri, 13 Dec 2024 11:49:07 -0800
Subject: [PATCH] shader_recompiler: Emulate unnormalized sampler coordinates
 in shader. (#1762)

* shader_recompiler: Emulate unnormalized sampler coordinates in shader.

* Address review comments.
---
 .../backend/spirv/emit_spirv_floating_point.cpp |  8 ++++
 .../backend/spirv/emit_spirv_instructions.h     |  2 +
 .../frontend/translate/vector_memory.cpp        |  1 +
 src/shader_recompiler/ir/ir_emitter.cpp         | 14 +++++++
 src/shader_recompiler/ir/ir_emitter.h           |  1 +
 src/shader_recompiler/ir/opcodes.inc            |  2 +
 .../ir/passes/resource_tracking_pass.cpp        | 41 ++++++++++++++-----
 src/shader_recompiler/ir/reg.h                  |  3 +-
 src/shader_recompiler/specialization.h          | 16 ++++++++
 src/video_core/texture_cache/sampler.cpp        |  2 +-
 10 files changed, 78 insertions(+), 12 deletions(-)

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
index e822eabe..1e8f31dd 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@@ -87,6 +87,14 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
     return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
 }
 
+Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+    return Decorate(ctx, inst, ctx.OpFDiv(ctx.F32[1], a, b));
+}
+
+Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+    return Decorate(ctx, inst, ctx.OpFDiv(ctx.F64[1], a, b));
+}
+
 Id EmitFPNeg16(EmitContext& ctx, Id value) {
     return ctx.OpFNegate(ctx.F16[1], value);
 }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index cc3db880..071b430d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -189,6 +189,8 @@ Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
 Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitFPNeg16(EmitContext& ctx, Id value);
 Id EmitFPNeg32(EmitContext& ctx, Id value);
 Id EmitFPNeg64(EmitContext& ctx, Id value);
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index b7ad3b36..74b9c905 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -527,6 +527,7 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
     info.has_offset.Assign(flags.test(MimgModifier::Offset));
     info.has_lod.Assign(flags.any(MimgModifier::Lod));
     info.is_array.Assign(mimg.da);
+    info.is_unnormalized.Assign(mimg.unrm);
 
     if (gather) {
         info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index 78e7f228..5fa20b74 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -692,6 +692,20 @@ F32F64 IREmitter::FPMul(const F32F64& a, const F32F64& b) {
     }
 }
 
+F32F64 IREmitter::FPDiv(const F32F64& a, const F32F64& b) {
+    if (a.Type() != b.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
+    }
+    switch (a.Type()) {
+    case Type::F32:
+        return Inst<F32>(Opcode::FPDiv32, a, b);
+    case Type::F64:
+        return Inst<F64>(Opcode::FPDiv64, a, b);
+    default:
+        ThrowInvalidType(a.Type());
+    }
+}
+
 F32F64 IREmitter::FPFma(const F32F64& a, const F32F64& b, const F32F64& c) {
     if (a.Type() != b.Type() || a.Type() != c.Type()) {
         UNREACHABLE_MSG("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index cbd3780d..e6608cba 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -158,6 +158,7 @@ public:
     [[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b);
     [[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b);
     [[nodiscard]] F32F64 FPMul(const F32F64& a, const F32F64& b);
+    [[nodiscard]] F32F64 FPDiv(const F32F64& a, const F32F64& b);
     [[nodiscard]] F32F64 FPFma(const F32F64& a, const F32F64& b, const F32F64& c);
 
     [[nodiscard]] F32F64 FPAbs(const F32F64& value);
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 0283ccd0..60232a3a 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -184,6 +184,8 @@ OPCODE(FPMin32,                                             F32,            F32,
 OPCODE(FPMin64,                                             F64,            F64,            F64,                                            )
 OPCODE(FPMul32,                                             F32,            F32,            F32,                                            )
 OPCODE(FPMul64,                                             F64,            F64,            F64,                                            )
+OPCODE(FPDiv32,                                             F32,            F32,            F32,                                            )
+OPCODE(FPDiv64,                                             F64,            F64,            F64,                                            )
 OPCODE(FPNeg32,                                             F32,            F32,                                                            )
 OPCODE(FPNeg64,                                             F64,            F64,                                                            )
 OPCODE(FPRecip32,                                           F32,            F32,                                                            )
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index 89c5c78a..99585104 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -420,26 +420,29 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
                                  Descriptors& descriptors, const IR::Inst* producer,
                                  const u32 image_binding, const AmdGpu::Image& image) {
     // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
-    const u32 sampler_binding = [&] {
+    const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
         ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
         const IR::Value& handle = producer->Arg(1);
         // Inline sampler resource.
         if (handle.IsImmediate()) {
             LOG_WARNING(Render_Vulkan, "Inline sampler detected");
-            return descriptors.Add(SamplerResource{
+            const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
+            const auto binding = descriptors.Add(SamplerResource{
                 .sharp_idx = std::numeric_limits<u32>::max(),
-                .inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()},
+                .inline_sampler = inline_sampler,
             });
+            return {binding, inline_sampler};
         }
         // Normal sampler resource.
         const auto ssharp_handle = handle.InstRecursive();
         const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
         const auto ssharp = TrackSharp(ssharp_ud, info);
-        return descriptors.Add(SamplerResource{
+        const auto binding = descriptors.Add(SamplerResource{
             .sharp_idx = ssharp,
             .associated_image = image_binding,
             .disable_aniso = disable_aniso,
         });
+        return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
     }();
 
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
@@ -539,28 +542,46 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
         }
     }();
 
+    const auto unnormalized = sampler.force_unnormalized || inst_info.is_unnormalized;
+    // Query dimensions of image if needed for normalization.
+    // We can't use the image sharp because it could be bound to a different image later.
+    const auto dimensions =
+        unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false))
+                     : IR::Value{};
+    const auto get_coord = [&](u32 idx, u32 dim_idx) -> IR::Value {
+        const auto coord = get_addr_reg(idx);
+        if (unnormalized) {
+            // Normalize the coordinate for sampling, dividing by its corresponding dimension.
+            return ir.FPDiv(coord,
+                            ir.BitCast<IR::F32>(IR::U32{ir.CompositeExtract(dimensions, dim_idx)}));
+        }
+        return coord;
+    };
+
     // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
     const IR::Value coords = [&] -> IR::Value {
         switch (image.GetType()) {
         case AmdGpu::ImageType::Color1D: // x
             addr_reg = addr_reg + 1;
-            return get_addr_reg(addr_reg - 1);
+            return get_coord(addr_reg - 1, 0);
         case AmdGpu::ImageType::Color1DArray: // x, slice
             [[fallthrough]];
         case AmdGpu::ImageType::Color2D: // x, y
             addr_reg = addr_reg + 2;
-            return ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1));
+            return ir.CompositeConstruct(get_coord(addr_reg - 2, 0), get_coord(addr_reg - 1, 1));
         case AmdGpu::ImageType::Color2DArray: // x, y, slice
             [[fallthrough]];
         case AmdGpu::ImageType::Color2DMsaa: // x, y, frag
-            [[fallthrough]];
+            addr_reg = addr_reg + 3;
+            return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
+                                         get_addr_reg(addr_reg - 1));
         case AmdGpu::ImageType::Color3D: // x, y, z
             addr_reg = addr_reg + 3;
-            return ir.CompositeConstruct(get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
-                                         get_addr_reg(addr_reg - 1));
+            return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
+                                         get_coord(addr_reg - 1, 2));
         case AmdGpu::ImageType::Cube: // x, y, face
             addr_reg = addr_reg + 3;
-            return PatchCubeCoord(ir, get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
+            return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
                                   get_addr_reg(addr_reg - 1), false, inst_info.is_array);
         default:
             UNREACHABLE();
diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h
index 3004d2b8..ca2e9ceb 100644
--- a/src/shader_recompiler/ir/reg.h
+++ b/src/shader_recompiler/ir/reg.h
@@ -40,7 +40,8 @@ union TextureInstInfo {
     BitField<6, 2, u32> gather_comp;
     BitField<8, 1, u32> has_derivatives;
     BitField<9, 1, u32> is_array;
-    BitField<10, 1, u32> is_gather;
+    BitField<10, 1, u32> is_unnormalized;
+    BitField<11, 1, u32> is_gather;
 };
 
 union BufferInstInfo {
diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h
index 2a3bd62f..bc8627c1 100644
--- a/src/shader_recompiler/specialization.h
+++ b/src/shader_recompiler/specialization.h
@@ -49,6 +49,12 @@ struct FMaskSpecialization {
     auto operator<=>(const FMaskSpecialization&) const = default;
 };
 
+struct SamplerSpecialization {
+    bool force_unnormalized = false;
+
+    auto operator<=>(const SamplerSpecialization&) const = default;
+};
+
 /**
  * Alongside runtime information, this structure also checks bound resources
  * for compatibility. Can be used as a key for storing shader permutations.
@@ -67,6 +73,7 @@ struct StageSpecialization {
     boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
     boost::container::small_vector<ImageSpecialization, 16> images;
    boost::container::small_vector<FMaskSpecialization, 8> fmasks;
+    boost::container::small_vector<SamplerSpecialization, 16> samplers;
     Backend::Bindings start{};
 
     explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
@@ -107,6 +114,10 @@ struct StageSpecialization {
             spec.width = sharp.width;
             spec.height = sharp.height;
         });
+        ForEachSharp(samplers, info->samplers,
+                     [](auto& spec, const auto& desc, AmdGpu::Sampler sharp) {
+                         spec.force_unnormalized = sharp.force_unnormalized;
+                     });
     }
 
     void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
@@ -175,6 +186,11 @@ struct StageSpecialization {
                 return false;
             }
         }
+        for (u32 i = 0; i < samplers.size(); i++) {
+            if (samplers[i] != other.samplers[i]) {
+                return false;
+            }
+        }
         return true;
     }
 };
diff --git a/src/video_core/texture_cache/sampler.cpp b/src/video_core/texture_cache/sampler.cpp
index e47f53ab..9f4bc7a7 100644
--- a/src/video_core/texture_cache/sampler.cpp
+++ b/src/video_core/texture_cache/sampler.cpp
@@ -25,7 +25,7 @@ Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sample
         .minLod = sampler.MinLod(),
         .maxLod = sampler.MaxLod(),
         .borderColor = LiverpoolToVK::BorderColor(sampler.border_color_type),
-        .unnormalizedCoordinates = bool(sampler.force_unnormalized),
+        .unnormalizedCoordinates = false, // Handled in shader due to Vulkan limitations.
     };
     auto [sampler_result, smplr] = instance.GetDevice().createSamplerUnique(sampler_ci);
     ASSERT_MSG(sampler_result == vk::Result::eSuccess, "Failed to create sampler: {}",