shader_recompiler: patch fmask access instructions (#1439)

* Fix multisample texture fetch * Patch some fmask reads * clang-format * Assert instead of ignore, coordinate fixes * Patch ImageQueryDimensions
2025-01-29 17:48:37 +00:00 · 2024-11-05 22:39:57 +01:00 · 2024-11-05 22:39:57 +01:00 · aa4c6c0178
parent fe389e560a
commit aa4c6c0178
5 changed files with 94 additions and 5 deletions
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@ -176,6 +176,7 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const
    ImageOperands operands;
    operands.AddOffset(ctx, offset);
    operands.Add(spv::ImageOperandsMask::Lod, lod);
+    operands.Add(spv::ImageOperandsMask::Sample, ms);
    const Id texel =
        texture.is_storage
            ? ctx.OpImageRead(result_type, image, coords, operands.mask, operands.operands)
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@ -86,6 +86,14 @@ struct SamplerResource {
 };
 using SamplerResourceList = boost::container::small_vector<SamplerResource, 16>;

+struct FMaskResource { // Locates an FMask image descriptor; see GetSharp for how it is read.
+    u32 sgpr_base; // first argument forwarded to Info::ReadUd by GetSharp
+    u32 dword_offset; // second argument forwarded to Info::ReadUd by GetSharp
+
+    constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; // defined after Info
+};
+using FMaskResourceList = boost::container::small_vector<FMaskResource, 16>;
+
 struct PushData {
    static constexpr u32 BufOffsetIndex = 2;
    static constexpr u32 UdRegsIndex = 4;
@ -178,6 +186,7 @@ struct Info {
    TextureBufferResourceList texture_buffers;
    ImageResourceList images;
    SamplerResourceList samplers;
+    FMaskResourceList fmasks;

    PersistentSrtInfo srt_info;
    std::vector<u32> flattened_ud_buf;
@ -282,6 +291,10 @@ constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noex
    return inline_sampler ? inline_sampler : info.ReadUdSharp<AmdGpu::Sampler>(sharp_idx);
 }

+constexpr AmdGpu::Image FMaskResource::GetSharp(const Info& info) const noexcept {
+    return info.ReadUd<AmdGpu::Image>(sgpr_base, dword_offset); // fetch the Image sharp
+}
+
 } // namespace Shader

 template <>
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@ -142,7 +142,7 @@ public:
    explicit Descriptors(Info& info_)
        : info{info_}, buffer_resources{info_.buffers},
          texture_buffer_resources{info_.texture_buffers}, image_resources{info_.images},
-          sampler_resources{info_.samplers} {}
+          sampler_resources{info_.samplers}, fmask_resources(info_.fmasks) {}

    u32 Add(const BufferResource& desc) {
        const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
@ -183,6 +183,14 @@ public:
        return index;
    }

+    u32 Add(const FMaskResource& desc) { // returns the index yielded by the shared Add helper
+        // Two FMask descriptors are the same entry when both location fields agree.
+        const auto same_location = [&desc](const auto& existing) {
+            return existing.sgpr_base == desc.sgpr_base &&
+                   existing.dword_offset == desc.dword_offset;
+        };
+        return Add(fmask_resources, desc, same_location);
+    }
+
 private:
    template <typename Descriptors, typename Descriptor, typename Func>
    static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
@ -199,6 +207,7 @@ private:
    TextureBufferResourceList& texture_buffer_resources;
    ImageResourceList& image_resources;
    SamplerResourceList& sampler_resources;
+    FMaskResourceList& fmask_resources;
 };

 } // Anonymous namespace
@ -618,6 +627,41 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
    }
    ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
    const bool is_storage = IsImageStorageInstruction(inst);
+
+    // FMask accesses are folded into constants; we return before any image binding is added.
+    if (image.IsFmask()) {
+        ASSERT_MSG(!is_storage, "FMask storage instructions are not supported");
+
+        IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+        switch (inst.GetOpcode()) {
+        case IR::Opcode::ImageFetch:
+        case IR::Opcode::ImageSampleRaw: {
+            IR::F32 fmaskx = ir.BitCast<IR::F32>(ir.Imm32(0x76543210)); // nibble i holds i
+            IR::F32 fmasky = ir.BitCast<IR::F32>(ir.Imm32(0xfedcba98)); // nibbles 8..15
+            inst.ReplaceUsesWith(ir.CompositeConstruct(fmaskx, fmasky));
+            return;
+        }
+        case IR::Opcode::ImageQueryLod:
+            inst.ReplaceUsesWith(ir.Imm32(1));
+            return;
+        case IR::Opcode::ImageQueryDimensions: {
+            IR::Value dims = ir.CompositeConstruct(ir.Imm32(static_cast<u32>(image.width)),  // x
+                                                   ir.Imm32(static_cast<u32>(image.height)), // y
+                                                   ir.Imm32(1), ir.Imm32(1)); // depth, mip
+            inst.ReplaceUsesWith(dims);
+
+            // Width/height are baked in as immediates, so track the FMask for specialization.
+            descriptors.Add(FMaskResource{
+                .sgpr_base = tsharp.sgpr_base,
+                .dword_offset = tsharp.dword_offset,
+            });
+            return;
+        }
+        default:
+            UNREACHABLE_MSG("Can't patch fmask instruction {}", inst.GetOpcode());
+        }
+    }
+
    const auto type = image.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray : image.GetType();
    u32 image_binding = descriptors.Add(ImageResource{
        .sharp_idx = tsharp,
@ -652,11 +696,14 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
            return {body->Arg(0), body->Arg(1)};
        case AmdGpu::ImageType::Color1DArray: // x, slice
            [[fallthrough]];
-        case AmdGpu::ImageType::Color2D: // x, y
-            return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)};
-        case AmdGpu::ImageType::Color2DArray: // x, y, slice
+        case AmdGpu::ImageType::Color2D: // x, y, [lod]
            [[fallthrough]];
-        case AmdGpu::ImageType::Color2DMsaa: // x, y, frag
+        case AmdGpu::ImageType::Color2DMsaa: // x, y. (sample is passed on different argument)
+            return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)};
+        case AmdGpu::ImageType::Color2DArray: // x, y, slice, [lod]
+            [[fallthrough]];
+        case AmdGpu::ImageType::Color2DMsaaArray: // x, y, slice. (sample is passed on different
+                                                  // argument)
            [[fallthrough]];
        case AmdGpu::ImageType::Color3D: // x, y, z
            return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
@ -672,7 +719,12 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip

    if (inst_info.has_lod) {
        ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch);
+        ASSERT(image.GetType() == AmdGpu::ImageType::Color2D ||
+               image.GetType() == AmdGpu::ImageType::Color2DArray);
        inst.SetArg(3, arg);
+    } else if (image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
+               image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) {
+        inst.SetArg(4, arg);
    }
 }

--- a/src/shader_recompiler/specialization.h
+++ b/src/shader_recompiler/specialization.h
@ -32,6 +32,13 @@ struct ImageSpecialization {
    auto operator<=>(const ImageSpecialization&) const = default;
 };

+struct FMaskSpecialization { // FMask data compared when matching StageSpecialization keys.
+    u32 width; // copied from the bound image sharp's width field
+    u32 height; // copied from the bound image sharp's height field
+
+    auto operator<=>(const FMaskSpecialization&) const = default;
+};
+
 /**
 * Alongside runtime information, this structure also checks bound resources
 * for compatibility. Can be used as a key for storing shader permutations.
@ -47,6 +54,7 @@ struct StageSpecialization {
    boost::container::small_vector<BufferSpecialization, 16> buffers;
    boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
    boost::container::small_vector<ImageSpecialization, 16> images;
+    boost::container::small_vector<FMaskSpecialization, 8> fmasks;
    Backend::Bindings start{};

    explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
@ -71,6 +79,11 @@ struct StageSpecialization {
                                                              : sharp.GetType();
                         spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
                     });
+        ForEachSharp(binding, fmasks, info->fmasks,
+                     [](auto& spec, const auto& desc, AmdGpu::Image sharp) {
+                         spec.width = sharp.width;
+                         spec.height = sharp.height;
+                     });
    }

    void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
@ -115,6 +128,11 @@ struct StageSpecialization {
                return false;
            }
        }
+        for (u32 i = 0; i < fmasks.size(); i++) {
+            if (other.bitset[binding++] && fmasks[i] != other.fmasks[i]) {
+                return false;
+            }
+        }
        return true;
    }
 };
--- a/src/video_core/amdgpu/resource.h
+++ b/src/video_core/amdgpu/resource.h
@ -295,6 +295,11 @@ struct Image {
        return GetTilingMode() != TilingMode::Display_Linear;
    }

+    bool IsFmask() const noexcept { // true when the data format is within the FMask enum range
+        const auto fmt = GetDataFmt();
+        return fmt >= DataFormat::FormatFmask8_1 && fmt <= DataFormat::FormatFmask64_8;
+    }
+
    bool IsPartialCubemap() const {
        const auto viewed_slice = last_array - base_array + 1;
        return GetType() == ImageType::Cube && viewed_slice < 6;