From 69aee2d496e32ff525cac4f8736c6d0cb07a301f Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Tue, 5 Nov 2024 22:39:57 +0100 Subject: [PATCH] shader_recompiler: patch fmask access instructions (#1439) * Fix multisample texture fetch * Patch some fmask reads * clang-format * Assert instead of ignore, coordinate fixes * Patch ImageQueryDimensions --- .../backend/spirv/emit_spirv_image.cpp | 1 + src/shader_recompiler/info.h | 13 ++++ .../ir/passes/resource_tracking_pass.cpp | 62 +++++++++++++++++-- src/shader_recompiler/specialization.h | 18 ++++++ src/video_core/amdgpu/resource.h | 5 ++ 5 files changed, 94 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index fc99b892..40e5ea8b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -176,6 +176,7 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const ImageOperands operands; operands.AddOffset(ctx, offset); operands.Add(spv::ImageOperandsMask::Lod, lod); + operands.Add(spv::ImageOperandsMask::Sample, ms); const Id texel = texture.is_storage ? 
ctx.OpImageRead(result_type, image, coords, operands.mask, operands.operands) diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index b69863f4..91256085 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -86,6 +86,14 @@ struct SamplerResource { }; using SamplerResourceList = boost::container::small_vector; +struct FMaskResource { + u32 sgpr_base; + u32 dword_offset; + + constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; +}; +using FMaskResourceList = boost::container::small_vector; + struct PushData { static constexpr u32 BufOffsetIndex = 2; static constexpr u32 UdRegsIndex = 4; @@ -178,6 +186,7 @@ struct Info { TextureBufferResourceList texture_buffers; ImageResourceList images; SamplerResourceList samplers; + FMaskResourceList fmasks; PersistentSrtInfo srt_info; std::vector flattened_ud_buf; @@ -282,6 +291,10 @@ constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noex return inline_sampler ? inline_sampler : info.ReadUdSharp(sharp_idx); } +constexpr AmdGpu::Image FMaskResource::GetSharp(const Info& info) const noexcept { + return info.ReadUd(sgpr_base, dword_offset); +} + } // namespace Shader template <> diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 6c8809cf..9468f80a 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -142,7 +142,7 @@ public: explicit Descriptors(Info& info_) : info{info_}, buffer_resources{info_.buffers}, texture_buffer_resources{info_.texture_buffers}, image_resources{info_.images}, - sampler_resources{info_.samplers} {} + sampler_resources{info_.samplers}, fmask_resources(info_.fmasks) {} u32 Add(const BufferResource& desc) { const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) { @@ -183,6 +183,14 @@ public: return index; } + u32 Add(const 
FMaskResource& desc) { + u32 index = Add(fmask_resources, desc, [&desc](const auto& existing) { + return desc.sgpr_base == existing.sgpr_base && + desc.dword_offset == existing.dword_offset; + }); + return index; + } + private: template static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { @@ -199,6 +207,7 @@ private: TextureBufferResourceList& texture_buffer_resources; ImageResourceList& image_resources; SamplerResourceList& sampler_resources; + FMaskResourceList& fmask_resources; }; } // Anonymous namespace @@ -618,6 +627,41 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip } ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); const bool is_storage = IsImageStorageInstruction(inst); + + // Patch image instruction if image is FMask. + if (image.IsFmask()) { + ASSERT_MSG(!is_storage, "FMask storage instructions are not supported"); + + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + switch (inst.GetOpcode()) { + case IR::Opcode::ImageFetch: + case IR::Opcode::ImageSampleRaw: { + IR::F32 fmaskx = ir.BitCast(ir.Imm32(0x76543210)); + IR::F32 fmasky = ir.BitCast(ir.Imm32(0xfedcba98)); + inst.ReplaceUsesWith(ir.CompositeConstruct(fmaskx, fmasky)); + return; + } + case IR::Opcode::ImageQueryLod: + inst.ReplaceUsesWith(ir.Imm32(1)); + return; + case IR::Opcode::ImageQueryDimensions: { + IR::Value dims = ir.CompositeConstruct(ir.Imm32(static_cast(image.width)), // x + ir.Imm32(static_cast(image.height)), // y + ir.Imm32(1), ir.Imm32(1)); // depth, mip + inst.ReplaceUsesWith(dims); + + // Track FMask resource to do specialization. + descriptors.Add(FMaskResource{ + .sgpr_base = tsharp.sgpr_base, + .dword_offset = tsharp.dword_offset, + }); + return; + } + default: + UNREACHABLE_MSG("Can't patch fmask instruction {}", inst.GetOpcode()); + } + } + const auto type = image.IsPartialCubemap() ? 
AmdGpu::ImageType::Color2DArray : image.GetType(); u32 image_binding = descriptors.Add(ImageResource{ .sharp_idx = tsharp, @@ -652,11 +696,14 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip return {body->Arg(0), body->Arg(1)}; case AmdGpu::ImageType::Color1DArray: // x, slice [[fallthrough]]; - case AmdGpu::ImageType::Color2D: // x, y - return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)}; - case AmdGpu::ImageType::Color2DArray: // x, y, slice + case AmdGpu::ImageType::Color2D: // x, y, [lod] [[fallthrough]]; - case AmdGpu::ImageType::Color2DMsaa: // x, y, frag + case AmdGpu::ImageType::Color2DMsaa: // x, y. (sample is passed on different argument) + return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)}; + case AmdGpu::ImageType::Color2DArray: // x, y, slice, [lod] + [[fallthrough]]; + case AmdGpu::ImageType::Color2DMsaaArray: // x, y, slice. (sample is passed on different + // argument) [[fallthrough]]; case AmdGpu::ImageType::Color3D: // x, y, z return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)}; @@ -672,7 +719,12 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip if (inst_info.has_lod) { ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch); + ASSERT(image.GetType() == AmdGpu::ImageType::Color2D || + image.GetType() == AmdGpu::ImageType::Color2DArray); inst.SetArg(3, arg); + } else if (image.GetType() == AmdGpu::ImageType::Color2DMsaa || + image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) { + inst.SetArg(4, arg); } } diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index c25c611e..225b164b 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -32,6 +32,13 @@ struct ImageSpecialization { auto operator<=>(const ImageSpecialization&) const = default; }; +struct FMaskSpecialization { + u32 width; + u32 height; + + auto 
operator<=>(const FMaskSpecialization&) const = default; +}; + /** * Alongside runtime information, this structure also checks bound resources * for compatibility. Can be used as a key for storing shader permutations. @@ -47,6 +54,7 @@ struct StageSpecialization { boost::container::small_vector buffers; boost::container::small_vector tex_buffers; boost::container::small_vector images; + boost::container::small_vector fmasks; Backend::Bindings start{}; explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_, @@ -71,6 +79,11 @@ struct StageSpecialization { : sharp.GetType(); spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); }); + ForEachSharp(binding, fmasks, info->fmasks, + [](auto& spec, const auto& desc, AmdGpu::Image sharp) { + spec.width = sharp.width; + spec.height = sharp.height; + }); } void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) { @@ -115,6 +128,11 @@ struct StageSpecialization { return false; } } + for (u32 i = 0; i < fmasks.size(); i++) { + if (other.bitset[binding++] && fmasks[i] != other.fmasks[i]) { + return false; + } + } return true; } }; diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 83be0b0a..81fe43f4 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -295,6 +295,11 @@ struct Image { return GetTilingMode() != TilingMode::Display_Linear; } + bool IsFmask() const noexcept { + return GetDataFmt() >= DataFormat::FormatFmask8_1 && + GetDataFmt() <= DataFormat::FormatFmask64_8; + } + bool IsPartialCubemap() const { const auto viewed_slice = last_array - base_array + 1; return GetType() == ImageType::Cube && viewed_slice < 6;