shader_recompiler: patch fmask access instructions (#1439)

* Fix multisample texture fetch

* Patch some fmask reads

* clang-format

* Assert instead of ignoring, coordinate fixes

* Patch ImageQueryDimensions
This commit is contained in:
Lander Gallastegi 2024-11-05 22:39:57 +01:00 committed by GitHub
parent 4c650bb9c2
commit 69aee2d496
5 changed files with 94 additions and 5 deletions

View file

@ -176,6 +176,7 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const
ImageOperands operands; ImageOperands operands;
operands.AddOffset(ctx, offset); operands.AddOffset(ctx, offset);
operands.Add(spv::ImageOperandsMask::Lod, lod); operands.Add(spv::ImageOperandsMask::Lod, lod);
operands.Add(spv::ImageOperandsMask::Sample, ms);
const Id texel = const Id texel =
texture.is_storage texture.is_storage
? ctx.OpImageRead(result_type, image, coords, operands.mask, operands.operands) ? ctx.OpImageRead(result_type, image, coords, operands.mask, operands.operands)

View file

@ -86,6 +86,14 @@ struct SamplerResource {
}; };
using SamplerResourceList = boost::container::small_vector<SamplerResource, 16>; using SamplerResourceList = boost::container::small_vector<SamplerResource, 16>;
// Identifies an FMask image descriptor by where it lives in user data.
// Both fields are forwarded unchanged to Info::ReadUd when the sharp is read.
struct FMaskResource {
// First argument handed to Info::ReadUd when fetching the descriptor.
u32 sgpr_base;
// Second argument handed to Info::ReadUd when fetching the descriptor.
u32 dword_offset;
// Reads the AmdGpu::Image descriptor for this resource out of `info`.
constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
};
// Container of the FMask resources recorded for a shader (inline capacity of 16).
using FMaskResourceList = boost::container::small_vector<FMaskResource, 16>;
struct PushData { struct PushData {
static constexpr u32 BufOffsetIndex = 2; static constexpr u32 BufOffsetIndex = 2;
static constexpr u32 UdRegsIndex = 4; static constexpr u32 UdRegsIndex = 4;
@ -178,6 +186,7 @@ struct Info {
TextureBufferResourceList texture_buffers; TextureBufferResourceList texture_buffers;
ImageResourceList images; ImageResourceList images;
SamplerResourceList samplers; SamplerResourceList samplers;
FMaskResourceList fmasks;
PersistentSrtInfo srt_info; PersistentSrtInfo srt_info;
std::vector<u32> flattened_ud_buf; std::vector<u32> flattened_ud_buf;
@ -282,6 +291,10 @@ constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noex
return inline_sampler ? inline_sampler : info.ReadUdSharp<AmdGpu::Sampler>(sharp_idx); return inline_sampler ? inline_sampler : info.ReadUdSharp<AmdGpu::Sampler>(sharp_idx);
} }
/// Fetches the FMask image descriptor for this resource by reading it from the
/// user data in `info` at (sgpr_base, dword_offset).
constexpr AmdGpu::Image FMaskResource::GetSharp(const Info& info) const noexcept {
    const AmdGpu::Image sharp = info.ReadUd<AmdGpu::Image>(this->sgpr_base, this->dword_offset);
    return sharp;
}
} // namespace Shader } // namespace Shader
template <> template <>

View file

@ -142,7 +142,7 @@ public:
explicit Descriptors(Info& info_) explicit Descriptors(Info& info_)
: info{info_}, buffer_resources{info_.buffers}, : info{info_}, buffer_resources{info_.buffers},
texture_buffer_resources{info_.texture_buffers}, image_resources{info_.images}, texture_buffer_resources{info_.texture_buffers}, image_resources{info_.images},
sampler_resources{info_.samplers} {} sampler_resources{info_.samplers}, fmask_resources(info_.fmasks) {}
u32 Add(const BufferResource& desc) { u32 Add(const BufferResource& desc) {
const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) { const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
@ -183,6 +183,14 @@ public:
return index; return index;
} }
/// Records an FMask resource in the FMask list and returns the index produced by
/// the shared Add helper. Two FMask resources are treated as the same entry when
/// both their sgpr_base and dword_offset match.
u32 Add(const FMaskResource& desc) {
    const auto same_location = [&desc](const auto& candidate) {
        return candidate.sgpr_base == desc.sgpr_base &&
               candidate.dword_offset == desc.dword_offset;
    };
    return Add(fmask_resources, desc, same_location);
}
private: private:
template <typename Descriptors, typename Descriptor, typename Func> template <typename Descriptors, typename Descriptor, typename Func>
static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
@ -199,6 +207,7 @@ private:
TextureBufferResourceList& texture_buffer_resources; TextureBufferResourceList& texture_buffer_resources;
ImageResourceList& image_resources; ImageResourceList& image_resources;
SamplerResourceList& sampler_resources; SamplerResourceList& sampler_resources;
FMaskResourceList& fmask_resources;
}; };
} // Anonymous namespace } // Anonymous namespace
@ -618,6 +627,41 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
} }
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
const bool is_storage = IsImageStorageInstruction(inst); const bool is_storage = IsImageStorageInstruction(inst);
// Patch image instruction if image is FMask.
// Instructions that target an FMask image are not bound as regular images; each
// supported opcode is replaced with a constant result and the function returns
// early. Any other opcode hits UNREACHABLE_MSG.
if (image.IsFmask()) {
    ASSERT_MSG(!is_storage, "FMask storage instructions are not supported");
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    switch (inst.GetOpcode()) {
    case IR::Opcode::ImageFetch:
    case IR::Opcode::ImageSampleRaw: {
        // Reads are replaced with a fixed two-dword pattern in which nibble i holds
        // the value i (0x0..0xF). NOTE(review): presumably this is an identity
        // sample-to-fragment mapping - confirm against the FMask layout.
        IR::F32 fmaskx = ir.BitCast<IR::F32>(ir.Imm32(0x76543210));
        IR::F32 fmasky = ir.BitCast<IR::F32>(ir.Imm32(0xfedcba98));
        inst.ReplaceUsesWith(ir.CompositeConstruct(fmaskx, fmasky));
        return;
    }
    case IR::Opcode::ImageQueryLod:
        inst.ReplaceUsesWith(ir.Imm32(1));
        return;
    case IR::Opcode::ImageQueryDimensions: {
        // The y component must come from the sharp's height; using width for both
        // axes reports wrong dimensions for non-square FMask surfaces.
        // NOTE(review): the raw sharp fields are used as-is; confirm whether the
        // descriptor encoding requires an adjustment (e.g. +1) before exposing them.
        IR::Value dims = ir.CompositeConstruct(ir.Imm32(static_cast<u32>(image.width)),  // x
                                               ir.Imm32(static_cast<u32>(image.height)), // y
                                               ir.Imm32(1), ir.Imm32(1)); // depth, mip
        inst.ReplaceUsesWith(dims);
        // Track FMask resource to do specialization: the constants baked in above
        // depend on the bound sharp's width and height.
        descriptors.Add(FMaskResource{
            .sgpr_base = tsharp.sgpr_base,
            .dword_offset = tsharp.dword_offset,
        });
        return;
    }
    default:
        UNREACHABLE_MSG("Can't patch fmask instruction {}", inst.GetOpcode());
    }
}
const auto type = image.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray : image.GetType(); const auto type = image.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray : image.GetType();
u32 image_binding = descriptors.Add(ImageResource{ u32 image_binding = descriptors.Add(ImageResource{
.sharp_idx = tsharp, .sharp_idx = tsharp,
@ -652,11 +696,14 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
return {body->Arg(0), body->Arg(1)}; return {body->Arg(0), body->Arg(1)};
case AmdGpu::ImageType::Color1DArray: // x, slice case AmdGpu::ImageType::Color1DArray: // x, slice
[[fallthrough]]; [[fallthrough]];
case AmdGpu::ImageType::Color2D: // x, y case AmdGpu::ImageType::Color2D: // x, y, [lod]
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)};
case AmdGpu::ImageType::Color2DArray: // x, y, slice
[[fallthrough]]; [[fallthrough]];
case AmdGpu::ImageType::Color2DMsaa: // x, y, frag case AmdGpu::ImageType::Color2DMsaa: // x, y. (sample is passed on different argument)
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)};
case AmdGpu::ImageType::Color2DArray: // x, y, slice, [lod]
[[fallthrough]];
case AmdGpu::ImageType::Color2DMsaaArray: // x, y, slice. (sample is passed on different
// argument)
[[fallthrough]]; [[fallthrough]];
case AmdGpu::ImageType::Color3D: // x, y, z case AmdGpu::ImageType::Color3D: // x, y, z
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)}; return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
@ -672,7 +719,12 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
if (inst_info.has_lod) { if (inst_info.has_lod) {
ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch); ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch);
ASSERT(image.GetType() == AmdGpu::ImageType::Color2D ||
image.GetType() == AmdGpu::ImageType::Color2DArray);
inst.SetArg(3, arg); inst.SetArg(3, arg);
} else if (image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) {
inst.SetArg(4, arg);
} }
} }

View file

@ -32,6 +32,13 @@ struct ImageSpecialization {
auto operator<=>(const ImageSpecialization&) const = default; auto operator<=>(const ImageSpecialization&) const = default;
}; };
// Specialization key for one FMask resource. The fields are filled from the
// bound FMask sharp's width and height when a StageSpecialization is built.
struct FMaskSpecialization {
// Copied from the sharp's `width` field.
u32 width;
// Copied from the sharp's `height` field.
u32 height;
// Member-wise comparison (width first, then height), used to compare specializations.
auto operator<=>(const FMaskSpecialization&) const = default;
};
/** /**
* Alongside runtime information, this structure also checks bound resources * Alongside runtime information, this structure also checks bound resources
* for compatibility. Can be used as a key for storing shader permutations. * for compatibility. Can be used as a key for storing shader permutations.
@ -47,6 +54,7 @@ struct StageSpecialization {
boost::container::small_vector<BufferSpecialization, 16> buffers; boost::container::small_vector<BufferSpecialization, 16> buffers;
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers; boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
boost::container::small_vector<ImageSpecialization, 16> images; boost::container::small_vector<ImageSpecialization, 16> images;
boost::container::small_vector<FMaskSpecialization, 8> fmasks;
Backend::Bindings start{}; Backend::Bindings start{};
explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_, explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
@ -71,6 +79,11 @@ struct StageSpecialization {
: sharp.GetType(); : sharp.GetType();
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
}); });
ForEachSharp(binding, fmasks, info->fmasks,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
spec.width = sharp.width;
spec.height = sharp.height;
});
} }
void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) { void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
@ -115,6 +128,11 @@ struct StageSpecialization {
return false; return false;
} }
} }
for (u32 i = 0; i < fmasks.size(); i++) {
if (other.bitset[binding++] && fmasks[i] != other.fmasks[i]) {
return false;
}
}
return true; return true;
} }
}; };

View file

@ -295,6 +295,11 @@ struct Image {
return GetTilingMode() != TilingMode::Display_Linear; return GetTilingMode() != TilingMode::Display_Linear;
} }
/// Reports whether this image's data format lies in the inclusive range
/// [FormatFmask8_1, FormatFmask64_8].
bool IsFmask() const noexcept {
    const auto format = GetDataFmt();
    if (format < DataFormat::FormatFmask8_1) {
        return false;
    }
    return format <= DataFormat::FormatFmask64_8;
}
bool IsPartialCubemap() const { bool IsPartialCubemap() const {
const auto viewed_slice = last_array - base_array + 1; const auto viewed_slice = last_array - base_array + 1;
return GetType() == ImageType::Cube && viewed_slice < 6; return GetType() == ImageType::Cube && viewed_slice < 6;