mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-15 11:25:13 +00:00
video_core: Implement conversion for uncommon/unsupported number formats. (#2047)
* video_core: Implement conversion for uncommon/unsupported number formats. * shader_recompiler: Reinterpret image sample output as well. * liverpool_to_vk: Remove mappings for remapped number formats. These were poorly supported by drivers anyway. * resource_tracking_pass: Fix image write swizzle mistake. * amdgpu: Add missing specialization and move format mapping data to types * reinterpret: Fix U/SToF input type.
This commit is contained in:
parent
c3ecf599ad
commit
b0d7feb292
|
@ -2,6 +2,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/ir/reinterpret.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
@ -31,14 +32,16 @@ void Translator::EmitExport(const GcnInst& inst) {
|
|||
return;
|
||||
}
|
||||
const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
|
||||
const auto [r, g, b, a] = runtime_info.fs_info.color_buffers[index].swizzle;
|
||||
const auto col_buf = runtime_info.fs_info.color_buffers[index];
|
||||
const auto converted = IR::ApplyWriteNumberConversion(ir, value, col_buf.num_conversion);
|
||||
const auto [r, g, b, a] = col_buf.swizzle;
|
||||
const std::array swizzle_array = {r, g, b, a};
|
||||
const auto swizzled_comp = swizzle_array[comp];
|
||||
if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) {
|
||||
ir.SetAttribute(attrib, value, comp);
|
||||
ir.SetAttribute(attrib, converted, comp);
|
||||
return;
|
||||
}
|
||||
ir.SetAttribute(attrib, value, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red));
|
||||
ir.SetAttribute(attrib, converted, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red));
|
||||
};
|
||||
|
||||
const auto unpack = [&](u32 idx) {
|
||||
|
|
|
@ -301,8 +301,7 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
|
|||
});
|
||||
}
|
||||
|
||||
void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
s32 binding{};
|
||||
AmdGpu::Buffer buffer;
|
||||
if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) {
|
||||
|
@ -317,19 +316,191 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
});
|
||||
}
|
||||
|
||||
// Update buffer descriptor format.
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
|
||||
// Replace handle with binding index in buffer resource list.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(binding));
|
||||
}
|
||||
|
||||
void PatchTextureBufferSharp(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
const IR::Inst* handle = inst.Arg(0).InstRecursive();
|
||||
const IR::Inst* producer = handle->Arg(0).InstRecursive();
|
||||
const auto sharp = TrackSharp(producer, info);
|
||||
const s32 binding = descriptors.Add(TextureBufferResource{
|
||||
.sharp_idx = sharp,
|
||||
.is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
|
||||
});
|
||||
|
||||
// Replace handle with binding index in texture buffer resource list.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(binding));
|
||||
}
|
||||
|
||||
void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
const auto opcode = inst->GetOpcode();
|
||||
if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
||||
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||
opcode == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to find image sharp source");
|
||||
const IR::Inst* producer = result.value();
|
||||
const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2;
|
||||
const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;
|
||||
|
||||
// Read image sharp.
|
||||
const auto tsharp = TrackSharp(tsharp_handle, info);
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
auto image = info.ReadUdSharp<AmdGpu::Image>(tsharp);
|
||||
if (!image.Valid()) {
|
||||
LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!");
|
||||
image = AmdGpu::Image::Null();
|
||||
}
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
|
||||
const bool is_read = inst.GetOpcode() == IR::Opcode::ImageRead;
|
||||
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite;
|
||||
|
||||
// Patch image instruction if image is FMask.
|
||||
if (image.IsFmask()) {
|
||||
ASSERT_MSG(!is_written, "FMask storage instructions are not supported");
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::ImageRead:
|
||||
case IR::Opcode::ImageSampleRaw: {
|
||||
IR::F32 fmaskx = ir.BitCast<IR::F32>(ir.Imm32(0x76543210));
|
||||
IR::F32 fmasky = ir.BitCast<IR::F32>(ir.Imm32(0xfedcba98));
|
||||
inst.ReplaceUsesWith(ir.CompositeConstruct(fmaskx, fmasky));
|
||||
return;
|
||||
}
|
||||
case IR::Opcode::ImageQueryLod:
|
||||
inst.ReplaceUsesWith(ir.Imm32(1));
|
||||
return;
|
||||
case IR::Opcode::ImageQueryDimensions: {
|
||||
IR::Value dims = ir.CompositeConstruct(ir.Imm32(static_cast<u32>(image.width)), // x
|
||||
ir.Imm32(static_cast<u32>(image.width)), // y
|
||||
ir.Imm32(1), ir.Imm32(1)); // depth, mip
|
||||
inst.ReplaceUsesWith(dims);
|
||||
|
||||
// Track FMask resource to do specialization.
|
||||
descriptors.Add(FMaskResource{
|
||||
.sharp_idx = tsharp,
|
||||
});
|
||||
return;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE_MSG("Can't patch fmask instruction {}", inst.GetOpcode());
|
||||
}
|
||||
}
|
||||
|
||||
u32 image_binding = descriptors.Add(ImageResource{
|
||||
.sharp_idx = tsharp,
|
||||
.is_depth = bool(inst_info.is_depth),
|
||||
.is_atomic = IsImageAtomicInstruction(inst),
|
||||
.is_array = bool(inst_info.is_array),
|
||||
.is_read = is_read,
|
||||
.is_written = is_written,
|
||||
});
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
|
||||
// Read sampler sharp.
|
||||
const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
|
||||
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
|
||||
const IR::Value& handle = producer->Arg(1);
|
||||
// Inline sampler resource.
|
||||
if (handle.IsImmediate()) {
|
||||
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
|
||||
const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
|
||||
const auto binding = descriptors.Add(SamplerResource{
|
||||
.sharp_idx = std::numeric_limits<u32>::max(),
|
||||
.inline_sampler = inline_sampler,
|
||||
});
|
||||
return {binding, inline_sampler};
|
||||
}
|
||||
// Normal sampler resource.
|
||||
const auto ssharp_handle = handle.InstRecursive();
|
||||
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
|
||||
const auto ssharp = TrackSharp(ssharp_ud, info);
|
||||
const auto binding = descriptors.Add(SamplerResource{
|
||||
.sharp_idx = ssharp,
|
||||
.associated_image = image_binding,
|
||||
.disable_aniso = disable_aniso,
|
||||
});
|
||||
return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
|
||||
}();
|
||||
// Patch image and sampler handle.
|
||||
inst.SetArg(0, ir.Imm32(image_binding | sampler_binding << 16));
|
||||
} else {
|
||||
// Patch image handle.
|
||||
inst.SetArg(0, ir.Imm32(image_binding));
|
||||
}
|
||||
}
|
||||
|
||||
void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
// Insert gds binding in the shader if it doesn't exist already.
|
||||
// The buffer is used for append/consume counters.
|
||||
constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
|
||||
const u32 binding = descriptors.Add(BufferResource{
|
||||
.used_types = IR::Type::U32,
|
||||
.inline_cbuf = GdsSharp,
|
||||
.is_gds_buffer = true,
|
||||
.is_written = true,
|
||||
});
|
||||
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
// Attempt to deduce the GDS address of counter at compile time.
|
||||
const u32 gds_addr = [&] {
|
||||
const IR::Value& gds_offset = inst.Arg(0);
|
||||
if (gds_offset.IsImmediate()) {
|
||||
// Nothing to do, offset is known.
|
||||
return gds_offset.U32() & 0xFFFF;
|
||||
}
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to track M0 source");
|
||||
|
||||
// M0 must be set by some user data register.
|
||||
const IR::Inst* prod = gds_offset.InstRecursive();
|
||||
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
|
||||
u32 m0_val = info.user_data[ud_reg] >> 16;
|
||||
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
m0_val += prod->Arg(1).U32();
|
||||
}
|
||||
return m0_val & 0xFFFF;
|
||||
}();
|
||||
|
||||
// Patch instruction.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
|
||||
inst.SetArg(1, ir.Imm32(binding));
|
||||
}
|
||||
|
||||
void PatchBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
|
||||
const auto handle = inst.Arg(0);
|
||||
const auto buffer_res = info.buffers[handle.U32()];
|
||||
const auto buffer = buffer_res.GetSharp(info);
|
||||
|
||||
ASSERT(!buffer.add_tid_enable);
|
||||
|
||||
// Address of constant buffer reads can be calculated at IR emittion time.
|
||||
// Address of constant buffer reads can be calculated at IR emission time.
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
return;
|
||||
}
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
|
||||
const IR::U32 index_stride = ir.Imm32(buffer.index_stride);
|
||||
const IR::U32 element_size = ir.Imm32(buffer.element_size);
|
||||
|
||||
|
@ -366,21 +537,27 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
inst.SetArg(1, address);
|
||||
}
|
||||
|
||||
void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
const IR::Inst* handle = inst.Arg(0).InstRecursive();
|
||||
const IR::Inst* producer = handle->Arg(0).InstRecursive();
|
||||
const auto sharp = TrackSharp(producer, info);
|
||||
const auto buffer = info.ReadUdSharp<AmdGpu::Buffer>(sharp);
|
||||
const s32 binding = descriptors.Add(TextureBufferResource{
|
||||
.sharp_idx = sharp,
|
||||
.is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
|
||||
});
|
||||
void PatchTextureBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
|
||||
const auto handle = inst.Arg(0);
|
||||
const auto buffer_res = info.texture_buffers[handle.U32()];
|
||||
const auto buffer = buffer_res.GetSharp(info);
|
||||
|
||||
// Replace handle with binding index in texture buffer resource list.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(binding));
|
||||
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
|
||||
if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
|
||||
const auto swizzled = ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect());
|
||||
const auto converted =
|
||||
ApplyWriteNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion());
|
||||
inst.SetArg(2, converted);
|
||||
} else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) {
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
|
||||
const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect());
|
||||
const auto converted =
|
||||
ApplyReadNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion());
|
||||
inst.ReplaceUsesWith(converted);
|
||||
}
|
||||
}
|
||||
|
||||
IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
|
||||
|
@ -409,39 +586,14 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
|
|||
}
|
||||
}
|
||||
|
||||
void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors, const IR::Inst* producer,
|
||||
const u32 image_binding, const AmdGpu::Image& image) {
|
||||
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
|
||||
const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
|
||||
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
|
||||
const IR::Value& handle = producer->Arg(1);
|
||||
// Inline sampler resource.
|
||||
if (handle.IsImmediate()) {
|
||||
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
|
||||
const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
|
||||
const auto binding = descriptors.Add(SamplerResource{
|
||||
.sharp_idx = std::numeric_limits<u32>::max(),
|
||||
.inline_sampler = inline_sampler,
|
||||
});
|
||||
return {binding, inline_sampler};
|
||||
}
|
||||
// Normal sampler resource.
|
||||
const auto ssharp_handle = handle.InstRecursive();
|
||||
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
|
||||
const auto ssharp = TrackSharp(ssharp_ud, info);
|
||||
const auto binding = descriptors.Add(SamplerResource{
|
||||
.sharp_idx = ssharp,
|
||||
.associated_image = image_binding,
|
||||
.disable_aniso = disable_aniso,
|
||||
});
|
||||
return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
|
||||
}();
|
||||
void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
const AmdGpu::Image& image) {
|
||||
const auto handle = inst.Arg(0);
|
||||
const auto sampler_res = info.samplers[(handle.U32() >> 16) & 0xFFFF];
|
||||
auto sampler = sampler_res.GetSharp(info);
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
const IR::U32 handle = ir.Imm32(image_binding | sampler_binding << 16);
|
||||
|
||||
IR::Inst* body1 = inst.Arg(1).InstRecursive();
|
||||
IR::Inst* body2 = inst.Arg(2).InstRecursive();
|
||||
|
@ -539,8 +691,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
// Query dimensions of image if needed for normalization.
|
||||
// We can't use the image sharp because it could be bound to a different image later.
|
||||
const auto dimensions =
|
||||
unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false))
|
||||
: IR::Value{};
|
||||
unnormalized ? ir.ImageQueryDimension(handle, ir.Imm32(0u), ir.Imm1(false)) : IR::Value{};
|
||||
const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value {
|
||||
const auto coord = get_addr_reg(coord_idx);
|
||||
if (unnormalized) {
|
||||
|
@ -589,7 +740,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
: IR::F32{};
|
||||
const IR::F32 lod_clamp = inst_info.has_lod_clamp ? get_addr_reg(addr_reg++) : IR::F32{};
|
||||
|
||||
auto new_inst = [&] -> IR::Value {
|
||||
auto texel = [&] -> IR::Value {
|
||||
if (inst_info.is_gather) {
|
||||
if (inst_info.is_depth) {
|
||||
return ir.ImageGatherDref(handle, coords, offset, dref, inst_info);
|
||||
|
@ -611,94 +762,30 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
}
|
||||
return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info);
|
||||
}();
|
||||
inst.ReplaceUsesWithAndRemove(new_inst);
|
||||
|
||||
const auto converted = ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
|
||||
inst.ReplaceUsesWith(converted);
|
||||
}
|
||||
|
||||
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
const auto opcode = inst->GetOpcode();
|
||||
if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
||||
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||
opcode == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to find image sharp source");
|
||||
const IR::Inst* producer = result.value();
|
||||
const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2;
|
||||
const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;
|
||||
|
||||
// Read image sharp.
|
||||
const auto tsharp = TrackSharp(tsharp_handle, info);
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
auto image = info.ReadUdSharp<AmdGpu::Image>(tsharp);
|
||||
if (!image.Valid()) {
|
||||
LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!");
|
||||
image = AmdGpu::Image::Null();
|
||||
}
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
|
||||
const bool is_read = inst.GetOpcode() == IR::Opcode::ImageRead;
|
||||
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite;
|
||||
|
||||
// Patch image instruction if image is FMask.
|
||||
if (image.IsFmask()) {
|
||||
ASSERT_MSG(!is_written, "FMask storage instructions are not supported");
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::ImageRead:
|
||||
case IR::Opcode::ImageSampleRaw: {
|
||||
IR::F32 fmaskx = ir.BitCast<IR::F32>(ir.Imm32(0x76543210));
|
||||
IR::F32 fmasky = ir.BitCast<IR::F32>(ir.Imm32(0xfedcba98));
|
||||
inst.ReplaceUsesWith(ir.CompositeConstruct(fmaskx, fmasky));
|
||||
return;
|
||||
}
|
||||
case IR::Opcode::ImageQueryLod:
|
||||
inst.ReplaceUsesWith(ir.Imm32(1));
|
||||
return;
|
||||
case IR::Opcode::ImageQueryDimensions: {
|
||||
IR::Value dims = ir.CompositeConstruct(ir.Imm32(static_cast<u32>(image.width)), // x
|
||||
ir.Imm32(static_cast<u32>(image.width)), // y
|
||||
ir.Imm32(1), ir.Imm32(1)); // depth, mip
|
||||
inst.ReplaceUsesWith(dims);
|
||||
|
||||
// Track FMask resource to do specialization.
|
||||
descriptors.Add(FMaskResource{
|
||||
.sharp_idx = tsharp,
|
||||
});
|
||||
return;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE_MSG("Can't patch fmask instruction {}", inst.GetOpcode());
|
||||
}
|
||||
}
|
||||
|
||||
u32 image_binding = descriptors.Add(ImageResource{
|
||||
.sharp_idx = tsharp,
|
||||
.is_depth = bool(inst_info.is_depth),
|
||||
.is_atomic = IsImageAtomicInstruction(inst),
|
||||
.is_array = bool(inst_info.is_array),
|
||||
.is_read = is_read,
|
||||
.is_written = is_written,
|
||||
});
|
||||
|
||||
// Sample instructions must be resolved into a new instruction using address register data.
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
|
||||
PatchImageSampleInstruction(block, inst, info, descriptors, producer, image_binding, image);
|
||||
return;
|
||||
}
|
||||
|
||||
// Patch image handle
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(image_binding));
|
||||
|
||||
// No need to patch coordinates if we are just querying.
|
||||
void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
|
||||
// Nothing to patch for dimension query.
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageQueryDimensions) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto handle = inst.Arg(0);
|
||||
const auto image_res = info.images[handle.U32() & 0xFFFF];
|
||||
auto image = image_res.GetSharp(info);
|
||||
|
||||
// Sample instructions must be handled separately using address register data.
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
|
||||
PatchImageSampleArgs(block, inst, info, image);
|
||||
return;
|
||||
}
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
|
||||
// Now that we know the image type, adjust texture coordinate vector.
|
||||
IR::Inst* body = inst.Arg(1).InstRecursive();
|
||||
const auto [coords, arg] = [&] -> std::pair<IR::Value, IR::Value> {
|
||||
|
@ -719,152 +806,77 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
case AmdGpu::ImageType::Color3D: // x, y, z, [lod]
|
||||
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
|
||||
case AmdGpu::ImageType::Cube: // x, y, face, [lod]
|
||||
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_written,
|
||||
inst_info.is_array),
|
||||
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2),
|
||||
inst.GetOpcode() == IR::Opcode::ImageWrite, inst_info.is_array),
|
||||
body->Arg(3)};
|
||||
default:
|
||||
UNREACHABLE_MSG("Unknown image type {}", image.GetType());
|
||||
}
|
||||
}();
|
||||
|
||||
const auto has_ms = image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
|
||||
image.GetType() == AmdGpu::ImageType::Color2DMsaaArray;
|
||||
ASSERT(!inst_info.has_lod || !has_ms);
|
||||
const auto lod = inst_info.has_lod ? IR::U32{arg} : IR::U32{};
|
||||
const auto ms = has_ms ? IR::U32{arg} : IR::U32{};
|
||||
|
||||
const auto is_storage = image_res.IsStorage(image);
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageRead) {
|
||||
auto texel = ir.ImageRead(handle, coords, lod, ms, inst_info);
|
||||
if (is_storage) {
|
||||
// Storage image requires shader swizzle.
|
||||
texel = ApplySwizzle(ir, texel, image.DstSelect());
|
||||
}
|
||||
const auto converted =
|
||||
ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
|
||||
inst.ReplaceUsesWith(converted);
|
||||
} else {
|
||||
inst.SetArg(1, coords);
|
||||
|
||||
if (inst_info.has_lod) {
|
||||
ASSERT(inst.GetOpcode() == IR::Opcode::ImageRead ||
|
||||
inst.GetOpcode() == IR::Opcode::ImageWrite);
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa &&
|
||||
image.GetType() != AmdGpu::ImageType::Color2DMsaaArray);
|
||||
inst.SetArg(2, arg);
|
||||
} else if ((image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
|
||||
image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) &&
|
||||
(inst.GetOpcode() == IR::Opcode::ImageRead ||
|
||||
inst.GetOpcode() == IR::Opcode::ImageWrite)) {
|
||||
inst.SetArg(3, arg);
|
||||
}
|
||||
}
|
||||
|
||||
void PatchTextureBufferInterpretation(IR::Block& block, IR::Inst& inst, Info& info) {
|
||||
const auto binding = inst.Arg(0).U32();
|
||||
const auto buffer_res = info.texture_buffers[binding];
|
||||
const auto buffer = buffer_res.GetSharp(info);
|
||||
if (!buffer.Valid()) {
|
||||
// Don't need to swizzle invalid buffer.
|
||||
return;
|
||||
}
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
|
||||
inst.SetArg(2, ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect()));
|
||||
} else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) {
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
|
||||
const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect());
|
||||
inst.ReplaceUsesWith(swizzled);
|
||||
}
|
||||
}
|
||||
|
||||
void PatchImageInterpretation(IR::Block& block, IR::Inst& inst, Info& info) {
|
||||
const auto binding = inst.Arg(0).U32();
|
||||
const auto image_res = info.images[binding & 0xFFFF];
|
||||
const auto image = image_res.GetSharp(info);
|
||||
if (!image.Valid() || !image_res.IsStorage(image)) {
|
||||
// Don't need to swizzle invalid or non-storage image.
|
||||
return;
|
||||
}
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageWrite) {
|
||||
inst.SetArg(4, ApplySwizzle(ir, inst.Arg(4), image.DstSelect()));
|
||||
} else if (inst.GetOpcode() == IR::Opcode::ImageRead) {
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
const auto lod = inst.Arg(2);
|
||||
const auto ms = inst.Arg(3);
|
||||
const auto texel =
|
||||
ir.ImageRead(inst.Arg(0), inst.Arg(1), lod.IsEmpty() ? IR::U32{} : IR::U32{lod},
|
||||
ms.IsEmpty() ? IR::U32{} : IR::U32{ms}, inst_info);
|
||||
const auto swizzled = ApplySwizzle(ir, texel, image.DstSelect());
|
||||
inst.ReplaceUsesWith(swizzled);
|
||||
inst.SetArg(2, lod);
|
||||
inst.SetArg(3, ms);
|
||||
|
||||
auto texel = inst.Arg(4);
|
||||
if (is_storage) {
|
||||
// Storage image requires shader swizzle.
|
||||
texel = ApplySwizzle(ir, texel, image.DstSelect());
|
||||
}
|
||||
}
|
||||
|
||||
void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
// Insert gds binding in the shader if it doesn't exist already.
|
||||
// The buffer is used for append/consume counters.
|
||||
constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
|
||||
const u32 binding = descriptors.Add(BufferResource{
|
||||
.used_types = IR::Type::U32,
|
||||
.inline_cbuf = GdsSharp,
|
||||
.is_gds_buffer = true,
|
||||
.is_written = true,
|
||||
});
|
||||
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
const auto converted =
|
||||
ApplyWriteNumberConversionVec4(ir, texel, image.GetNumberConversion());
|
||||
inst.SetArg(4, converted);
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
// Attempt to deduce the GDS address of counter at compile time.
|
||||
const u32 gds_addr = [&] {
|
||||
const IR::Value& gds_offset = inst.Arg(0);
|
||||
if (gds_offset.IsImmediate()) {
|
||||
// Nothing to do, offset is known.
|
||||
return gds_offset.U32() & 0xFFFF;
|
||||
}
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to track M0 source");
|
||||
|
||||
// M0 must be set by some user data register.
|
||||
const IR::Inst* prod = gds_offset.InstRecursive();
|
||||
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
|
||||
u32 m0_val = info.user_data[ud_reg] >> 16;
|
||||
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
m0_val += prod->Arg(1).U32();
|
||||
}
|
||||
return m0_val & 0xFFFF;
|
||||
}();
|
||||
|
||||
// Patch instruction.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
|
||||
inst.SetArg(1, ir.Imm32(binding));
|
||||
}
|
||||
|
||||
void ResourceTrackingPass(IR::Program& program) {
|
||||
// Iterate resource instructions and patch them after finding the sharp.
|
||||
auto& info = program.info;
|
||||
|
||||
// Pass 1: Track resource sharps
|
||||
Descriptors descriptors{info};
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (IsBufferInstruction(inst)) {
|
||||
PatchBufferInstruction(*block, inst, info, descriptors);
|
||||
continue;
|
||||
}
|
||||
if (IsTextureBufferInstruction(inst)) {
|
||||
PatchTextureBufferInstruction(*block, inst, info, descriptors);
|
||||
continue;
|
||||
}
|
||||
if (IsImageInstruction(inst)) {
|
||||
PatchImageInstruction(*block, inst, info, descriptors);
|
||||
continue;
|
||||
}
|
||||
if (IsDataRingInstruction(inst)) {
|
||||
PatchDataRingInstruction(*block, inst, info, descriptors);
|
||||
PatchBufferSharp(*block, inst, info, descriptors);
|
||||
} else if (IsTextureBufferInstruction(inst)) {
|
||||
PatchTextureBufferSharp(*block, inst, info, descriptors);
|
||||
} else if (IsImageInstruction(inst)) {
|
||||
PatchImageSharp(*block, inst, info, descriptors);
|
||||
} else if (IsDataRingInstruction(inst)) {
|
||||
PatchDataRingAccess(*block, inst, info, descriptors);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Second pass to reinterpret format read/write where needed, since we now know
|
||||
// the bindings and their properties.
|
||||
|
||||
// Pass 2: Patch instruction args
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (IsTextureBufferInstruction(inst)) {
|
||||
PatchTextureBufferInterpretation(*block, inst, info);
|
||||
continue;
|
||||
}
|
||||
if (IsImageInstruction(inst)) {
|
||||
PatchImageInterpretation(*block, inst, info);
|
||||
if (IsBufferInstruction(inst)) {
|
||||
PatchBufferArgs(*block, inst, info);
|
||||
} else if (IsTextureBufferInstruction(inst)) {
|
||||
PatchTextureBufferArgs(*block, inst, info);
|
||||
} else if (IsImageInstruction(inst)) {
|
||||
PatchImageArgs(*block, inst, info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
|
@ -21,4 +21,66 @@ inline Value ApplySwizzle(IREmitter& ir, const Value& vector, const AmdGpu::Comp
|
|||
return swizzled;
|
||||
}
|
||||
|
||||
/// Applies a number conversion in the read direction.
|
||||
inline F32 ApplyReadNumberConversion(IREmitter& ir, const F32& value,
|
||||
const AmdGpu::NumberConversion& conversion) {
|
||||
switch (conversion) {
|
||||
case AmdGpu::NumberConversion::None:
|
||||
return value;
|
||||
case AmdGpu::NumberConversion::UintToUscaled:
|
||||
return ir.ConvertUToF(32, 32, ir.BitCast<U32>(value));
|
||||
case AmdGpu::NumberConversion::SintToSscaled:
|
||||
return ir.ConvertSToF(32, 32, ir.BitCast<U32>(value));
|
||||
case AmdGpu::NumberConversion::UnormToUbnorm:
|
||||
// Convert 0...1 to -1...1
|
||||
return ir.FPSub(ir.FPMul(value, ir.Imm32(2.f)), ir.Imm32(1.f));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
inline Value ApplyReadNumberConversionVec4(IREmitter& ir, const Value& value,
|
||||
const AmdGpu::NumberConversion& conversion) {
|
||||
if (conversion == AmdGpu::NumberConversion::None) {
|
||||
return value;
|
||||
}
|
||||
const auto x = ApplyReadNumberConversion(ir, F32{ir.CompositeExtract(value, 0)}, conversion);
|
||||
const auto y = ApplyReadNumberConversion(ir, F32{ir.CompositeExtract(value, 1)}, conversion);
|
||||
const auto z = ApplyReadNumberConversion(ir, F32{ir.CompositeExtract(value, 2)}, conversion);
|
||||
const auto w = ApplyReadNumberConversion(ir, F32{ir.CompositeExtract(value, 3)}, conversion);
|
||||
return ir.CompositeConstruct(x, y, z, w);
|
||||
}
|
||||
|
||||
/// Applies a number conversion in the write direction.
|
||||
inline F32 ApplyWriteNumberConversion(IREmitter& ir, const F32& value,
|
||||
const AmdGpu::NumberConversion& conversion) {
|
||||
switch (conversion) {
|
||||
case AmdGpu::NumberConversion::None:
|
||||
return value;
|
||||
case AmdGpu::NumberConversion::UintToUscaled:
|
||||
// Need to return float type to maintain IR semantics.
|
||||
return ir.BitCast<F32>(U32{ir.ConvertFToU(32, value)});
|
||||
case AmdGpu::NumberConversion::SintToSscaled:
|
||||
// Need to return float type to maintain IR semantics.
|
||||
return ir.BitCast<F32>(U32{ir.ConvertFToS(32, value)});
|
||||
case AmdGpu::NumberConversion::UnormToUbnorm:
|
||||
// Convert -1...1 to 0...1
|
||||
return ir.FPDiv(ir.FPAdd(value, ir.Imm32(1.f)), ir.Imm32(2.f));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
inline Value ApplyWriteNumberConversionVec4(IREmitter& ir, const Value& value,
|
||||
const AmdGpu::NumberConversion& conversion) {
|
||||
if (conversion == AmdGpu::NumberConversion::None) {
|
||||
return value;
|
||||
}
|
||||
const auto x = ApplyWriteNumberConversion(ir, F32{ir.CompositeExtract(value, 0)}, conversion);
|
||||
const auto y = ApplyWriteNumberConversion(ir, F32{ir.CompositeExtract(value, 1)}, conversion);
|
||||
const auto z = ApplyWriteNumberConversion(ir, F32{ir.CompositeExtract(value, 2)}, conversion);
|
||||
const auto w = ApplyWriteNumberConversion(ir, F32{ir.CompositeExtract(value, 3)}, conversion);
|
||||
return ir.CompositeConstruct(x, y, z, w);
|
||||
}
|
||||
|
||||
} // namespace Shader::IR
|
||||
|
|
|
@ -180,6 +180,7 @@ struct FragmentRuntimeInfo {
|
|||
std::array<PsInput, 32> inputs;
|
||||
struct PsColorBuffer {
|
||||
AmdGpu::NumberFormat num_format;
|
||||
AmdGpu::NumberConversion num_conversion;
|
||||
AmdGpu::CompMapping swizzle;
|
||||
|
||||
auto operator<=>(const PsColorBuffer&) const noexcept = default;
|
||||
|
|
|
@ -32,6 +32,7 @@ struct BufferSpecialization {
|
|||
struct TextureBufferSpecialization {
|
||||
bool is_integer = false;
|
||||
AmdGpu::CompMapping dst_select{};
|
||||
AmdGpu::NumberConversion num_conversion{};
|
||||
|
||||
auto operator<=>(const TextureBufferSpecialization&) const = default;
|
||||
};
|
||||
|
@ -41,6 +42,7 @@ struct ImageSpecialization {
|
|||
bool is_integer = false;
|
||||
bool is_storage = false;
|
||||
AmdGpu::CompMapping dst_select{};
|
||||
AmdGpu::NumberConversion num_conversion{};
|
||||
|
||||
auto operator<=>(const ImageSpecialization&) const = default;
|
||||
};
|
||||
|
@ -107,6 +109,7 @@ struct StageSpecialization {
|
|||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||
spec.dst_select = sharp.DstSelect();
|
||||
spec.num_conversion = sharp.GetNumberConversion();
|
||||
});
|
||||
ForEachSharp(binding, images, info->images,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||
|
@ -116,6 +119,7 @@ struct StageSpecialization {
|
|||
if (spec.is_storage) {
|
||||
spec.dst_select = sharp.DstSelect();
|
||||
}
|
||||
spec.num_conversion = sharp.GetNumberConversion();
|
||||
});
|
||||
ForEachSharp(binding, fmasks, info->fmasks,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||
|
|
|
@ -20,9 +20,9 @@
|
|||
#include "common/types.h"
|
||||
#include "common/unique_function.h"
|
||||
#include "shader_recompiler/params.h"
|
||||
#include "types.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
namespace Vulkan {
|
||||
class Rasterizer;
|
||||
|
@ -902,6 +902,10 @@ struct Liverpool {
|
|||
: info.number_type.Value());
|
||||
}
|
||||
|
||||
[[nodiscard]] NumberConversion GetNumberConversion() const {
|
||||
return MapNumberConversion(info.number_type);
|
||||
}
|
||||
|
||||
[[nodiscard]] CompMapping Swizzle() const {
|
||||
// clang-format off
|
||||
static constexpr std::array<std::array<CompMapping, 4>, 4> mrt_swizzles{{
|
||||
|
@ -938,7 +942,7 @@ struct Liverpool {
|
|||
const auto swap_idx = static_cast<u32>(info.comp_swap.Value());
|
||||
const auto components_idx = NumComponents(info.format) - 1;
|
||||
const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx];
|
||||
return RemapComponents(info.format, mrt_swizzle);
|
||||
return RemapSwizzle(info.format, mrt_swizzle);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -100,7 +100,7 @@ std::string_view NameOf(NumberFormat fmt) {
|
|||
return "Srgb";
|
||||
case NumberFormat::Ubnorm:
|
||||
return "Ubnorm";
|
||||
case NumberFormat::UbnromNz:
|
||||
case NumberFormat::UbnormNz:
|
||||
return "UbnormNz";
|
||||
case NumberFormat::Ubint:
|
||||
return "Ubint";
|
||||
|
|
|
@ -11,96 +11,6 @@
|
|||
|
||||
namespace AmdGpu {
|
||||
|
||||
enum class CompSwizzle : u32 {
|
||||
Zero = 0,
|
||||
One = 1,
|
||||
Red = 4,
|
||||
Green = 5,
|
||||
Blue = 6,
|
||||
Alpha = 7,
|
||||
};
|
||||
|
||||
struct CompMapping {
|
||||
CompSwizzle r : 3;
|
||||
CompSwizzle g : 3;
|
||||
CompSwizzle b : 3;
|
||||
CompSwizzle a : 3;
|
||||
|
||||
auto operator<=>(const CompMapping& other) const = default;
|
||||
|
||||
template <typename T>
|
||||
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
|
||||
return {
|
||||
ApplySingle(data, r),
|
||||
ApplySingle(data, g),
|
||||
ApplySingle(data, b),
|
||||
ApplySingle(data, a),
|
||||
};
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {
|
||||
switch (swizzle) {
|
||||
case CompSwizzle::Zero:
|
||||
return T(0);
|
||||
case CompSwizzle::One:
|
||||
return T(1);
|
||||
case CompSwizzle::Red:
|
||||
return data[0];
|
||||
case CompSwizzle::Green:
|
||||
return data[1];
|
||||
case CompSwizzle::Blue:
|
||||
return data[2];
|
||||
case CompSwizzle::Alpha:
|
||||
return data[3];
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
inline DataFormat RemapDataFormat(const DataFormat format) {
|
||||
switch (format) {
|
||||
case DataFormat::Format11_11_10:
|
||||
return DataFormat::Format10_11_11;
|
||||
case DataFormat::Format10_10_10_2:
|
||||
return DataFormat::Format2_10_10_10;
|
||||
case DataFormat::Format5_5_5_1:
|
||||
return DataFormat::Format1_5_5_5;
|
||||
default:
|
||||
return format;
|
||||
}
|
||||
}
|
||||
|
||||
inline NumberFormat RemapNumberFormat(const NumberFormat format) {
|
||||
return format;
|
||||
}
|
||||
|
||||
inline CompMapping RemapComponents(const DataFormat format, const CompMapping components) {
|
||||
switch (format) {
|
||||
case DataFormat::Format11_11_10: {
|
||||
CompMapping result;
|
||||
result.r = components.b;
|
||||
result.g = components.g;
|
||||
result.b = components.r;
|
||||
result.a = components.a;
|
||||
return result;
|
||||
}
|
||||
case DataFormat::Format10_10_10_2:
|
||||
case DataFormat::Format5_5_5_1: {
|
||||
CompMapping result;
|
||||
result.r = components.a;
|
||||
result.g = components.b;
|
||||
result.b = components.g;
|
||||
result.a = components.r;
|
||||
return result;
|
||||
}
|
||||
default:
|
||||
return components;
|
||||
}
|
||||
}
|
||||
|
||||
// Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture]
|
||||
struct Buffer {
|
||||
u64 base_address : 44;
|
||||
|
@ -140,7 +50,7 @@ struct Buffer {
|
|||
.b = CompSwizzle(dst_sel_z),
|
||||
.a = CompSwizzle(dst_sel_w),
|
||||
};
|
||||
return RemapComponents(DataFormat(data_format), dst_sel);
|
||||
return RemapSwizzle(DataFormat(data_format), dst_sel);
|
||||
}
|
||||
|
||||
NumberFormat GetNumberFmt() const noexcept {
|
||||
|
@ -151,6 +61,10 @@ struct Buffer {
|
|||
return RemapDataFormat(DataFormat(data_format));
|
||||
}
|
||||
|
||||
NumberConversion GetNumberConversion() const noexcept {
|
||||
return MapNumberConversion(NumberFormat(num_format));
|
||||
}
|
||||
|
||||
u32 GetStride() const noexcept {
|
||||
return stride;
|
||||
}
|
||||
|
@ -305,7 +219,7 @@ struct Image {
|
|||
.b = CompSwizzle(dst_sel_z),
|
||||
.a = CompSwizzle(dst_sel_w),
|
||||
};
|
||||
return RemapComponents(DataFormat(data_format), dst_sel);
|
||||
return RemapSwizzle(DataFormat(data_format), dst_sel);
|
||||
}
|
||||
|
||||
u32 Pitch() const {
|
||||
|
@ -354,6 +268,10 @@ struct Image {
|
|||
return RemapNumberFormat(NumberFormat(num_format));
|
||||
}
|
||||
|
||||
NumberConversion GetNumberConversion() const noexcept {
|
||||
return MapNumberConversion(NumberFormat(num_format));
|
||||
}
|
||||
|
||||
TilingMode GetTilingMode() const {
|
||||
if (tiling_index >= 0 && tiling_index <= 7) {
|
||||
return tiling_index == 5 ? TilingMode::Texture_MicroTiled
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include <string_view>
|
||||
#include <fmt/format.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/types.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
|
@ -177,11 +178,130 @@ enum class NumberFormat : u32 {
|
|||
Float = 7,
|
||||
Srgb = 9,
|
||||
Ubnorm = 10,
|
||||
UbnromNz = 11,
|
||||
UbnormNz = 11,
|
||||
Ubint = 12,
|
||||
Ubscaled = 13,
|
||||
};
|
||||
|
||||
enum class CompSwizzle : u32 {
|
||||
Zero = 0,
|
||||
One = 1,
|
||||
Red = 4,
|
||||
Green = 5,
|
||||
Blue = 6,
|
||||
Alpha = 7,
|
||||
};
|
||||
|
||||
enum class NumberConversion : u32 {
|
||||
None,
|
||||
UintToUscaled,
|
||||
SintToSscaled,
|
||||
UnormToUbnorm,
|
||||
};
|
||||
|
||||
struct CompMapping {
|
||||
CompSwizzle r : 3;
|
||||
CompSwizzle g : 3;
|
||||
CompSwizzle b : 3;
|
||||
CompSwizzle a : 3;
|
||||
|
||||
auto operator<=>(const CompMapping& other) const = default;
|
||||
|
||||
template <typename T>
|
||||
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
|
||||
return {
|
||||
ApplySingle(data, r),
|
||||
ApplySingle(data, g),
|
||||
ApplySingle(data, b),
|
||||
ApplySingle(data, a),
|
||||
};
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {
|
||||
switch (swizzle) {
|
||||
case CompSwizzle::Zero:
|
||||
return T(0);
|
||||
case CompSwizzle::One:
|
||||
return T(1);
|
||||
case CompSwizzle::Red:
|
||||
return data[0];
|
||||
case CompSwizzle::Green:
|
||||
return data[1];
|
||||
case CompSwizzle::Blue:
|
||||
return data[2];
|
||||
case CompSwizzle::Alpha:
|
||||
return data[3];
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
inline DataFormat RemapDataFormat(const DataFormat format) {
|
||||
switch (format) {
|
||||
case DataFormat::Format11_11_10:
|
||||
return DataFormat::Format10_11_11;
|
||||
case DataFormat::Format10_10_10_2:
|
||||
return DataFormat::Format2_10_10_10;
|
||||
case DataFormat::Format5_5_5_1:
|
||||
return DataFormat::Format1_5_5_5;
|
||||
default:
|
||||
return format;
|
||||
}
|
||||
}
|
||||
|
||||
inline NumberFormat RemapNumberFormat(const NumberFormat format) {
|
||||
switch (format) {
|
||||
case NumberFormat::Uscaled:
|
||||
return NumberFormat::Uint;
|
||||
case NumberFormat::Sscaled:
|
||||
return NumberFormat::Sint;
|
||||
case NumberFormat::Ubnorm:
|
||||
return NumberFormat::Unorm;
|
||||
default:
|
||||
return format;
|
||||
}
|
||||
}
|
||||
|
||||
inline CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizzle) {
|
||||
switch (format) {
|
||||
case DataFormat::Format11_11_10: {
|
||||
CompMapping result;
|
||||
result.r = swizzle.b;
|
||||
result.g = swizzle.g;
|
||||
result.b = swizzle.r;
|
||||
result.a = swizzle.a;
|
||||
return result;
|
||||
}
|
||||
case DataFormat::Format10_10_10_2:
|
||||
case DataFormat::Format5_5_5_1: {
|
||||
CompMapping result;
|
||||
result.r = swizzle.a;
|
||||
result.g = swizzle.b;
|
||||
result.b = swizzle.g;
|
||||
result.a = swizzle.r;
|
||||
return result;
|
||||
}
|
||||
default:
|
||||
return swizzle;
|
||||
}
|
||||
}
|
||||
|
||||
inline NumberConversion MapNumberConversion(const NumberFormat format) {
|
||||
switch (format) {
|
||||
case NumberFormat::Uscaled:
|
||||
return NumberConversion::UintToUscaled;
|
||||
case NumberFormat::Sscaled:
|
||||
return NumberConversion::SintToSscaled;
|
||||
case NumberFormat::Ubnorm:
|
||||
return NumberConversion::UnormToUbnorm;
|
||||
default:
|
||||
return NumberConversion::None;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace AmdGpu
|
||||
|
||||
template <>
|
||||
|
|
|
@ -447,7 +447,7 @@ static constexpr vk::FormatFeatureFlags2 GetNumberFormatFeatureFlags(
|
|||
case AmdGpu::NumberFormat::Srgb:
|
||||
return ImageRead | Mrt;
|
||||
case AmdGpu::NumberFormat::Ubnorm:
|
||||
case AmdGpu::NumberFormat::UbnromNz:
|
||||
case AmdGpu::NumberFormat::UbnormNz:
|
||||
case AmdGpu::NumberFormat::Ubint:
|
||||
case AmdGpu::NumberFormat::Ubscaled:
|
||||
return ImageRead;
|
||||
|
@ -468,6 +468,7 @@ static constexpr SurfaceFormatInfo CreateSurfaceFormatInfo(const AmdGpu::DataFor
|
|||
}
|
||||
|
||||
std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
// Uscaled, Sscaled, and Ubnorm formats are automatically remapped and handled in shader.
|
||||
static constexpr std::array formats{
|
||||
// Invalid
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Unorm,
|
||||
|
@ -490,7 +491,7 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
|||
vk::Format::eUndefined),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubnorm,
|
||||
vk::Format::eUndefined),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::UbnromNz,
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::UbnormNz,
|
||||
vk::Format::eUndefined),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubint,
|
||||
vk::Format::eUndefined),
|
||||
|
@ -501,10 +502,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
|||
vk::Format::eR8Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR8Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR8Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR8Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR8Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Sint,
|
||||
|
@ -516,10 +513,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
|||
vk::Format::eR16Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR16Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR16Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR16Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR16Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Sint,
|
||||
|
@ -531,10 +524,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
|||
vk::Format::eR8G8Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR8G8Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR8G8Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR8G8Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR8G8Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Sint,
|
||||
|
@ -553,10 +542,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
|||
vk::Format::eR16G16Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR16G16Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR16G16Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR16G16Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR16G16Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Sint,
|
||||
|
@ -573,10 +558,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
|||
vk::Format::eA2B10G10R10UnormPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eA2B10G10R10SnormPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eA2B10G10R10UscaledPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eA2B10G10R10SscaledPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eA2B10G10R10UintPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Sint,
|
||||
|
@ -586,10 +567,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
|||
vk::Format::eR8G8B8A8Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR8G8B8A8Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR8G8B8A8Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR8G8B8A8Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR8G8B8A8Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Sint,
|
||||
|
@ -608,10 +585,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
|||
vk::Format::eR16G16B16A16Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR16G16B16A16Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16,
|
||||
AmdGpu::NumberFormat::Uscaled, vk::Format::eR16G16B16A16Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16,
|
||||
AmdGpu::NumberFormat::Sscaled, vk::Format::eR16G16B16A16Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR16G16B16A16Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Sint,
|
||||
|
|
|
@ -32,6 +32,7 @@ struct GraphicsPipelineKey {
|
|||
u32 num_color_attachments;
|
||||
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
|
||||
std::array<AmdGpu::NumberFormat, Liverpool::NumColorBuffers> color_num_formats;
|
||||
std::array<AmdGpu::NumberConversion, Liverpool::NumColorBuffers> color_num_conversions;
|
||||
std::array<AmdGpu::CompMapping, Liverpool::NumColorBuffers> color_swizzles;
|
||||
vk::Format depth_format;
|
||||
vk::Format stencil_format;
|
||||
|
|
|
@ -168,6 +168,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
|||
for (u32 i = 0; i < Shader::MaxColorBuffers; i++) {
|
||||
info.fs_info.color_buffers[i] = {
|
||||
.num_format = graphics_key.color_num_formats[i],
|
||||
.num_conversion = graphics_key.color_num_conversions[i],
|
||||
.swizzle = graphics_key.color_swizzles[i],
|
||||
};
|
||||
}
|
||||
|
@ -302,6 +303,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
key.num_color_attachments = 0;
|
||||
key.color_formats.fill(vk::Format::eUndefined);
|
||||
key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm);
|
||||
key.color_num_conversions.fill(AmdGpu::NumberConversion::None);
|
||||
key.blend_controls.fill({});
|
||||
key.write_masks.fill({});
|
||||
key.color_swizzles.fill({});
|
||||
|
@ -330,6 +332,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
key.color_formats[remapped_cb] =
|
||||
LiverpoolToVK::SurfaceFormat(col_buf.GetDataFmt(), col_buf.GetNumberFmt());
|
||||
key.color_num_formats[remapped_cb] = col_buf.GetNumberFmt();
|
||||
key.color_num_conversions[remapped_cb] = col_buf.GetNumberConversion();
|
||||
key.color_swizzles[remapped_cb] = col_buf.Swizzle();
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue