shader_recompiler: Define fragment output type based on number format. (#1097)

* shader_recompiler: Define fragment output type based on number format.

* shader_recompiler: Fix GetAttribute SPIR-V output type.

* shader_recompiler: Don't bitcast on SetAttribute unless integer target.
This commit is contained in:
squidbus 2024-10-01 13:42:37 -07:00 committed by GitHub
parent 9fde313a77
commit 789d78c3e4
7 changed files with 97 additions and 38 deletions

View file

@ -49,12 +49,13 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
if (info.num_components == 1) {
return info.id;
} else {
return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(element));
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
}
}
switch (attr) {
case IR::Attribute::Position0: {
return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, ctx.ConstU32(element));
}
case IR::Attribute::Position1:
case IR::Attribute::Position2:
case IR::Attribute::Position3: {
@ -70,17 +71,47 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
case IR::Attribute::RenderTarget6:
case IR::Attribute::RenderTarget7: {
const u32 index = u32(attr) - u32(IR::Attribute::RenderTarget0);
if (ctx.frag_num_comp[index] > 1) {
return ctx.OpAccessChain(ctx.output_f32, ctx.frag_color[index], ctx.ConstU32(element));
const auto& info{ctx.frag_outputs.at(index)};
if (info.num_components > 1) {
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
} else {
return ctx.frag_color[index];
return info.id;
}
}
case IR::Attribute::Depth:
return ctx.frag_depth;
default:
throw NotImplementedException("Read attribute {}", attr);
throw NotImplementedException("Write attribute {}", attr);
}
}
std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& info{ctx.output_params.at(index)};
return {info.component_type, info.is_integer};
}
switch (attr) {
case IR::Attribute::Position0:
case IR::Attribute::Position1:
case IR::Attribute::Position2:
case IR::Attribute::Position3:
case IR::Attribute::Depth:
return {ctx.F32[1], false};
case IR::Attribute::RenderTarget0:
case IR::Attribute::RenderTarget1:
case IR::Attribute::RenderTarget2:
case IR::Attribute::RenderTarget3:
case IR::Attribute::RenderTarget4:
case IR::Attribute::RenderTarget5:
case IR::Attribute::RenderTarget6:
case IR::Attribute::RenderTarget7: {
const u32 index = u32(attr) - u32(IR::Attribute::RenderTarget0);
const auto& info{ctx.frag_outputs.at(index)};
return {info.component_type, info.is_integer};
}
default:
throw NotImplementedException("Write attribute {}", attr);
}
}
} // Anonymous namespace
@ -156,17 +187,21 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
// Attribute is disabled or varying component is not written
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
}
if (param.is_default) {
return ctx.OpCompositeExtract(param.component_type, param.id, comp);
}
if (param.num_components > 1) {
Id result;
if (param.is_default) {
result = ctx.OpCompositeExtract(param.component_type, param.id, comp);
} else if (param.num_components > 1) {
const Id pointer{
ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
return ctx.OpLoad(param.component_type, pointer);
result = ctx.OpLoad(param.component_type, pointer);
} else {
return ctx.OpLoad(param.component_type, param.id);
result = ctx.OpLoad(param.component_type, param.id);
}
if (param.is_integer) {
result = ctx.OpBitcast(ctx.F32[1], result);
}
return result;
} else {
const auto step_rate = EmitReadStepRate(ctx, param.id.value);
const auto offset = ctx.OpIAdd(
@ -222,7 +257,12 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
return;
}
const Id pointer{OutputAttrPointer(ctx, attr, element)};
ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value));
const auto component_type{OutputAttrComponentType(ctx, attr)};
if (component_type.second) {
ctx.OpStore(pointer, ctx.OpBitcast(component_type.first, value));
} else {
ctx.OpStore(pointer, value);
}
}
template <u32 N>

View file

@ -120,6 +120,7 @@ void EmitContext::DefineArithmeticTypes() {
output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
output_s32 = Name(TypePointer(spv::StorageClass::Output, S32[1]), "output_s32");
full_result_i32x2 = Name(TypeStruct(S32[1], S32[1]), "full_result_i32x2");
full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2");
@ -151,21 +152,21 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
UNREACHABLE_MSG("Invalid attribute type {}", fmt);
}
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id) {
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id,
bool output) {
switch (fmt) {
case AmdGpu::NumberFormat::Float:
case AmdGpu::NumberFormat::Unorm:
case AmdGpu::NumberFormat::Snorm:
case AmdGpu::NumberFormat::SnormNz:
return {id, input_f32, F32[1], 4};
case AmdGpu::NumberFormat::Uint:
return {id, input_u32, U32[1], 4};
case AmdGpu::NumberFormat::Sint:
return {id, input_s32, S32[1], 4};
case AmdGpu::NumberFormat::Sscaled:
return {id, input_f32, F32[1], 4};
case AmdGpu::NumberFormat::Uscaled:
return {id, input_f32, F32[1], 4};
case AmdGpu::NumberFormat::Srgb:
return {id, output ? output_f32 : input_f32, F32[1], 4, false};
case AmdGpu::NumberFormat::Uint:
return {id, output ? output_u32 : input_u32, U32[1], 4, true};
case AmdGpu::NumberFormat::Sint:
return {id, output ? output_s32 : input_s32, S32[1], 4, true};
default:
break;
}
@ -236,9 +237,13 @@ void EmitContext::DefineInputs() {
: 1;
// Note that we pass index rather than Id
input_params[input.binding] = {
rate_idx, input_u32,
U32[1], input.num_components,
false, input.instance_data_buf,
rate_idx,
input_u32,
U32[1],
input.num_components,
true,
false,
input.instance_data_buf,
};
} else {
Id id{DefineInput(type, input.binding)};
@ -247,7 +252,7 @@ void EmitContext::DefineInputs() {
} else {
Name(id, fmt::format("vs_in_attr{}", input.binding));
}
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
input_params[input.binding] = GetAttributeInfo(input.fmt, id, false);
interfaces.push_back(id);
}
}
@ -320,10 +325,12 @@ void EmitContext::DefineOutputs() {
continue;
}
const u32 num_components = info.stores.NumComponents(mrt);
frag_color[i] = DefineOutput(F32[num_components], i);
frag_num_comp[i] = num_components;
Name(frag_color[i], fmt::format("frag_color{}", i));
interfaces.push_back(frag_color[i]);
const AmdGpu::NumberFormat num_format{runtime_info.fs_info.color_buffers[i].num_format};
const Id type{GetAttributeType(*this, num_format)[num_components]};
const Id id = DefineOutput(type, i);
Name(id, fmt::format("frag_color{}", i));
frag_outputs[i] = GetAttributeInfo(num_format, id, true);
interfaces.push_back(id);
}
break;
default:

View file

@ -166,6 +166,7 @@ public:
Id input_s32{};
Id output_u32{};
Id output_f32{};
Id output_s32{};
boost::container::small_vector<Id, 16> interfaces;
@ -177,8 +178,6 @@ public:
Id frag_coord{};
Id front_facing{};
Id frag_depth{};
std::array<Id, 8> frag_color{};
std::array<u32, 8> frag_num_comp{};
Id clip_distances{};
Id cull_distances{};
@ -237,11 +236,13 @@ public:
Id pointer_type;
Id component_type;
u32 num_components;
bool is_integer{};
bool is_default{};
s32 buffer_handle{-1};
};
std::array<SpirvAttribute, 32> input_params{};
std::array<SpirvAttribute, 32> output_params{};
std::array<SpirvAttribute, 8> frag_outputs{};
private:
void DefineArithmeticTypes();
@ -254,7 +255,7 @@ private:
void DefineImagesAndSamplers();
void DefineSharedMemory();
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, bool output);
};
} // namespace Shader::Backend::SPIRV

View file

@ -25,7 +25,7 @@ void Translator::EmitExport(const GcnInst& inst) {
return comp;
}
const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
switch (runtime_info.fs_info.mrt_swizzles[index]) {
switch (runtime_info.fs_info.color_buffers[index].mrt_swizzle) {
case MrtSwizzle::Identity:
return comp;
case MrtSwizzle::Alt:

View file

@ -80,10 +80,16 @@ struct FragmentRuntimeInfo {
auto operator<=>(const PsInput&) const noexcept = default;
};
boost::container::static_vector<PsInput, 32> inputs;
std::array<MrtSwizzle, MaxColorBuffers> mrt_swizzles;
struct PsColorBuffer {
AmdGpu::NumberFormat num_format;
MrtSwizzle mrt_swizzle;
auto operator<=>(const PsColorBuffer&) const noexcept = default;
};
std::array<PsColorBuffer, MaxColorBuffers> color_buffers;
bool operator==(const FragmentRuntimeInfo& other) const noexcept {
return std::ranges::equal(mrt_swizzles, other.mrt_swizzles) &&
return std::ranges::equal(color_buffers, other.color_buffers) &&
std::ranges::equal(inputs, other.inputs);
}
};

View file

@ -26,6 +26,7 @@ using Liverpool = AmdGpu::Liverpool;
struct GraphicsPipelineKey {
std::array<size_t, MaxShaderStages> stage_hashes;
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
std::array<AmdGpu::NumberFormat, Liverpool::NumColorBuffers> color_num_formats;
std::array<Liverpool::ColorBuffer::SwapMode, Liverpool::NumColorBuffers> mrt_swizzles;
vk::Format depth_format;
vk::Format stencil_format;

View file

@ -95,10 +95,6 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
case Shader::Stage::Fragment: {
info.num_user_data = regs.ps_program.settings.num_user_regs;
info.num_allocated_vgprs = regs.ps_program.settings.num_vgprs * 4;
std::ranges::transform(graphics_key.mrt_swizzles, info.fs_info.mrt_swizzles.begin(),
[](Liverpool::ColorBuffer::SwapMode mode) {
return static_cast<Shader::MrtSwizzle>(mode);
});
const auto& ps_inputs = regs.ps_inputs;
for (u32 i = 0; i < regs.num_interp; i++) {
info.fs_info.inputs.push_back({
@ -108,6 +104,12 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
.default_value = u8(ps_inputs[i].default_value),
});
}
for (u32 i = 0; i < Shader::MaxColorBuffers; i++) {
info.fs_info.color_buffers[i] = {
.num_format = graphics_key.color_num_formats[i],
.mrt_swizzle = static_cast<Shader::MrtSwizzle>(graphics_key.mrt_swizzles[i]),
};
}
break;
}
case Shader::Stage::Compute: {
@ -244,6 +246,7 @@ bool PipelineCache::RefreshGraphicsKey() {
// attachments. This might be not a case as HW color buffers can be bound in an arbitrary
// order. We need to do some arrays compaction at this stage
key.color_formats.fill(vk::Format::eUndefined);
key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm);
key.blend_controls.fill({});
key.write_masks.fill({});
key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard);
@ -261,6 +264,7 @@ bool PipelineCache::RefreshGraphicsKey() {
const bool is_vo_surface = renderer->IsVideoOutSurface(col_buf);
key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat(
base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/);
key.color_num_formats[remapped_cb] = col_buf.NumFormat();
if (base_format == key.color_formats[remapped_cb]) {
key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value();
}