mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-04 06:06:00 +00:00
shader_recompiler: Define fragment output type based on number format. (#1097)
* shader_recompiler: Define fragment output type based on number format.
* shader_recompiler: Fix GetAttribute SPIR-V output type.
* shader_recompiler: Don't bitcast on SetAttribute unless integer target.
This commit is contained in:
parent
9fde313a77
commit
789d78c3e4
|
@ -49,12 +49,13 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
|||
if (info.num_components == 1) {
|
||||
return info.id;
|
||||
} else {
|
||||
return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(element));
|
||||
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
|
||||
}
|
||||
}
|
||||
switch (attr) {
|
||||
case IR::Attribute::Position0: {
|
||||
return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, ctx.ConstU32(element));
|
||||
}
|
||||
case IR::Attribute::Position1:
|
||||
case IR::Attribute::Position2:
|
||||
case IR::Attribute::Position3: {
|
||||
|
@ -70,17 +71,47 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
|||
case IR::Attribute::RenderTarget6:
|
||||
case IR::Attribute::RenderTarget7: {
|
||||
const u32 index = u32(attr) - u32(IR::Attribute::RenderTarget0);
|
||||
if (ctx.frag_num_comp[index] > 1) {
|
||||
return ctx.OpAccessChain(ctx.output_f32, ctx.frag_color[index], ctx.ConstU32(element));
|
||||
const auto& info{ctx.frag_outputs.at(index)};
|
||||
if (info.num_components > 1) {
|
||||
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
|
||||
} else {
|
||||
return ctx.frag_color[index];
|
||||
return info.id;
|
||||
}
|
||||
}
|
||||
case IR::Attribute::Depth:
|
||||
return ctx.frag_depth;
|
||||
default:
|
||||
throw NotImplementedException("Read attribute {}", attr);
|
||||
throw NotImplementedException("Write attribute {}", attr);
|
||||
}
|
||||
}
|
||||
|
||||
/// Looks up the SPIR-V scalar type backing one component of an output attribute.
///
/// @param ctx  Emit context holding the per-param and per-render-target attribute records.
/// @param attr Output attribute being written.
/// @return A pair of {component type id, is-integer flag}. Params and render targets report
///         whatever their attribute record holds; positions and depth always report
///         {F32[1], false}.
/// @throws NotImplementedException for any attribute not handled below.
std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
    using Attr = IR::Attribute;

    // Params and render targets both keep their type information in an attribute record,
    // so the same projection serves either table.
    const auto from_record = [](const auto& record) {
        return std::pair<Id, bool>{record.component_type, record.is_integer};
    };

    if (IR::IsParam(attr)) {
        const u32 slot = u32(attr) - u32(Attr::Param0);
        return from_record(ctx.output_params.at(slot));
    }

    switch (attr) {
    case Attr::RenderTarget0:
    case Attr::RenderTarget1:
    case Attr::RenderTarget2:
    case Attr::RenderTarget3:
    case Attr::RenderTarget4:
    case Attr::RenderTarget5:
    case Attr::RenderTarget6:
    case Attr::RenderTarget7: {
        const u32 target = u32(attr) - u32(Attr::RenderTarget0);
        return from_record(ctx.frag_outputs.at(target));
    }
    case Attr::Position0:
    case Attr::Position1:
    case Attr::Position2:
    case Attr::Position3:
    case Attr::Depth:
        // These outputs are always declared with 32-bit float components.
        return std::pair<Id, bool>{ctx.F32[1], false};
    default:
        throw NotImplementedException("Write attribute {}", attr);
    }
}
|
||||
} // Anonymous namespace
|
||||
|
@ -156,17 +187,21 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
|||
// Attribute is disabled or varying component is not written
|
||||
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
|
||||
}
|
||||
if (param.is_default) {
|
||||
return ctx.OpCompositeExtract(param.component_type, param.id, comp);
|
||||
}
|
||||
|
||||
if (param.num_components > 1) {
|
||||
Id result;
|
||||
if (param.is_default) {
|
||||
result = ctx.OpCompositeExtract(param.component_type, param.id, comp);
|
||||
} else if (param.num_components > 1) {
|
||||
const Id pointer{
|
||||
ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
|
||||
return ctx.OpLoad(param.component_type, pointer);
|
||||
result = ctx.OpLoad(param.component_type, pointer);
|
||||
} else {
|
||||
return ctx.OpLoad(param.component_type, param.id);
|
||||
result = ctx.OpLoad(param.component_type, param.id);
|
||||
}
|
||||
if (param.is_integer) {
|
||||
result = ctx.OpBitcast(ctx.F32[1], result);
|
||||
}
|
||||
return result;
|
||||
} else {
|
||||
const auto step_rate = EmitReadStepRate(ctx, param.id.value);
|
||||
const auto offset = ctx.OpIAdd(
|
||||
|
@ -222,7 +257,12 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
|
|||
return;
|
||||
}
|
||||
const Id pointer{OutputAttrPointer(ctx, attr, element)};
|
||||
ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value));
|
||||
const auto component_type{OutputAttrComponentType(ctx, attr)};
|
||||
if (component_type.second) {
|
||||
ctx.OpStore(pointer, ctx.OpBitcast(component_type.first, value));
|
||||
} else {
|
||||
ctx.OpStore(pointer, value);
|
||||
}
|
||||
}
|
||||
|
||||
template <u32 N>
|
||||
|
|
|
@ -120,6 +120,7 @@ void EmitContext::DefineArithmeticTypes() {
|
|||
|
||||
output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
|
||||
output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
|
||||
output_s32 = Name(TypePointer(spv::StorageClass::Output, S32[1]), "output_s32");
|
||||
|
||||
full_result_i32x2 = Name(TypeStruct(S32[1], S32[1]), "full_result_i32x2");
|
||||
full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2");
|
||||
|
@ -151,21 +152,21 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
|
|||
UNREACHABLE_MSG("Invalid attribute type {}", fmt);
|
||||
}
|
||||
|
||||
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id) {
|
||||
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id,
|
||||
bool output) {
|
||||
switch (fmt) {
|
||||
case AmdGpu::NumberFormat::Float:
|
||||
case AmdGpu::NumberFormat::Unorm:
|
||||
case AmdGpu::NumberFormat::Snorm:
|
||||
case AmdGpu::NumberFormat::SnormNz:
|
||||
return {id, input_f32, F32[1], 4};
|
||||
case AmdGpu::NumberFormat::Uint:
|
||||
return {id, input_u32, U32[1], 4};
|
||||
case AmdGpu::NumberFormat::Sint:
|
||||
return {id, input_s32, S32[1], 4};
|
||||
case AmdGpu::NumberFormat::Sscaled:
|
||||
return {id, input_f32, F32[1], 4};
|
||||
case AmdGpu::NumberFormat::Uscaled:
|
||||
return {id, input_f32, F32[1], 4};
|
||||
case AmdGpu::NumberFormat::Srgb:
|
||||
return {id, output ? output_f32 : input_f32, F32[1], 4, false};
|
||||
case AmdGpu::NumberFormat::Uint:
|
||||
return {id, output ? output_u32 : input_u32, U32[1], 4, true};
|
||||
case AmdGpu::NumberFormat::Sint:
|
||||
return {id, output ? output_s32 : input_s32, S32[1], 4, true};
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -236,9 +237,13 @@ void EmitContext::DefineInputs() {
|
|||
: 1;
|
||||
// Note that we pass index rather than Id
|
||||
input_params[input.binding] = {
|
||||
rate_idx, input_u32,
|
||||
U32[1], input.num_components,
|
||||
false, input.instance_data_buf,
|
||||
rate_idx,
|
||||
input_u32,
|
||||
U32[1],
|
||||
input.num_components,
|
||||
true,
|
||||
false,
|
||||
input.instance_data_buf,
|
||||
};
|
||||
} else {
|
||||
Id id{DefineInput(type, input.binding)};
|
||||
|
@ -247,7 +252,7 @@ void EmitContext::DefineInputs() {
|
|||
} else {
|
||||
Name(id, fmt::format("vs_in_attr{}", input.binding));
|
||||
}
|
||||
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
|
||||
input_params[input.binding] = GetAttributeInfo(input.fmt, id, false);
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
}
|
||||
|
@ -320,10 +325,12 @@ void EmitContext::DefineOutputs() {
|
|||
continue;
|
||||
}
|
||||
const u32 num_components = info.stores.NumComponents(mrt);
|
||||
frag_color[i] = DefineOutput(F32[num_components], i);
|
||||
frag_num_comp[i] = num_components;
|
||||
Name(frag_color[i], fmt::format("frag_color{}", i));
|
||||
interfaces.push_back(frag_color[i]);
|
||||
const AmdGpu::NumberFormat num_format{runtime_info.fs_info.color_buffers[i].num_format};
|
||||
const Id type{GetAttributeType(*this, num_format)[num_components]};
|
||||
const Id id = DefineOutput(type, i);
|
||||
Name(id, fmt::format("frag_color{}", i));
|
||||
frag_outputs[i] = GetAttributeInfo(num_format, id, true);
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -166,6 +166,7 @@ public:
|
|||
Id input_s32{};
|
||||
Id output_u32{};
|
||||
Id output_f32{};
|
||||
Id output_s32{};
|
||||
|
||||
boost::container::small_vector<Id, 16> interfaces;
|
||||
|
||||
|
@ -177,8 +178,6 @@ public:
|
|||
Id frag_coord{};
|
||||
Id front_facing{};
|
||||
Id frag_depth{};
|
||||
std::array<Id, 8> frag_color{};
|
||||
std::array<u32, 8> frag_num_comp{};
|
||||
Id clip_distances{};
|
||||
Id cull_distances{};
|
||||
|
||||
|
@ -237,11 +236,13 @@ public:
|
|||
Id pointer_type;
|
||||
Id component_type;
|
||||
u32 num_components;
|
||||
bool is_integer{};
|
||||
bool is_default{};
|
||||
s32 buffer_handle{-1};
|
||||
};
|
||||
std::array<SpirvAttribute, 32> input_params{};
|
||||
std::array<SpirvAttribute, 32> output_params{};
|
||||
std::array<SpirvAttribute, 8> frag_outputs{};
|
||||
|
||||
private:
|
||||
void DefineArithmeticTypes();
|
||||
|
@ -254,7 +255,7 @@ private:
|
|||
void DefineImagesAndSamplers();
|
||||
void DefineSharedMemory();
|
||||
|
||||
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
|
||||
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, bool output);
|
||||
};
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -25,7 +25,7 @@ void Translator::EmitExport(const GcnInst& inst) {
|
|||
return comp;
|
||||
}
|
||||
const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
|
||||
switch (runtime_info.fs_info.mrt_swizzles[index]) {
|
||||
switch (runtime_info.fs_info.color_buffers[index].mrt_swizzle) {
|
||||
case MrtSwizzle::Identity:
|
||||
return comp;
|
||||
case MrtSwizzle::Alt:
|
||||
|
|
|
@ -80,10 +80,16 @@ struct FragmentRuntimeInfo {
|
|||
auto operator<=>(const PsInput&) const noexcept = default;
|
||||
};
|
||||
boost::container::static_vector<PsInput, 32> inputs;
|
||||
std::array<MrtSwizzle, MaxColorBuffers> mrt_swizzles;
|
||||
// Per-color-buffer state carried in the fragment stage runtime info; one entry exists for
// each of the MaxColorBuffers slots in `color_buffers`.
struct PsColorBuffer {
|
||||
// Number format of the color buffer. EmitContext::DefineOutputs reads it to pick the
// SPIR-V type of the matching fragment output.
AmdGpu::NumberFormat num_format;
|
||||
// Swizzle mode for this render target; Translator::EmitExport switches on it when
// choosing the output component.
MrtSwizzle mrt_swizzle;
|
||||
|
||||
// Defaulted memberwise comparison, so arrays of PsColorBuffer can be compared with
// std::ranges::equal in FragmentRuntimeInfo::operator==.
auto operator<=>(const PsColorBuffer&) const noexcept = default;
|
||||
};
|
||||
std::array<PsColorBuffer, MaxColorBuffers> color_buffers;
|
||||
|
||||
bool operator==(const FragmentRuntimeInfo& other) const noexcept {
|
||||
return std::ranges::equal(mrt_swizzles, other.mrt_swizzles) &&
|
||||
return std::ranges::equal(color_buffers, other.color_buffers) &&
|
||||
std::ranges::equal(inputs, other.inputs);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -26,6 +26,7 @@ using Liverpool = AmdGpu::Liverpool;
|
|||
struct GraphicsPipelineKey {
|
||||
std::array<size_t, MaxShaderStages> stage_hashes;
|
||||
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
|
||||
std::array<AmdGpu::NumberFormat, Liverpool::NumColorBuffers> color_num_formats;
|
||||
std::array<Liverpool::ColorBuffer::SwapMode, Liverpool::NumColorBuffers> mrt_swizzles;
|
||||
vk::Format depth_format;
|
||||
vk::Format stencil_format;
|
||||
|
|
|
@ -95,10 +95,6 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
|||
case Shader::Stage::Fragment: {
|
||||
info.num_user_data = regs.ps_program.settings.num_user_regs;
|
||||
info.num_allocated_vgprs = regs.ps_program.settings.num_vgprs * 4;
|
||||
std::ranges::transform(graphics_key.mrt_swizzles, info.fs_info.mrt_swizzles.begin(),
|
||||
[](Liverpool::ColorBuffer::SwapMode mode) {
|
||||
return static_cast<Shader::MrtSwizzle>(mode);
|
||||
});
|
||||
const auto& ps_inputs = regs.ps_inputs;
|
||||
for (u32 i = 0; i < regs.num_interp; i++) {
|
||||
info.fs_info.inputs.push_back({
|
||||
|
@ -108,6 +104,12 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
|||
.default_value = u8(ps_inputs[i].default_value),
|
||||
});
|
||||
}
|
||||
for (u32 i = 0; i < Shader::MaxColorBuffers; i++) {
|
||||
info.fs_info.color_buffers[i] = {
|
||||
.num_format = graphics_key.color_num_formats[i],
|
||||
.mrt_swizzle = static_cast<Shader::MrtSwizzle>(graphics_key.mrt_swizzles[i]),
|
||||
};
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Compute: {
|
||||
|
@ -244,6 +246,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
// attachments. This might be not a case as HW color buffers can be bound in an arbitrary
|
||||
// order. We need to do some arrays compaction at this stage
|
||||
key.color_formats.fill(vk::Format::eUndefined);
|
||||
key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm);
|
||||
key.blend_controls.fill({});
|
||||
key.write_masks.fill({});
|
||||
key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard);
|
||||
|
@ -261,6 +264,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
const bool is_vo_surface = renderer->IsVideoOutSurface(col_buf);
|
||||
key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat(
|
||||
base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/);
|
||||
key.color_num_formats[remapped_cb] = col_buf.NumFormat();
|
||||
if (base_format == key.color_formats[remapped_cb]) {
|
||||
key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value();
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue