mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-07 15:46:01 +00:00
shader_recompiler: Define fragment output type based on number format. (#1097)
* shader_recompiler: Define fragment output type based on number format.
* shader_recompiler: Fix GetAttribute SPIR-V output type.
* shader_recompiler: Don't bitcast on SetAttribute unless integer target.
This commit is contained in:
parent
9fde313a77
commit
789d78c3e4
|
@ -49,12 +49,13 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
||||||
if (info.num_components == 1) {
|
if (info.num_components == 1) {
|
||||||
return info.id;
|
return info.id;
|
||||||
} else {
|
} else {
|
||||||
return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(element));
|
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
switch (attr) {
|
switch (attr) {
|
||||||
case IR::Attribute::Position0: {
|
case IR::Attribute::Position0: {
|
||||||
return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, ctx.ConstU32(element));
|
return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, ctx.ConstU32(element));
|
||||||
|
}
|
||||||
case IR::Attribute::Position1:
|
case IR::Attribute::Position1:
|
||||||
case IR::Attribute::Position2:
|
case IR::Attribute::Position2:
|
||||||
case IR::Attribute::Position3: {
|
case IR::Attribute::Position3: {
|
||||||
|
@ -70,17 +71,47 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
||||||
case IR::Attribute::RenderTarget6:
|
case IR::Attribute::RenderTarget6:
|
||||||
case IR::Attribute::RenderTarget7: {
|
case IR::Attribute::RenderTarget7: {
|
||||||
const u32 index = u32(attr) - u32(IR::Attribute::RenderTarget0);
|
const u32 index = u32(attr) - u32(IR::Attribute::RenderTarget0);
|
||||||
if (ctx.frag_num_comp[index] > 1) {
|
const auto& info{ctx.frag_outputs.at(index)};
|
||||||
return ctx.OpAccessChain(ctx.output_f32, ctx.frag_color[index], ctx.ConstU32(element));
|
if (info.num_components > 1) {
|
||||||
|
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
|
||||||
} else {
|
} else {
|
||||||
return ctx.frag_color[index];
|
return info.id;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case IR::Attribute::Depth:
|
case IR::Attribute::Depth:
|
||||||
return ctx.frag_depth;
|
return ctx.frag_depth;
|
||||||
default:
|
default:
|
||||||
throw NotImplementedException("Read attribute {}", attr);
|
throw NotImplementedException("Write attribute {}", attr);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
|
||||||
|
if (IR::IsParam(attr)) {
|
||||||
|
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||||
|
const auto& info{ctx.output_params.at(index)};
|
||||||
|
return {info.component_type, info.is_integer};
|
||||||
|
}
|
||||||
|
switch (attr) {
|
||||||
|
case IR::Attribute::Position0:
|
||||||
|
case IR::Attribute::Position1:
|
||||||
|
case IR::Attribute::Position2:
|
||||||
|
case IR::Attribute::Position3:
|
||||||
|
case IR::Attribute::Depth:
|
||||||
|
return {ctx.F32[1], false};
|
||||||
|
case IR::Attribute::RenderTarget0:
|
||||||
|
case IR::Attribute::RenderTarget1:
|
||||||
|
case IR::Attribute::RenderTarget2:
|
||||||
|
case IR::Attribute::RenderTarget3:
|
||||||
|
case IR::Attribute::RenderTarget4:
|
||||||
|
case IR::Attribute::RenderTarget5:
|
||||||
|
case IR::Attribute::RenderTarget6:
|
||||||
|
case IR::Attribute::RenderTarget7: {
|
||||||
|
const u32 index = u32(attr) - u32(IR::Attribute::RenderTarget0);
|
||||||
|
const auto& info{ctx.frag_outputs.at(index)};
|
||||||
|
return {info.component_type, info.is_integer};
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
throw NotImplementedException("Write attribute {}", attr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
@ -156,17 +187,21 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
||||||
// Attribute is disabled or varying component is not written
|
// Attribute is disabled or varying component is not written
|
||||||
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
|
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
|
||||||
}
|
}
|
||||||
if (param.is_default) {
|
|
||||||
return ctx.OpCompositeExtract(param.component_type, param.id, comp);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (param.num_components > 1) {
|
Id result;
|
||||||
|
if (param.is_default) {
|
||||||
|
result = ctx.OpCompositeExtract(param.component_type, param.id, comp);
|
||||||
|
} else if (param.num_components > 1) {
|
||||||
const Id pointer{
|
const Id pointer{
|
||||||
ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
|
ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
|
||||||
return ctx.OpLoad(param.component_type, pointer);
|
result = ctx.OpLoad(param.component_type, pointer);
|
||||||
} else {
|
} else {
|
||||||
return ctx.OpLoad(param.component_type, param.id);
|
result = ctx.OpLoad(param.component_type, param.id);
|
||||||
}
|
}
|
||||||
|
if (param.is_integer) {
|
||||||
|
result = ctx.OpBitcast(ctx.F32[1], result);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
} else {
|
} else {
|
||||||
const auto step_rate = EmitReadStepRate(ctx, param.id.value);
|
const auto step_rate = EmitReadStepRate(ctx, param.id.value);
|
||||||
const auto offset = ctx.OpIAdd(
|
const auto offset = ctx.OpIAdd(
|
||||||
|
@ -222,7 +257,12 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const Id pointer{OutputAttrPointer(ctx, attr, element)};
|
const Id pointer{OutputAttrPointer(ctx, attr, element)};
|
||||||
ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value));
|
const auto component_type{OutputAttrComponentType(ctx, attr)};
|
||||||
|
if (component_type.second) {
|
||||||
|
ctx.OpStore(pointer, ctx.OpBitcast(component_type.first, value));
|
||||||
|
} else {
|
||||||
|
ctx.OpStore(pointer, value);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <u32 N>
|
template <u32 N>
|
||||||
|
|
|
@ -120,6 +120,7 @@ void EmitContext::DefineArithmeticTypes() {
|
||||||
|
|
||||||
output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
|
output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
|
||||||
output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
|
output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
|
||||||
|
output_s32 = Name(TypePointer(spv::StorageClass::Output, S32[1]), "output_s32");
|
||||||
|
|
||||||
full_result_i32x2 = Name(TypeStruct(S32[1], S32[1]), "full_result_i32x2");
|
full_result_i32x2 = Name(TypeStruct(S32[1], S32[1]), "full_result_i32x2");
|
||||||
full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2");
|
full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2");
|
||||||
|
@ -151,21 +152,21 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
|
||||||
UNREACHABLE_MSG("Invalid attribute type {}", fmt);
|
UNREACHABLE_MSG("Invalid attribute type {}", fmt);
|
||||||
}
|
}
|
||||||
|
|
||||||
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id) {
|
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id,
|
||||||
|
bool output) {
|
||||||
switch (fmt) {
|
switch (fmt) {
|
||||||
case AmdGpu::NumberFormat::Float:
|
case AmdGpu::NumberFormat::Float:
|
||||||
case AmdGpu::NumberFormat::Unorm:
|
case AmdGpu::NumberFormat::Unorm:
|
||||||
case AmdGpu::NumberFormat::Snorm:
|
case AmdGpu::NumberFormat::Snorm:
|
||||||
case AmdGpu::NumberFormat::SnormNz:
|
case AmdGpu::NumberFormat::SnormNz:
|
||||||
return {id, input_f32, F32[1], 4};
|
|
||||||
case AmdGpu::NumberFormat::Uint:
|
|
||||||
return {id, input_u32, U32[1], 4};
|
|
||||||
case AmdGpu::NumberFormat::Sint:
|
|
||||||
return {id, input_s32, S32[1], 4};
|
|
||||||
case AmdGpu::NumberFormat::Sscaled:
|
case AmdGpu::NumberFormat::Sscaled:
|
||||||
return {id, input_f32, F32[1], 4};
|
|
||||||
case AmdGpu::NumberFormat::Uscaled:
|
case AmdGpu::NumberFormat::Uscaled:
|
||||||
return {id, input_f32, F32[1], 4};
|
case AmdGpu::NumberFormat::Srgb:
|
||||||
|
return {id, output ? output_f32 : input_f32, F32[1], 4, false};
|
||||||
|
case AmdGpu::NumberFormat::Uint:
|
||||||
|
return {id, output ? output_u32 : input_u32, U32[1], 4, true};
|
||||||
|
case AmdGpu::NumberFormat::Sint:
|
||||||
|
return {id, output ? output_s32 : input_s32, S32[1], 4, true};
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -236,9 +237,13 @@ void EmitContext::DefineInputs() {
|
||||||
: 1;
|
: 1;
|
||||||
// Note that we pass index rather than Id
|
// Note that we pass index rather than Id
|
||||||
input_params[input.binding] = {
|
input_params[input.binding] = {
|
||||||
rate_idx, input_u32,
|
rate_idx,
|
||||||
U32[1], input.num_components,
|
input_u32,
|
||||||
false, input.instance_data_buf,
|
U32[1],
|
||||||
|
input.num_components,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
input.instance_data_buf,
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
Id id{DefineInput(type, input.binding)};
|
Id id{DefineInput(type, input.binding)};
|
||||||
|
@ -247,7 +252,7 @@ void EmitContext::DefineInputs() {
|
||||||
} else {
|
} else {
|
||||||
Name(id, fmt::format("vs_in_attr{}", input.binding));
|
Name(id, fmt::format("vs_in_attr{}", input.binding));
|
||||||
}
|
}
|
||||||
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
|
input_params[input.binding] = GetAttributeInfo(input.fmt, id, false);
|
||||||
interfaces.push_back(id);
|
interfaces.push_back(id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -320,10 +325,12 @@ void EmitContext::DefineOutputs() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const u32 num_components = info.stores.NumComponents(mrt);
|
const u32 num_components = info.stores.NumComponents(mrt);
|
||||||
frag_color[i] = DefineOutput(F32[num_components], i);
|
const AmdGpu::NumberFormat num_format{runtime_info.fs_info.color_buffers[i].num_format};
|
||||||
frag_num_comp[i] = num_components;
|
const Id type{GetAttributeType(*this, num_format)[num_components]};
|
||||||
Name(frag_color[i], fmt::format("frag_color{}", i));
|
const Id id = DefineOutput(type, i);
|
||||||
interfaces.push_back(frag_color[i]);
|
Name(id, fmt::format("frag_color{}", i));
|
||||||
|
frag_outputs[i] = GetAttributeInfo(num_format, id, true);
|
||||||
|
interfaces.push_back(id);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -166,6 +166,7 @@ public:
|
||||||
Id input_s32{};
|
Id input_s32{};
|
||||||
Id output_u32{};
|
Id output_u32{};
|
||||||
Id output_f32{};
|
Id output_f32{};
|
||||||
|
Id output_s32{};
|
||||||
|
|
||||||
boost::container::small_vector<Id, 16> interfaces;
|
boost::container::small_vector<Id, 16> interfaces;
|
||||||
|
|
||||||
|
@ -177,8 +178,6 @@ public:
|
||||||
Id frag_coord{};
|
Id frag_coord{};
|
||||||
Id front_facing{};
|
Id front_facing{};
|
||||||
Id frag_depth{};
|
Id frag_depth{};
|
||||||
std::array<Id, 8> frag_color{};
|
|
||||||
std::array<u32, 8> frag_num_comp{};
|
|
||||||
Id clip_distances{};
|
Id clip_distances{};
|
||||||
Id cull_distances{};
|
Id cull_distances{};
|
||||||
|
|
||||||
|
@ -237,11 +236,13 @@ public:
|
||||||
Id pointer_type;
|
Id pointer_type;
|
||||||
Id component_type;
|
Id component_type;
|
||||||
u32 num_components;
|
u32 num_components;
|
||||||
|
bool is_integer{};
|
||||||
bool is_default{};
|
bool is_default{};
|
||||||
s32 buffer_handle{-1};
|
s32 buffer_handle{-1};
|
||||||
};
|
};
|
||||||
std::array<SpirvAttribute, 32> input_params{};
|
std::array<SpirvAttribute, 32> input_params{};
|
||||||
std::array<SpirvAttribute, 32> output_params{};
|
std::array<SpirvAttribute, 32> output_params{};
|
||||||
|
std::array<SpirvAttribute, 8> frag_outputs{};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void DefineArithmeticTypes();
|
void DefineArithmeticTypes();
|
||||||
|
@ -254,7 +255,7 @@ private:
|
||||||
void DefineImagesAndSamplers();
|
void DefineImagesAndSamplers();
|
||||||
void DefineSharedMemory();
|
void DefineSharedMemory();
|
||||||
|
|
||||||
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
|
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, bool output);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Shader::Backend::SPIRV
|
} // namespace Shader::Backend::SPIRV
|
||||||
|
|
|
@ -25,7 +25,7 @@ void Translator::EmitExport(const GcnInst& inst) {
|
||||||
return comp;
|
return comp;
|
||||||
}
|
}
|
||||||
const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
|
const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
|
||||||
switch (runtime_info.fs_info.mrt_swizzles[index]) {
|
switch (runtime_info.fs_info.color_buffers[index].mrt_swizzle) {
|
||||||
case MrtSwizzle::Identity:
|
case MrtSwizzle::Identity:
|
||||||
return comp;
|
return comp;
|
||||||
case MrtSwizzle::Alt:
|
case MrtSwizzle::Alt:
|
||||||
|
|
|
@ -80,10 +80,16 @@ struct FragmentRuntimeInfo {
|
||||||
auto operator<=>(const PsInput&) const noexcept = default;
|
auto operator<=>(const PsInput&) const noexcept = default;
|
||||||
};
|
};
|
||||||
boost::container::static_vector<PsInput, 32> inputs;
|
boost::container::static_vector<PsInput, 32> inputs;
|
||||||
std::array<MrtSwizzle, MaxColorBuffers> mrt_swizzles;
|
struct PsColorBuffer {
|
||||||
|
AmdGpu::NumberFormat num_format;
|
||||||
|
MrtSwizzle mrt_swizzle;
|
||||||
|
|
||||||
|
auto operator<=>(const PsColorBuffer&) const noexcept = default;
|
||||||
|
};
|
||||||
|
std::array<PsColorBuffer, MaxColorBuffers> color_buffers;
|
||||||
|
|
||||||
bool operator==(const FragmentRuntimeInfo& other) const noexcept {
|
bool operator==(const FragmentRuntimeInfo& other) const noexcept {
|
||||||
return std::ranges::equal(mrt_swizzles, other.mrt_swizzles) &&
|
return std::ranges::equal(color_buffers, other.color_buffers) &&
|
||||||
std::ranges::equal(inputs, other.inputs);
|
std::ranges::equal(inputs, other.inputs);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -26,6 +26,7 @@ using Liverpool = AmdGpu::Liverpool;
|
||||||
struct GraphicsPipelineKey {
|
struct GraphicsPipelineKey {
|
||||||
std::array<size_t, MaxShaderStages> stage_hashes;
|
std::array<size_t, MaxShaderStages> stage_hashes;
|
||||||
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
|
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
|
||||||
|
std::array<AmdGpu::NumberFormat, Liverpool::NumColorBuffers> color_num_formats;
|
||||||
std::array<Liverpool::ColorBuffer::SwapMode, Liverpool::NumColorBuffers> mrt_swizzles;
|
std::array<Liverpool::ColorBuffer::SwapMode, Liverpool::NumColorBuffers> mrt_swizzles;
|
||||||
vk::Format depth_format;
|
vk::Format depth_format;
|
||||||
vk::Format stencil_format;
|
vk::Format stencil_format;
|
||||||
|
|
|
@ -95,10 +95,6 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
||||||
case Shader::Stage::Fragment: {
|
case Shader::Stage::Fragment: {
|
||||||
info.num_user_data = regs.ps_program.settings.num_user_regs;
|
info.num_user_data = regs.ps_program.settings.num_user_regs;
|
||||||
info.num_allocated_vgprs = regs.ps_program.settings.num_vgprs * 4;
|
info.num_allocated_vgprs = regs.ps_program.settings.num_vgprs * 4;
|
||||||
std::ranges::transform(graphics_key.mrt_swizzles, info.fs_info.mrt_swizzles.begin(),
|
|
||||||
[](Liverpool::ColorBuffer::SwapMode mode) {
|
|
||||||
return static_cast<Shader::MrtSwizzle>(mode);
|
|
||||||
});
|
|
||||||
const auto& ps_inputs = regs.ps_inputs;
|
const auto& ps_inputs = regs.ps_inputs;
|
||||||
for (u32 i = 0; i < regs.num_interp; i++) {
|
for (u32 i = 0; i < regs.num_interp; i++) {
|
||||||
info.fs_info.inputs.push_back({
|
info.fs_info.inputs.push_back({
|
||||||
|
@ -108,6 +104,12 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
||||||
.default_value = u8(ps_inputs[i].default_value),
|
.default_value = u8(ps_inputs[i].default_value),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
for (u32 i = 0; i < Shader::MaxColorBuffers; i++) {
|
||||||
|
info.fs_info.color_buffers[i] = {
|
||||||
|
.num_format = graphics_key.color_num_formats[i],
|
||||||
|
.mrt_swizzle = static_cast<Shader::MrtSwizzle>(graphics_key.mrt_swizzles[i]),
|
||||||
|
};
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Shader::Stage::Compute: {
|
case Shader::Stage::Compute: {
|
||||||
|
@ -244,6 +246,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||||
// attachments. This might be not a case as HW color buffers can be bound in an arbitrary
|
// attachments. This might be not a case as HW color buffers can be bound in an arbitrary
|
||||||
// order. We need to do some arrays compaction at this stage
|
// order. We need to do some arrays compaction at this stage
|
||||||
key.color_formats.fill(vk::Format::eUndefined);
|
key.color_formats.fill(vk::Format::eUndefined);
|
||||||
|
key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm);
|
||||||
key.blend_controls.fill({});
|
key.blend_controls.fill({});
|
||||||
key.write_masks.fill({});
|
key.write_masks.fill({});
|
||||||
key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard);
|
key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard);
|
||||||
|
@ -261,6 +264,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||||
const bool is_vo_surface = renderer->IsVideoOutSurface(col_buf);
|
const bool is_vo_surface = renderer->IsVideoOutSurface(col_buf);
|
||||||
key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat(
|
key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat(
|
||||||
base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/);
|
base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/);
|
||||||
|
key.color_num_formats[remapped_cb] = col_buf.NumFormat();
|
||||||
if (base_format == key.color_formats[remapped_cb]) {
|
if (base_format == key.color_formats[remapped_cb]) {
|
||||||
key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value();
|
key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue