mirror of https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-28 01:08:24 +00:00

shader_recompiler: Implement shader export formats. (#2226)

This commit is contained in:
parent b3c573f798
commit 56f4b8a2b8
externals/sirit (vendored):
@@ -1 +1 @@
-Subproject commit 26ad5a9d0fe13260b0d7d6c64419d01a196b2e32
+Subproject commit d6f3c0d99862ab2ff8f95e9ac221560f1f97e29a
@@ -58,4 +58,48 @@ Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) {
     return ctx.OpUnpackHalf2x16(ctx.F32[2], value);
 }
 
+Id EmitPackUnorm2x16(EmitContext& ctx, Id value) {
+    return ctx.OpPackUnorm2x16(ctx.U32[1], value);
+}
+
+Id EmitUnpackUnorm2x16(EmitContext& ctx, Id value) {
+    return ctx.OpUnpackUnorm2x16(ctx.F32[2], value);
+}
+
+Id EmitPackSnorm2x16(EmitContext& ctx, Id value) {
+    return ctx.OpPackSnorm2x16(ctx.U32[1], value);
+}
+
+Id EmitUnpackSnorm2x16(EmitContext& ctx, Id value) {
+    return ctx.OpUnpackSnorm2x16(ctx.F32[2], value);
+}
+
+Id EmitPackUint2x16(EmitContext& ctx, Id value) {
+    // No SPIR-V instruction for this, do it manually.
+    const auto x{ctx.OpCompositeExtract(ctx.U32[1], value, 0)};
+    const auto y{ctx.OpCompositeExtract(ctx.U32[1], value, 1)};
+    return ctx.OpBitFieldInsert(ctx.U32[1], x, y, ctx.ConstU32(16U), ctx.ConstU32(16U));
+}
+
+Id EmitUnpackUint2x16(EmitContext& ctx, Id value) {
+    // No SPIR-V instruction for this, do it manually.
+    const auto x{ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(0U), ctx.ConstU32(16U))};
+    const auto y{ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(16U), ctx.ConstU32(16U))};
+    return ctx.OpCompositeConstruct(ctx.U32[2], x, y);
+}
+
+Id EmitPackSint2x16(EmitContext& ctx, Id value) {
+    // No SPIR-V instruction for this, do it manually.
+    const auto x{ctx.OpCompositeExtract(ctx.U32[1], value, 0)};
+    const auto y{ctx.OpCompositeExtract(ctx.U32[1], value, 1)};
+    return ctx.OpBitFieldInsert(ctx.U32[1], x, y, ctx.ConstU32(16U), ctx.ConstU32(16U));
+}
+
+Id EmitUnpackSint2x16(EmitContext& ctx, Id value) {
+    // No SPIR-V instruction for this, do it manually.
+    const auto x{ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.ConstU32(0U), ctx.ConstU32(16U))};
+    const auto y{ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.ConstU32(16U), ctx.ConstU32(16U))};
+    return ctx.OpCompositeConstruct(ctx.U32[2], x, y);
+}
+
 } // namespace Shader::Backend::SPIRV
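SPIR-V (via the GLSL.std.450 extended set) provides PackUnorm2x16/PackSnorm2x16 and their unpack counterparts, but nothing for raw 16-bit integer pairs, hence the manual bit-field sequences above; the sirit submodule bump at the top of the diff presumably pulls in the wrappers used here. A hypothetical scalar equivalent of the manual path (names are illustrative, not part of the codebase):

// Scalar reference for the integer pack/unpack emitters; the shader versions
// do the same with OpBitFieldInsert / OpBitFieldUExtract / OpBitFieldSExtract.
#include <cstdint>

uint32_t pack_uint2x16(uint32_t x, uint32_t y) {
    // BitFieldInsert(base=x, insert=y, offset=16, count=16): low half from x,
    // high half from the low 16 bits of y.
    return (x & 0xFFFFu) | (y << 16);
}

uint32_t unpack_uint2x16_x(uint32_t v) { return v & 0xFFFFu; } // offset 0, count 16
uint32_t unpack_uint2x16_y(uint32_t v) { return v >> 16; }     // offset 16, count 16

int32_t unpack_sint2x16_x(uint32_t v) {
    // BitFieldSExtract sign-extends the extracted 16 bits.
    return static_cast<int16_t>(v & 0xFFFFu);
}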
@@ -197,6 +197,14 @@ Id EmitPackFloat2x16(EmitContext& ctx, Id value);
 Id EmitUnpackFloat2x16(EmitContext& ctx, Id value);
 Id EmitPackHalf2x16(EmitContext& ctx, Id value);
 Id EmitUnpackHalf2x16(EmitContext& ctx, Id value);
+Id EmitPackUnorm2x16(EmitContext& ctx, Id value);
+Id EmitUnpackUnorm2x16(EmitContext& ctx, Id value);
+Id EmitPackSnorm2x16(EmitContext& ctx, Id value);
+Id EmitUnpackSnorm2x16(EmitContext& ctx, Id value);
+Id EmitPackUint2x16(EmitContext& ctx, Id value);
+Id EmitUnpackUint2x16(EmitContext& ctx, Id value);
+Id EmitPackSint2x16(EmitContext& ctx, Id value);
+Id EmitUnpackSint2x16(EmitContext& ctx, Id value);
 Id EmitFPAbs16(EmitContext& ctx, Id value);
 Id EmitFPAbs32(EmitContext& ctx, Id value);
 Id EmitFPAbs64(EmitContext& ctx, Id value);
@@ -7,6 +7,125 @@
 
 namespace Shader::Gcn {
 
+u32 SwizzleMrtComponent(const FragmentRuntimeInfo::PsColorBuffer& color_buffer, u32 comp) {
+    const auto [r, g, b, a] = color_buffer.swizzle;
+    const std::array swizzle_array = {r, g, b, a};
+    const auto swizzled_comp_type = static_cast<u32>(swizzle_array[comp]);
+    constexpr auto min_comp_type = static_cast<u32>(AmdGpu::CompSwizzle::Red);
+    return swizzled_comp_type >= min_comp_type ? swizzled_comp_type - min_comp_type : comp;
+}
+
+void Translator::ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32& value,
+                                const FragmentRuntimeInfo::PsColorBuffer& color_buffer) {
+    const auto converted = ApplyWriteNumberConversion(ir, value, color_buffer.num_conversion);
+    ir.SetAttribute(attribute, converted, comp);
+}
+
+void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
+    const u32 color_buffer_idx =
+        static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
+    const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
+
+    IR::Value unpacked_value;
+    bool is_integer = false;
+    switch (color_buffer.export_format) {
+    case AmdGpu::Liverpool::ShaderExportFormat::Zero:
+        // No export
+        return;
+    case AmdGpu::Liverpool::ShaderExportFormat::ABGR_FP16:
+        unpacked_value = ir.UnpackHalf2x16(value);
+        break;
+    case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UNORM16:
+        unpacked_value = ir.UnpackUnorm2x16(value);
+        break;
+    case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SNORM16:
+        unpacked_value = ir.UnpackSnorm2x16(value);
+        break;
+    case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UINT16:
+        unpacked_value = ir.UnpackUint2x16(value);
+        is_integer = true;
+        break;
+    case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SINT16:
+        unpacked_value = ir.UnpackSint2x16(value);
+        is_integer = true;
+        break;
+    default:
+        UNREACHABLE_MSG("Unimplemented compressed MRT export format {}",
+                        static_cast<u32>(color_buffer.export_format));
+        break;
+    }
+
+    const auto r = ir.CompositeExtract(unpacked_value, 0);
+    const auto g = ir.CompositeExtract(unpacked_value, 1);
+    const IR::F32 float_r = is_integer ? ir.BitCast<IR::F32>(IR::U32{r}) : IR::F32{r};
+    const IR::F32 float_g = is_integer ? ir.BitCast<IR::F32>(IR::U32{g}) : IR::F32{g};
+
+    const auto swizzled_r = SwizzleMrtComponent(color_buffer, idx * 2);
+    const auto swizzled_g = SwizzleMrtComponent(color_buffer, idx * 2 + 1);
+
+    ExportMrtValue(attribute, swizzled_r, float_r, color_buffer);
+    ExportMrtValue(attribute, swizzled_g, float_g, color_buffer);
+}
+
+void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
+    const u32 color_buffer_idx =
+        static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
+    const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
+    const auto swizzled_comp = SwizzleMrtComponent(color_buffer, comp);
+
+    switch (color_buffer.export_format) {
+    case AmdGpu::Liverpool::ShaderExportFormat::Zero:
+        // No export
+        return;
+    case AmdGpu::Liverpool::ShaderExportFormat::R_32:
+        // Red only
+        if (swizzled_comp != 0) {
+            return;
+        }
+        break;
+    case AmdGpu::Liverpool::ShaderExportFormat::GR_32:
+        // Red and Green only
+        if (swizzled_comp != 0 && swizzled_comp != 1) {
+            return;
+        }
+        break;
+    case AmdGpu::Liverpool::ShaderExportFormat::AR_32:
+        // Red and Alpha only
+        if (swizzled_comp != 0 && swizzled_comp != 3) {
+            return;
+        }
+        break;
+    case AmdGpu::Liverpool::ShaderExportFormat::ABGR_32:
+        // All components
+        break;
+    default:
+        UNREACHABLE_MSG("Unimplemented uncompressed MRT export format {}",
+                        static_cast<u32>(color_buffer.export_format));
+        break;
+    }
+    ExportMrtValue(attribute, swizzled_comp, value, color_buffer);
+}
+
+void Translator::ExportCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
+    if (IsMrt(attribute)) {
+        ExportMrtCompressed(attribute, idx, value);
+        return;
+    }
+    const IR::Value unpacked_value = ir.UnpackHalf2x16(value);
+    const IR::F32 r = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
+    const IR::F32 g = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
+    ir.SetAttribute(attribute, r, idx * 2);
+    ir.SetAttribute(attribute, g, idx * 2 + 1);
+}
+
+void Translator::ExportUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
+    if (IsMrt(attribute)) {
+        ExportMrtUncompressed(attribute, comp, value);
+        return;
+    }
+    ir.SetAttribute(attribute, value, comp);
+}
+
 void Translator::EmitExport(const GcnInst& inst) {
     if (ir.block->has_multiple_predecessors && info.stage == Stage::Fragment) {
         ir.Discard(ir.LogicalNot(ir.GetExec()));
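SwizzleMrtComponent remaps the component index being written through the color buffer's swizzle: selectors at or above CompSwizzle::Red pick a channel slot, anything below (Zero/One) leaves the index unchanged. An illustrative model (the CompSwizzle values here are assumptions; the function above only relies on channel selectors starting at Red and being consecutive):

#include <array>
#include <cstdint>

enum class CompSwizzle : uint32_t { Zero, One, Red, Green, Blue, Alpha }; // illustrative values

uint32_t SwizzleComponent(const std::array<CompSwizzle, 4>& swizzle, uint32_t comp) {
    const auto sel = static_cast<uint32_t>(swizzle[comp]);
    constexpr auto min = static_cast<uint32_t>(CompSwizzle::Red);
    // Channel selectors rebase to 0..3; Zero/One keep the original index.
    return sel >= min ? sel - min : comp;
}

For a BGRA target {Blue, Green, Red, Alpha}, component 0 (R) is written to slot 2 and component 2 (B) to slot 0, so the export lands pre-swizzled in the render target's layout.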
@@ -26,41 +145,15 @@ void Translator::EmitExport(const GcnInst& inst) {
         IR::VectorReg(inst.src[3].code),
     };
 
-    const auto set_attribute = [&](u32 comp, IR::F32 value) {
-        if (!IR::IsMrt(attrib)) {
-            ir.SetAttribute(attrib, value, comp);
-            return;
-        }
-        const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
-        const auto col_buf = runtime_info.fs_info.color_buffers[index];
-        const auto converted = IR::ApplyWriteNumberConversion(ir, value, col_buf.num_conversion);
-        const auto [r, g, b, a] = col_buf.swizzle;
-        const std::array swizzle_array = {r, g, b, a};
-        const auto swizzled_comp = swizzle_array[comp];
-        if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) {
-            ir.SetAttribute(attrib, converted, comp);
-            return;
-        }
-        ir.SetAttribute(attrib, converted, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red));
-    };
-
-    const auto unpack = [&](u32 idx) {
-        const IR::Value value = ir.UnpackHalf2x16(ir.GetVectorReg(vsrc[idx]));
-        const IR::F32 r = IR::F32{ir.CompositeExtract(value, 0)};
-        const IR::F32 g = IR::F32{ir.CompositeExtract(value, 1)};
-        set_attribute(idx * 2, r);
-        set_attribute(idx * 2 + 1, g);
-    };
-
     // Components are float16 packed into a VGPR
     if (exp.compr) {
         // Export R, G
         if (exp.en & 1) {
-            unpack(0);
+            ExportCompressed(attrib, 0, ir.GetVectorReg<IR::U32>(vsrc[0]));
         }
         // Export B, A
         if ((exp.en >> 2) & 1) {
-            unpack(1);
+            ExportCompressed(attrib, 1, ir.GetVectorReg<IR::U32>(vsrc[1]));
         }
     } else {
         // Components are float32 into separate VGPRS
@@ -69,8 +162,7 @@ void Translator::EmitExport(const GcnInst& inst) {
             if ((mask & 1) == 0) {
                 continue;
             }
-            const IR::F32 comp = ir.GetVectorReg<IR::F32>(vsrc[i]);
-            set_attribute(i, comp);
+            ExportUncompressed(attrib, i, ir.GetVectorReg<IR::F32>(vsrc[i]));
         }
     }
     if (IR::IsMrt(attrib)) {
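The `exp.en` field consumed above is the export instruction's 4-bit component enable mask. In compressed mode each VGPR carries two 16-bit components, so only bits 0 and 2 matter, gating the R/G and B/A pairs respectively. A hedged sketch of that mask handling (helper name is illustrative):

bool ExportsPair(unsigned en, unsigned pair_idx) { // pair_idx: 0 = RG, 1 = BA
    // Matches `exp.en & 1` and `(exp.en >> 2) & 1` in the code above.
    return ((en >> (pair_idx * 2)) & 1) != 0;
}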
@@ -170,6 +170,7 @@
     void V_SUBBREV_U32(const GcnInst& inst);
     void V_LDEXP_F32(const GcnInst& inst);
     void V_CVT_PKNORM_U16_F32(const GcnInst& inst);
+    void V_CVT_PKNORM_I16_F32(const GcnInst& inst);
     void V_CVT_PKRTZ_F16_F32(const GcnInst& inst);
 
     // VOP1
@@ -244,6 +245,7 @@
     void V_SAD(const GcnInst& inst);
     void V_SAD_U32(const GcnInst& inst);
     void V_CVT_PK_U16_U32(const GcnInst& inst);
+    void V_CVT_PK_I16_I32(const GcnInst& inst);
     void V_CVT_PK_U8_F32(const GcnInst& inst);
     void V_LSHL_B64(const GcnInst& inst);
     void V_MUL_F64(const GcnInst& inst);
@@ -306,6 +308,13 @@
     IR::F32 SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
                              const IR::F32& x_res, const IR::F32& y_res, const IR::F32& z_res);
 
+    void ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32& value,
+                        const FragmentRuntimeInfo::PsColorBuffer& color_buffer);
+    void ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value);
+    void ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value);
+    void ExportCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value);
+    void ExportUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value);
+
     void LogMissingOpcode(const GcnInst& inst);
 
 private:
@@ -96,6 +96,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_LDEXP_F32(inst);
     case Opcode::V_CVT_PKNORM_U16_F32:
        return V_CVT_PKNORM_U16_F32(inst);
+    case Opcode::V_CVT_PKNORM_I16_F32:
+        return V_CVT_PKNORM_I16_F32(inst);
     case Opcode::V_CVT_PKRTZ_F16_F32:
         return V_CVT_PKRTZ_F16_F32(inst);
@@ -376,6 +378,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_SAD_U32(inst);
     case Opcode::V_CVT_PK_U16_U32:
         return V_CVT_PK_U16_U32(inst);
+    case Opcode::V_CVT_PK_I16_I32:
+        return V_CVT_PK_I16_I32(inst);
     case Opcode::V_CVT_PK_U8_F32:
         return V_CVT_PK_U8_F32(inst);
     case Opcode::V_LSHL_B64:
@@ -645,12 +649,15 @@ void Translator::V_LDEXP_F32(const GcnInst& inst) {
 }
 
 void Translator::V_CVT_PKNORM_U16_F32(const GcnInst& inst) {
-    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
-    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
-    const IR::U32 dst0 = ir.ConvertFToU(32, ir.FPMul(src0, ir.Imm32(65535.f)));
-    const IR::U32 dst1 = ir.ConvertFToU(32, ir.FPMul(src1, ir.Imm32(65535.f)));
-    const IR::VectorReg dst_reg{inst.dst[0].code};
-    ir.SetVectorReg(dst_reg, ir.BitFieldInsert(dst0, dst1, ir.Imm32(16), ir.Imm32(16)));
+    const IR::Value vec_f32 =
+        ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
+    SetDst(inst.dst[0], ir.PackUnorm2x16(vec_f32));
+}
+
+void Translator::V_CVT_PKNORM_I16_F32(const GcnInst& inst) {
+    const IR::Value vec_f32 =
+        ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
+    SetDst(inst.dst[0], ir.PackSnorm2x16(vec_f32));
 }
 
 void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
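The rewrite routes V_CVT_PKNORM_U16_F32 through the new PackUnorm2x16 IR op rather than the open-coded multiply it replaces; per the GLSL/SPIR-V packUnorm2x16 definition this also clamps to [0, 1] and rounds, which the old multiply did not. A scalar reference of those semantics:

// Scalar reference for packUnorm2x16 (GLSL.std.450):
// round(clamp(c, 0, 1) * 65535) per component, x in the low half.
#include <algorithm>
#include <cmath>
#include <cstdint>

uint32_t PackUnorm2x16Ref(float x, float y) {
    const auto conv = [](float v) {
        return static_cast<uint32_t>(std::round(std::clamp(v, 0.0f, 1.0f) * 65535.0f));
    };
    return conv(x) | (conv(y) << 16);
}
// packSnorm2x16 is the signed analogue: round(clamp(c, -1, 1) * 32767), with
// each result stored in its 16-bit half as a signed value.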
@@ -1237,11 +1244,15 @@ void Translator::V_SAD_U32(const GcnInst& inst) {
 }
 
 void Translator::V_CVT_PK_U16_U32(const GcnInst& inst) {
-    const IR::U32 src0{GetSrc(inst.src[0])};
-    const IR::U32 src1{GetSrc(inst.src[1])};
-    const IR::U32 lo = ir.IMin(src0, ir.Imm32(0xFFFF), false);
-    const IR::U32 hi = ir.IMin(src1, ir.Imm32(0xFFFF), false);
-    SetDst(inst.dst[0], ir.BitFieldInsert(lo, hi, ir.Imm32(16), ir.Imm32(16)));
+    const IR::Value vec_u32 =
+        ir.CompositeConstruct(GetSrc<IR::U32>(inst.src[0]), GetSrc<IR::U32>(inst.src[1]));
+    SetDst(inst.dst[0], ir.PackUint2x16(vec_u32));
+}
+
+void Translator::V_CVT_PK_I16_I32(const GcnInst& inst) {
+    const IR::Value vec_u32 =
+        ir.CompositeConstruct(GetSrc<IR::U32>(inst.src[0]), GetSrc<IR::U32>(inst.src[1]));
+    SetDst(inst.dst[0], ir.PackSint2x16(vec_u32));
 }
 
 void Translator::V_CVT_PK_U8_F32(const GcnInst& inst) {
@@ -795,6 +795,38 @@ Value IREmitter::UnpackHalf2x16(const U32& value) {
     return Inst(Opcode::UnpackHalf2x16, value);
 }
 
+U32 IREmitter::PackUnorm2x16(const Value& vector) {
+    return Inst<U32>(Opcode::PackUnorm2x16, vector);
+}
+
+Value IREmitter::UnpackUnorm2x16(const U32& value) {
+    return Inst(Opcode::UnpackUnorm2x16, value);
+}
+
+U32 IREmitter::PackSnorm2x16(const Value& vector) {
+    return Inst<U32>(Opcode::PackSnorm2x16, vector);
+}
+
+Value IREmitter::UnpackSnorm2x16(const U32& value) {
+    return Inst(Opcode::UnpackSnorm2x16, value);
+}
+
+U32 IREmitter::PackUint2x16(const Value& value) {
+    return Inst<U32>(Opcode::PackUint2x16, value);
+}
+
+Value IREmitter::UnpackUint2x16(const U32& value) {
+    return Inst(Opcode::UnpackUint2x16, value);
+}
+
+U32 IREmitter::PackSint2x16(const Value& value) {
+    return Inst<U32>(Opcode::PackSint2x16, value);
+}
+
+Value IREmitter::UnpackSint2x16(const U32& value) {
+    return Inst(Opcode::UnpackSint2x16, value);
+}
+
 F32F64 IREmitter::FPMul(const F32F64& a, const F32F64& b) {
     if (a.Type() != b.Type()) {
         UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
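These IREmitter additions are one-line wrappers that append the new opcodes: the typed Inst<U32> form yields the packed scalar, while the untyped Inst form yields the unpacked two-component value. Typical call pattern (a fragment mirroring V_CVT_PK_U16_U32 earlier in this diff, not standalone code):

// Build a two-component composite, then pack it into a single 32-bit value.
const IR::Value vec_u32 = ir.CompositeConstruct(src0, src1);
const IR::U32 packed = ir.PackUint2x16(vec_u32);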
@@ -175,6 +175,14 @@
 
     [[nodiscard]] U32 PackHalf2x16(const Value& vector);
     [[nodiscard]] Value UnpackHalf2x16(const U32& value);
+    [[nodiscard]] U32 PackUnorm2x16(const Value& vector);
+    [[nodiscard]] Value UnpackUnorm2x16(const U32& value);
+    [[nodiscard]] U32 PackSnorm2x16(const Value& vector);
+    [[nodiscard]] Value UnpackSnorm2x16(const U32& value);
+    [[nodiscard]] U32 PackUint2x16(const Value& value);
+    [[nodiscard]] Value UnpackUint2x16(const U32& value);
+    [[nodiscard]] U32 PackSint2x16(const Value& value);
+    [[nodiscard]] Value UnpackSint2x16(const U32& value);
 
     [[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b);
     [[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b);
@@ -187,6 +187,14 @@ OPCODE(PackFloat2x16, U32, F16x2, )
 OPCODE(UnpackFloat2x16, F16x2, U32, )
 OPCODE(PackHalf2x16, U32, F32x2, )
 OPCODE(UnpackHalf2x16, F32x2, U32, )
+OPCODE(PackUnorm2x16, U32, F32x2, )
+OPCODE(UnpackUnorm2x16, F32x2, U32, )
+OPCODE(PackSnorm2x16, U32, F32x2, )
+OPCODE(UnpackSnorm2x16, F32x2, U32, )
+OPCODE(PackUint2x16, U32, U32x2, )
+OPCODE(UnpackUint2x16, U32x2, U32, )
+OPCODE(PackSint2x16, U32, U32x2, )
+OPCODE(UnpackSint2x16, U32x2, U32, )
 
 // Floating-point operations
 OPCODE(FPAbs32, F32, F32, )
@@ -348,6 +348,22 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
         return FoldInverseFunc(inst, IR::Opcode::UnpackFloat2x16);
     case IR::Opcode::UnpackFloat2x16:
         return FoldInverseFunc(inst, IR::Opcode::PackFloat2x16);
+    case IR::Opcode::PackUnorm2x16:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackUnorm2x16);
+    case IR::Opcode::UnpackUnorm2x16:
+        return FoldInverseFunc(inst, IR::Opcode::PackUnorm2x16);
+    case IR::Opcode::PackSnorm2x16:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackSnorm2x16);
+    case IR::Opcode::UnpackSnorm2x16:
+        return FoldInverseFunc(inst, IR::Opcode::PackSnorm2x16);
+    case IR::Opcode::PackUint2x16:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackUint2x16);
+    case IR::Opcode::UnpackUint2x16:
+        return FoldInverseFunc(inst, IR::Opcode::PackUint2x16);
+    case IR::Opcode::PackSint2x16:
+        return FoldInverseFunc(inst, IR::Opcode::UnpackSint2x16);
+    case IR::Opcode::UnpackSint2x16:
+        return FoldInverseFunc(inst, IR::Opcode::PackSint2x16);
     case IR::Opcode::SelectU1:
     case IR::Opcode::SelectU8:
     case IR::Opcode::SelectU16:
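Each new pack/unpack pair is registered in both directions, so a value that is packed and immediately unpacked (as the compressed-export lowering can produce) folds back to the original. A minimal sketch of the idea, assuming a helper shaped like the calls above (the real FoldInverseFunc may differ in signature and guards):

void FoldInverseFunc(IR::Inst& inst, IR::Opcode inverse) {
    const IR::Value arg{inst.Arg(0)};
    if (arg.IsImmediate()) {
        return; // constants have no producing instruction to inspect
    }
    if (IR::Inst* const producer = arg.InstRecursive(); producer->GetOpcode() == inverse) {
        // Unpack(Pack(x)) (or Pack(Unpack(x))) collapses to x.
        inst.ReplaceUsesWith(producer->Arg(0));
    }
}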
@@ -184,6 +184,7 @@ struct FragmentRuntimeInfo {
         AmdGpu::NumberFormat num_format;
         AmdGpu::NumberConversion num_conversion;
         AmdGpu::CompMapping swizzle;
+        AmdGpu::Liverpool::ShaderExportFormat export_format;
 
         auto operator<=>(const PsColorBuffer&) const noexcept = default;
     };
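Because PsColorBuffer defaults operator<=> (the unchanged context line above), the new export_format member participates in comparisons automatically, which is presumably what lets GraphicsPipelineKey later in this diff hold whole PsColorBuffer values instead of parallel arrays. A self-contained illustration of that C++20 behavior (simplified stand-in type, not the real struct):

#include <compare>

struct PsColorBufferLike {
    unsigned num_format;
    unsigned export_format; // the newly added field
    auto operator<=>(const PsColorBufferLike&) const noexcept = default;
};

// A defaulted <=> also implicitly declares a defaulted ==, so two buffers
// differing only in export_format now compare unequal and produce distinct keys.
static_assert(PsColorBufferLike{0, 1} != PsColorBufferLike{0, 2});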
@@ -266,6 +266,10 @@ struct Liverpool {
         BitField<20, 4, ShaderExportFormat> col5;
         BitField<24, 4, ShaderExportFormat> col6;
         BitField<28, 4, ShaderExportFormat> col7;
+
+        [[nodiscard]] ShaderExportFormat GetFormat(const u32 buf_idx) const {
+            return static_cast<ShaderExportFormat>((raw >> (buf_idx * 4)) & 0xfu);
+        }
     };
 
     union VsOutputControl {
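GetFormat generalizes the col0..col7 bit fields: each color buffer owns a 4-bit slice of the raw register, so indexed access is just a shift and mask. Worked example of the same arithmetic:

// With raw = 0x394, the 4-bit fields decode as:
//   col0 = 0x4, col1 = 0x9, col2 = 0x3, col3..col7 = 0x0
#include <cstdint>

constexpr uint32_t GetFormat(uint32_t raw, uint32_t buf_idx) {
    return (raw >> (buf_idx * 4)) & 0xFu;
}

static_assert(GetFormat(0x394, 2) == 0x3); // buf_idx 2 -> shift by 8, mask 0xF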
@@ -35,9 +35,8 @@ struct GraphicsPipelineKey {
     std::array<size_t, MaxShaderStages> stage_hashes;
     u32 num_color_attachments;
     std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
-    std::array<AmdGpu::NumberFormat, Liverpool::NumColorBuffers> color_num_formats;
-    std::array<AmdGpu::NumberConversion, Liverpool::NumColorBuffers> color_num_conversions;
-    std::array<AmdGpu::CompMapping, Liverpool::NumColorBuffers> color_swizzles;
+    std::array<Shader::FragmentRuntimeInfo::PsColorBuffer, Liverpool::NumColorBuffers>
+        color_buffers;
     vk::Format depth_format;
     vk::Format stencil_format;
@@ -167,11 +167,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
         };
     }
     for (u32 i = 0; i < Shader::MaxColorBuffers; i++) {
-        info.fs_info.color_buffers[i] = {
-            .num_format = graphics_key.color_num_formats[i],
-            .num_conversion = graphics_key.color_num_conversions[i],
-            .swizzle = graphics_key.color_swizzles[i],
-        };
+        info.fs_info.color_buffers[i] = graphics_key.color_buffers[i];
     }
     break;
 }
@@ -309,11 +305,9 @@ bool PipelineCache::RefreshGraphicsKey() {
     // order. We need to do some arrays compaction at this stage
     key.num_color_attachments = 0;
     key.color_formats.fill(vk::Format::eUndefined);
-    key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm);
-    key.color_num_conversions.fill(AmdGpu::NumberConversion::None);
+    key.color_buffers.fill({});
     key.blend_controls.fill({});
     key.write_masks.fill({});
-    key.color_swizzles.fill({});
     key.vertex_buffer_formats.fill(vk::Format::eUndefined);
 
     key.patch_control_points = 0;
@@ -338,9 +332,12 @@ bool PipelineCache::RefreshGraphicsKey() {
 
         key.color_formats[remapped_cb] =
             LiverpoolToVK::SurfaceFormat(col_buf.GetDataFmt(), col_buf.GetNumberFmt());
-        key.color_num_formats[remapped_cb] = col_buf.GetNumberFmt();
-        key.color_num_conversions[remapped_cb] = col_buf.GetNumberConversion();
-        key.color_swizzles[remapped_cb] = col_buf.Swizzle();
+        key.color_buffers[remapped_cb] = {
+            .num_format = col_buf.GetNumberFmt(),
+            .num_conversion = col_buf.GetNumberConversion(),
+            .swizzle = col_buf.Swizzle(),
+            .export_format = regs.color_export_format.GetFormat(cb),
+        };
     }
 
     fetch_shader = std::nullopt;
@@ -456,7 +453,7 @@ bool PipelineCache::RefreshGraphicsKey() {
             // of the latter we need to change format to undefined, and either way we need to
             // increment the index for the null attachment binding.
             key.color_formats[remapped_cb] = vk::Format::eUndefined;
-            key.color_swizzles[remapped_cb] = {};
+            key.color_buffers[remapped_cb] = {};
            ++remapped_cb;
            continue;
        }