recompiler: handle reads of output variables in hull shaders (#1962)

* Handle output control point reads in hull shader. Might need additional barriers

* output storage class
This commit is contained in:
baggins183 2024-12-29 02:37:15 -08:00 committed by GitHub
parent da9e45b582
commit 62c47cb1b7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 38 additions and 22 deletions

View file

@ -217,14 +217,6 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
const auto pointer{
ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))};
return ctx.OpLoad(ctx.F32[1], pointer);
} else if (IR::IsParam(attr)) {
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
const auto param = ctx.input_params.at(param_id).id;
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
}
UNREACHABLE();
}
@ -351,6 +343,13 @@ Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index,
vertex_index, attr_index, comp_index));
}
Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index,
Id comp_index) {
const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.output_attr_array,
vertex_index, attr_index, comp_index));
}
void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) {
// Implied vertex index is invocation_id
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);

View file

@ -89,6 +89,8 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index);
void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index);
Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index,
Id comp_index);
Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);
void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value);
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);

View file

@ -255,10 +255,6 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
"Non immediate offset not supported");
}
if (info.stage == Stage::Hull) {
// printf("here\n"); // break
}
IR::Value address = [&] -> IR::Value {
if (is_ring) {
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);

View file

@ -288,6 +288,12 @@ void IREmitter::SetTcsGenericAttribute(const F32& value, const U32& attr_index,
Inst(Opcode::SetTcsGenericAttribute, value, attr_index, comp_index);
}
F32 IREmitter::ReadTcsGenericOuputAttribute(const U32& vertex_index, const U32& attr_index,
const U32& comp_index) {
return Inst<F32>(IR::Opcode::ReadTcsGenericOuputAttribute, vertex_index, attr_index,
comp_index);
}
F32 IREmitter::GetPatch(Patch patch) {
return Inst<F32>(Opcode::GetPatch, patch);
}

View file

@ -90,6 +90,9 @@ public:
const U32& comp_index);
void SetTcsGenericAttribute(const F32& value, const U32& attr_index, const U32& comp_index);
[[nodiscard]] F32 ReadTcsGenericOuputAttribute(const U32& vertex_index, const U32& attr_index,
const U32& comp_index);
[[nodiscard]] F32 GetPatch(Patch patch);
void SetPatch(Patch patch, const F32& value);

View file

@ -64,6 +64,8 @@ OPCODE(GetPatch, F32, Patc
OPCODE(SetPatch, Void, Patch, F32, )
OPCODE(GetTessGenericAttribute, F32, U32, U32, U32, )
OPCODE(SetTcsGenericAttribute, Void, F32, U32, U32, )
OPCODE(ReadTcsGenericOuputAttribute, F32, U32, U32, U32, )
// Flags
OPCODE(GetScc, U1, Void, )

View file

@ -343,8 +343,8 @@ static IR::U32 TryOptimizeAddressModulo(IR::U32 addr, u32 stride, IR::IREmitter&
// TODO: can optimize div in control point index similarly to mod
// Read a TCS input (InputCP region) or TES input (OutputCP region)
static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmitter& ir,
u32 off_dw) {
static IR::F32 ReadTessControlPointAttribute(IR::U32 addr, const u32 stride, IR::IREmitter& ir,
u32 off_dw, bool is_output_read_in_tcs) {
if (off_dw > 0) {
addr = ir.IAdd(addr, ir.Imm32(off_dw));
}
@ -354,8 +354,12 @@ static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmit
ir.ShiftRightLogical(ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u));
const IR::U32 comp_index =
ir.ShiftRightLogical(ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u));
if (is_output_read_in_tcs) {
return ir.ReadTcsGenericOuputAttribute(control_point_index, attr_index, comp_index);
} else {
return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index);
}
}
} // namespace
@ -481,21 +485,25 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
case IR::Opcode::LoadSharedU128:
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
const IR::U32 addr{inst.Arg(0)};
AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
const AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32
? 1
: (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4);
ASSERT_MSG(region == AttributeRegion::InputCP,
"Unhandled read of output or patchconst attribute in hull shader");
ASSERT_MSG(region == AttributeRegion::InputCP ||
region == AttributeRegion::OutputCP,
"Unhandled read of patchconst attribute in hull shader");
const bool is_tcs_output_read = region == AttributeRegion::OutputCP;
const u32 stride = is_tcs_output_read ? runtime_info.hs_info.hs_output_cp_stride
: runtime_info.hs_info.ls_stride;
IR::Value attr_read;
if (num_dwords == 1) {
attr_read = ir.BitCast<IR::U32>(
ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, 0));
ReadTessControlPointAttribute(addr, stride, ir, 0, is_tcs_output_read));
} else {
boost::container::static_vector<IR::Value, 4> read_components;
for (auto i = 0; i < num_dwords; i++) {
const IR::F32 component =
ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, i);
ReadTessControlPointAttribute(addr, stride, ir, i, is_tcs_output_read);
read_components.push_back(ir.BitCast<IR::U32>(component));
}
attr_read = ir.CompositeConstruct(read_components);
@ -565,8 +573,8 @@ void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
: (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4);
const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 {
if (region == AttributeRegion::OutputCP) {
return ReadTessInputComponent(
addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw);
return ReadTessControlPointAttribute(
addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw, false);
} else {
ASSERT(region == AttributeRegion::PatchConst);
return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw));