From 62c47cb1b74c22812c566e7a2aeb2968e7fcb999 Mon Sep 17 00:00:00 2001 From: baggins183 Date: Sun, 29 Dec 2024 02:37:15 -0800 Subject: [PATCH] recompiler: handle reads of output variables in hull shaders (#1962) * Handle output control point reads in hull shader. Might need additional barriers * output storage class --- .../spirv/emit_spirv_context_get_set.cpp | 15 +++++----- .../backend/spirv/emit_spirv_instructions.h | 2 ++ .../frontend/translate/vector_memory.cpp | 4 --- src/shader_recompiler/ir/ir_emitter.cpp | 6 ++++ src/shader_recompiler/ir/ir_emitter.h | 3 ++ src/shader_recompiler/ir/opcodes.inc | 2 ++ .../ir/passes/hull_shader_transform.cpp | 28 ++++++++++++------- 7 files changed, 38 insertions(+), 22 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index f3db6af56..4550440bb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -217,14 +217,6 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { const auto pointer{ ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))}; return ctx.OpLoad(ctx.F32[1], pointer); - } else if (IR::IsParam(attr)) { - const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; - const auto param = ctx.input_params.at(param_id).id; - const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)}; - const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); } UNREACHABLE(); } @@ -351,6 +343,13 @@ Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, vertex_index, attr_index, comp_index)); } +Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, + Id comp_index) { + const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.output_attr_array, + vertex_index, attr_index, comp_index)); +} + void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) { // Implied vertex index is invocation_id const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 85bed589b..d26cf6662 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -89,6 +89,8 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index); void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index); +Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, + Id comp_index); Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 7c3db9551..79d46cd42 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -255,10 +255,6 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst "Non immediate offset not supported"); } - if (info.stage == Stage::Hull) { - // printf("here\n"); // break - } - IR::Value address = [&] -> IR::Value { if (is_ring) { return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index c9d97679f..20e6eae0b 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -288,6 +288,12 @@ void IREmitter::SetTcsGenericAttribute(const F32& value, const U32& attr_index, Inst(Opcode::SetTcsGenericAttribute, value, attr_index, comp_index); } +F32 IREmitter::ReadTcsGenericOuputAttribute(const U32& vertex_index, const U32& attr_index, + const U32& comp_index) { + return Inst(IR::Opcode::ReadTcsGenericOuputAttribute, vertex_index, attr_index, + comp_index); +} + F32 IREmitter::GetPatch(Patch patch) { return Inst(Opcode::GetPatch, patch); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 4679a0133..f65baee2a 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -90,6 +90,9 @@ public: const U32& comp_index); void SetTcsGenericAttribute(const F32& value, const U32& attr_index, const U32& comp_index); + [[nodiscard]] F32 ReadTcsGenericOuputAttribute(const U32& vertex_index, const U32& attr_index, + const U32& comp_index); + [[nodiscard]] F32 GetPatch(Patch patch); void SetPatch(Patch patch, const F32& value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index cf2c3b67e..1194c3792 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -64,6 +64,8 @@ OPCODE(GetPatch, F32, Patc OPCODE(SetPatch, Void, Patch, F32, ) OPCODE(GetTessGenericAttribute, F32, U32, U32, U32, ) OPCODE(SetTcsGenericAttribute, Void, F32, U32, U32, ) +OPCODE(ReadTcsGenericOuputAttribute, F32, U32, U32, U32, ) + // Flags OPCODE(GetScc, U1, Void, ) diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp index 895c9823e..6164fec12 100644 --- a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp +++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -343,8 +343,8 @@ static IR::U32 TryOptimizeAddressModulo(IR::U32 addr, u32 stride, IR::IREmitter& // TODO: can optimize div in control point index similarly to mod // Read a TCS input (InputCP region) or TES input (OutputCP region) -static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmitter& ir, - u32 off_dw) { +static IR::F32 ReadTessControlPointAttribute(IR::U32 addr, const u32 stride, IR::IREmitter& ir, + u32 off_dw, bool is_output_read_in_tcs) { if (off_dw > 0) { addr = ir.IAdd(addr, ir.Imm32(off_dw)); } @@ -354,7 +354,11 @@ static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmit ir.ShiftRightLogical(ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u)); const IR::U32 comp_index = ir.ShiftRightLogical(ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u)); - return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index); + if (is_output_read_in_tcs) { + return ir.ReadTcsGenericOuputAttribute(control_point_index, attr_index, comp_index); + } else { + return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index); + } } } // namespace @@ -481,21 +485,25 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { case IR::Opcode::LoadSharedU128: IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::U32 addr{inst.Arg(0)}; - AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); + const AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32 ? 1 : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4); - ASSERT_MSG(region == AttributeRegion::InputCP, - "Unhandled read of output or patchconst attribute in hull shader"); + ASSERT_MSG(region == AttributeRegion::InputCP || + region == AttributeRegion::OutputCP, + "Unhandled read of patchconst attribute in hull shader"); + const bool is_tcs_output_read = region == AttributeRegion::OutputCP; + const u32 stride = is_tcs_output_read ? runtime_info.hs_info.hs_output_cp_stride + : runtime_info.hs_info.ls_stride; IR::Value attr_read; if (num_dwords == 1) { attr_read = ir.BitCast( - ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, 0)); + ReadTessControlPointAttribute(addr, stride, ir, 0, is_tcs_output_read)); } else { boost::container::static_vector read_components; for (auto i = 0; i < num_dwords; i++) { const IR::F32 component = - ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, i); + ReadTessControlPointAttribute(addr, stride, ir, i, is_tcs_output_read); read_components.push_back(ir.BitCast(component)); } attr_read = ir.CompositeConstruct(read_components); @@ -565,8 +573,8 @@ void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4); const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 { if (region == AttributeRegion::OutputCP) { - return ReadTessInputComponent( - addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw); + return ReadTessControlPointAttribute( + addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw, false); } else { ASSERT(region == AttributeRegion::PatchConst); return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw));