From 74d43d059f5de360dc9e15289fa51385a8cc74f2 Mon Sep 17 00:00:00 2001 From: Lizardy <6063922+lzardy@users.noreply.github.com> Date: Sat, 17 Aug 2024 15:06:06 -0400 Subject: [PATCH] shader_recompiler: BUFFER_ATOMIC & DS_* Opcodes (#428) * BUFFER_ATOMIC | DS_MINMAX_U32 - Emission of BufferAtomicU32 - Addition of Buffer opcodes to IR - Translator for BUFFER_ATOMIC Opcode - Translators for DS_MAXMIN_U32 Opcodes * Clang Format & UNREACHABLE_MSG * clang * no crash on compile * clang * Shared Atomics * reuse * rm vscode * resolve * opcodes * side effects * attempt fix shader comp * failed attempt to fix * clang * do correct vdata set (still fails) * clang * fixed BUFFER_ATOMIC_ADD, DS_ADD_U32 fails * data share should work * clang * resource tracking for buffer atomic * clang * distinguish RTN opcodes * clean IsBufferInstruction --------- Co-authored-by: microsoftv <6063922+microsoftv@users.noreply.github.com> --- .../backend/spirv/emit_spirv_atomic.cpp | 85 +++++++++++++++++++ .../spirv/emit_spirv_context_get_set.cpp | 1 + .../backend/spirv/emit_spirv_instructions.h | 16 ++++ .../frontend/translate/data_share.cpp | 48 +++++++++++ .../frontend/translate/translate.h | 4 + .../frontend/translate/vector_memory.cpp | 58 +++++++++++++ src/shader_recompiler/ir/ir_emitter.cpp | 66 ++++++++++++++ src/shader_recompiler/ir/ir_emitter.h | 23 +++++ src/shader_recompiler/ir/microinstruction.cpp | 16 ++++ src/shader_recompiler/ir/opcodes.inc | 20 +++++ .../ir/passes/resource_tracking_pass.cpp | 65 ++++++++------ 11 files changed, 375 insertions(+), 27 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index e0bc4b77d..37e91d3b1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -12,6 +12,25 @@ std::pair AtomicArgs(EmitContext& ctx) { return {scope, semantics}; } +Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const Id shift_id{ctx.ConstU32(2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); +} + +Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + auto& buffer = ctx.buffers[handle]; + address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); + const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); + const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index); + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value); +} + Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value, Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { const auto& texture = ctx.images[handle & 0xFFFF]; @@ -21,6 +40,72 @@ Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id va } } // Anonymous namespace +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd); +} + +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax); +} + +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax); +} + +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin); +} + +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin); +} + +Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd); +} + +Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin); +} + +Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin); +} + +Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMax); +} + +Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMax); +} + +Id EmitBufferAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) { + // TODO + UNREACHABLE_MSG("Unsupported BUFFER_ATOMIC opcode: ", IR::Opcode::BufferAtomicInc32); +} + +Id EmitBufferAtomicDec32(EmitContext&, IR::Inst*, u32, Id, Id) { + // TODO + UNREACHABLE_MSG("Unsupported BUFFER_ATOMIC opcode: ", IR::Opcode::BufferAtomicDec32); +} + +Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicAnd); +} + +Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicOr); +} + +Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicXor); +} + +Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange); +} + Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) { return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index f933ed3c6..0b02f3a37 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -495,6 +495,7 @@ static void EmitStoreBufferFormatF32xN(EmitContext& ctx, u32 handle, Id address, case AmdGpu::DataFormat::Format8_8_8_8: case AmdGpu::DataFormat::Format16: case AmdGpu::DataFormat::Format32: + case AmdGpu::DataFormat::Format32_32: case AmdGpu::DataFormat::Format32_32_32_32: { ASSERT(N == AmdGpu::NumComponents(format)); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 8a0fcd4b8..bc39bc0f3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -81,6 +81,17 @@ void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); @@ -103,6 +114,11 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset); void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value); Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 532e024e2..b7b5aa138 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -25,6 +25,18 @@ void Translator::EmitDataShare(const GcnInst& inst) { return DS_WRITE(32, false, true, inst); case Opcode::DS_WRITE2_B64: return DS_WRITE(64, false, true, inst); + case Opcode::DS_ADD_U32: + return DS_ADD_U32(inst, false); + case Opcode::DS_MIN_U32: + return DS_MIN_U32(inst, false); + case Opcode::DS_MAX_U32: + return DS_MAX_U32(inst, false); + case Opcode::DS_ADD_RTN_U32: + return DS_ADD_U32(inst, true); + case Opcode::DS_MIN_RTN_U32: + return DS_MIN_U32(inst, true); + case Opcode::DS_MAX_RTN_U32: + return DS_MAX_U32(inst, true); default: LogMissingOpcode(inst); } @@ -110,6 +122,42 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI } } +void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) { + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 data{GetSrc(inst.src[1])}; + const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + IR::VectorReg dst_reg{inst.dst[0].code}; + const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data); + if (rtn) { + SetDst(inst.dst[0], IR::U32{original_val}); + } +} + +void Translator::DS_MIN_U32(const GcnInst& inst, bool rtn) { + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 data{GetSrc(inst.src[1])}; + const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + IR::VectorReg dst_reg{inst.dst[0].code}; + const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, false); + if (rtn) { + SetDst(inst.dst[0], IR::U32{original_val}); + } +} + +void Translator::DS_MAX_U32(const GcnInst& inst, bool rtn) { + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 data{GetSrc(inst.src[1])}; + const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + IR::VectorReg dst_reg{inst.dst[0].code}; + const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, false); + if (rtn) { + SetDst(inst.dst[0], IR::U32{original_val}); + } +} + void Translator::S_BARRIER() { ir.Barrier(); } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 9ebcb1163..009acabdf 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -187,6 +187,7 @@ public: // Vector Memory void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); + void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst); // Vector interpolation void V_INTERP_P2_F32(const GcnInst& inst); @@ -196,6 +197,9 @@ public: void DS_SWIZZLE_B32(const GcnInst& inst); void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); + void DS_ADD_U32(const GcnInst& inst, bool rtn); + void DS_MIN_U32(const GcnInst& inst, bool rtn); + void DS_MAX_U32(const GcnInst& inst, bool rtn); void V_READFIRSTLANE_B32(const GcnInst& inst); void V_READLANE_B32(const GcnInst& inst); void V_WRITELANE_B32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 01a549f44..08674fa2d 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -104,6 +104,10 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { return BUFFER_STORE_FORMAT(3, false, false, inst); case Opcode::BUFFER_STORE_DWORDX4: return BUFFER_STORE_FORMAT(4, false, false, inst); + + // Buffer atomic operations + case Opcode::BUFFER_ATOMIC_ADD: + return BUFFER_ATOMIC(AtomicOp::Add, inst); default: LogMissingOpcode(inst); } @@ -435,6 +439,60 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_form } } +// TODO: U64 +void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) { + const auto& mubuf = inst.control.mubuf; + const IR::VectorReg vaddr{inst.src[0].code}; + const IR::VectorReg vdata{inst.src[1].code}; + const IR::ScalarReg srsrc{inst.src[2].code * 4}; + const IR::U32 soffset{GetSrc(inst.src[3])}; + ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); + + IR::BufferInstInfo info{}; + info.index_enable.Assign(mubuf.idxen); + info.inst_offset.Assign(mubuf.offset); + info.offset_enable.Assign(mubuf.offen); + + IR::Value vdata_val = ir.GetVectorReg(vdata); + const IR::U32 address = ir.GetVectorReg(vaddr); + const IR::Value handle = + ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1), + ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3)); + + const IR::Value original_val = [&] { + switch (op) { + case AtomicOp::Swap: + return ir.BufferAtomicExchange(handle, address, vdata_val, info); + case AtomicOp::Add: + return ir.BufferAtomicIAdd(handle, address, vdata_val, info); + case AtomicOp::Smin: + return ir.BufferAtomicIMin(handle, address, vdata_val, true, info); + case AtomicOp::Umin: + return ir.BufferAtomicIMin(handle, address, vdata_val, false, info); + case AtomicOp::Smax: + return ir.BufferAtomicIMax(handle, address, vdata_val, true, info); + case AtomicOp::Umax: + return ir.BufferAtomicIMax(handle, address, vdata_val, false, info); + case AtomicOp::And: + return ir.BufferAtomicAnd(handle, address, vdata_val, info); + case AtomicOp::Or: + return ir.BufferAtomicOr(handle, address, vdata_val, info); + case AtomicOp::Xor: + return ir.BufferAtomicXor(handle, address, vdata_val, info); + case AtomicOp::Inc: + return ir.BufferAtomicInc(handle, address, vdata_val, info); + case AtomicOp::Dec: + return ir.BufferAtomicDec(handle, address, vdata_val, info); + default: + UNREACHABLE(); + } + }(); + + if (mubuf.glc) { + ir.SetVectorReg(vdata, IR::U32{original_val}); + } +} + void Translator::IMAGE_GET_LOD(const GcnInst& inst) { const auto& mimg = inst.control.mimg; IR::VectorReg dst_reg{inst.dst[0].code}; diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 4271ac359..3ae068072 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -286,6 +286,25 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) } } +U32F32 IREmitter::SharedAtomicIAdd(const U32& address, const U32F32& data) { + switch (data.Type()) { + case Type::U32: + return Inst(Opcode::SharedAtomicIAdd32, address, data); + default: + ThrowInvalidType(data.Type()); + } +} + +U32 IREmitter::SharedAtomicIMin(const U32& address, const U32& data, bool is_signed) { + return is_signed ? Inst(Opcode::SharedAtomicSMin32, address, data) + : Inst(Opcode::SharedAtomicUMin32, address, data); +} + +U32 IREmitter::SharedAtomicIMax(const U32& address, const U32& data, bool is_signed) { + return is_signed ? Inst(Opcode::SharedAtomicSMax32, address, data) + : Inst(Opcode::SharedAtomicUMax32, address, data); +} + U32 IREmitter::ReadConst(const Value& base, const U32& offset) { return Inst(Opcode::ReadConst, base, offset); } @@ -347,6 +366,53 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad } } +Value IREmitter::BufferAtomicIAdd(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicIAdd32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicIMin(const Value& handle, const Value& address, const Value& value, + bool is_signed, BufferInstInfo info) { + return is_signed ? Inst(Opcode::BufferAtomicSMin32, Flags{info}, handle, address, value) + : Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicIMax(const Value& handle, const Value& address, const Value& value, + bool is_signed, BufferInstInfo info) { + return is_signed ? Inst(Opcode::BufferAtomicSMax32, Flags{info}, handle, address, value) + : Inst(Opcode::BufferAtomicUMax32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicInc(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicInc32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicDec(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicDec32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicAnd(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicAnd32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicOr(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicOr32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicXor(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicXor32, Flags{info}, handle, address, value); +} + +Value IREmitter::BufferAtomicExchange(const Value& handle, const Value& address, const Value& value, + BufferInstInfo info) { + return Inst(Opcode::BufferAtomicExchange32, Flags{info}, handle, address, value); +} + void IREmitter::StoreBufferFormat(int num_dwords, const Value& handle, const Value& address, const Value& data, BufferInstInfo info) { switch (num_dwords) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 59ced93e3..be7f25153 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -84,6 +84,10 @@ public: [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); void WriteShared(int bit_size, const Value& value, const U32& offset); + [[nodiscard]] U32F32 SharedAtomicIAdd(const U32& address, const U32F32& data); + [[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed); + [[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed); + [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset); [[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index); @@ -96,6 +100,25 @@ public: void StoreBufferFormat(int num_dwords, const Value& handle, const Value& address, const Value& data, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicIAdd(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicIMin(const Value& handle, const Value& address, + const Value& value, bool is_signed, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicIMax(const Value& handle, const Value& address, + const Value& value, bool is_signed, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicInc(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicDec(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicAnd(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicOr(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicXor(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicExchange(const Value& handle, const Value& address, + const Value& value, BufferInstInfo info); + [[nodiscard]] U32 LaneId(); [[nodiscard]] U32 WarpId(); [[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index); diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index a8166125e..e35be8a7f 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -60,9 +60,25 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::StoreBufferFormatF32x3: case Opcode::StoreBufferFormatF32x4: case Opcode::StoreBufferU32: + case Opcode::BufferAtomicIAdd32: + case Opcode::BufferAtomicSMin32: + case Opcode::BufferAtomicUMin32: + case Opcode::BufferAtomicSMax32: + case Opcode::BufferAtomicUMax32: + case Opcode::BufferAtomicInc32: + case Opcode::BufferAtomicDec32: + case Opcode::BufferAtomicAnd32: + case Opcode::BufferAtomicOr32: + case Opcode::BufferAtomicXor32: + case Opcode::BufferAtomicExchange32: case Opcode::WriteSharedU128: case Opcode::WriteSharedU64: case Opcode::WriteSharedU32: + case Opcode::SharedAtomicIAdd32: + case Opcode::SharedAtomicSMin32: + case Opcode::SharedAtomicUMin32: + case Opcode::SharedAtomicSMax32: + case Opcode::SharedAtomicUMax32: case Opcode::ImageWrite: case Opcode::ImageAtomicIAdd32: case Opcode::ImageAtomicSMin32: diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 4c6122a83..e9ecd4350 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -33,6 +33,13 @@ OPCODE(WriteSharedU32, Void, U32, OPCODE(WriteSharedU64, Void, U32, U32x2, ) OPCODE(WriteSharedU128, Void, U32, U32x4, ) +// Shared atomic operations +OPCODE(SharedAtomicIAdd32, U32, U32, U32, ) +OPCODE(SharedAtomicSMin32, U32, U32, U32, ) +OPCODE(SharedAtomicUMin32, U32, U32, U32, ) +OPCODE(SharedAtomicSMax32, U32, U32, U32, ) +OPCODE(SharedAtomicUMax32, U32, U32, U32, ) + // Context getters/setters OPCODE(GetUserData, U32, ScalarReg, ) OPCODE(GetThreadBitScalarReg, U1, ScalarReg, ) @@ -88,6 +95,19 @@ OPCODE(StoreBufferFormatF32x3, Void, Opaq OPCODE(StoreBufferFormatF32x4, Void, Opaque, Opaque, F32x4, ) OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, ) +// Buffer atomic operations +OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicExchange32, U32, Opaque, Opaque, U32, ) + // Vector utility OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 97fc5b999..20a66ad0c 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -20,6 +20,42 @@ struct SharpLocation { auto operator<=>(const SharpLocation&) const = default; }; +bool IsBufferAtomic(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::BufferAtomicIAdd32: + case IR::Opcode::BufferAtomicSMin32: + case IR::Opcode::BufferAtomicUMin32: + case IR::Opcode::BufferAtomicSMax32: + case IR::Opcode::BufferAtomicUMax32: + case IR::Opcode::BufferAtomicInc32: + case IR::Opcode::BufferAtomicDec32: + case IR::Opcode::BufferAtomicAnd32: + case IR::Opcode::BufferAtomicOr32: + case IR::Opcode::BufferAtomicXor32: + case IR::Opcode::BufferAtomicExchange32: + return true; + default: + return false; + } +} + +bool IsBufferStore(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::StoreBufferF32: + case IR::Opcode::StoreBufferF32x2: + case IR::Opcode::StoreBufferF32x3: + case IR::Opcode::StoreBufferF32x4: + case IR::Opcode::StoreBufferFormatF32: + case IR::Opcode::StoreBufferFormatF32x2: + case IR::Opcode::StoreBufferFormatF32x3: + case IR::Opcode::StoreBufferFormatF32x4: + case IR::Opcode::StoreBufferU32: + return true; + default: + return IsBufferAtomic(inst); + } +} + bool IsBufferInstruction(const IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::LoadBufferF32: @@ -33,18 +69,9 @@ bool IsBufferInstruction(const IR::Inst& inst) { case IR::Opcode::LoadBufferU32: case IR::Opcode::ReadConstBuffer: case IR::Opcode::ReadConstBufferU32: - case IR::Opcode::StoreBufferF32: - case IR::Opcode::StoreBufferF32x2: - case IR::Opcode::StoreBufferF32x3: - case IR::Opcode::StoreBufferF32x4: - case IR::Opcode::StoreBufferFormatF32: - case IR::Opcode::StoreBufferFormatF32x2: - case IR::Opcode::StoreBufferFormatF32x3: - case IR::Opcode::StoreBufferFormatF32x4: - case IR::Opcode::StoreBufferU32: return true; default: - return false; + return IsBufferStore(inst); } } @@ -108,29 +135,13 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) { case IR::Opcode::LoadBufferU32: case IR::Opcode::ReadConstBufferU32: case IR::Opcode::StoreBufferU32: + case IR::Opcode::BufferAtomicIAdd32: return IR::Type::U32; default: UNREACHABLE(); } } -bool IsBufferStore(const IR::Inst& inst) { - switch (inst.GetOpcode()) { - case IR::Opcode::StoreBufferF32: - case IR::Opcode::StoreBufferF32x2: - case IR::Opcode::StoreBufferF32x3: - case IR::Opcode::StoreBufferF32x4: - case IR::Opcode::StoreBufferFormatF32: - case IR::Opcode::StoreBufferFormatF32x2: - case IR::Opcode::StoreBufferFormatF32x3: - case IR::Opcode::StoreBufferFormatF32x4: - case IR::Opcode::StoreBufferU32: - return true; - default: - return false; - } -} - bool IsImageInstruction(const IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::ImageSampleExplicitLod: