From b0860d6e8c95cf585f28a5096135878b15e47931 Mon Sep 17 00:00:00 2001 From: Jamie Tong Date: Sun, 1 Dec 2024 04:39:11 +0800 Subject: [PATCH] implement DS_AND_B32, DS_OR_B32, DS_XOR_B32 (#1593) * implement DS_OR_B32 * implement DS_AND_B32, DS_XOR_B32 --- .../backend/spirv/emit_spirv_atomic.cpp | 12 +++++ .../backend/spirv/emit_spirv_instructions.h | 3 ++ .../frontend/translate/data_share.cpp | 49 +++++++++++++++++++ .../frontend/translate/translate.h | 3 ++ src/shader_recompiler/ir/ir_emitter.cpp | 12 +++++ src/shader_recompiler/ir/ir_emitter.h | 3 ++ src/shader_recompiler/ir/microinstruction.cpp | 3 ++ src/shader_recompiler/ir/opcodes.inc | 3 ++ 8 files changed, 88 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index a58b2778f..ce65a5ccb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -60,6 +60,18 @@ Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) { return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin); } +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd); +} + +Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr); +} + +Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor); +} + Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 12361991a..cc3db880c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -112,6 +112,9 @@ Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value); Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 2f5932f80..5914f9fe3 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/frontend/translate/translate.h" +#include "shader_recompiler/ir/reg.h" namespace Shader::Gcn { @@ -18,6 +19,12 @@ void Translator::EmitDataShare(const GcnInst& inst) { return DS_MIN_U32(inst, false, false); case Opcode::DS_MAX_U32: return DS_MAX_U32(inst, false, false); + case Opcode::DS_AND_B32: + return DS_AND_B32(inst, false); + case Opcode::DS_OR_B32: + return DS_OR_B32(inst, false); + case Opcode::DS_XOR_B32: + return DS_XOR_B32(inst, false); case Opcode::DS_WRITE_B32: return DS_WRITE(32, false, false, false, inst); case Opcode::DS_WRITE2_B32: @@ -30,6 +37,12 @@ void Translator::EmitDataShare(const GcnInst& inst) { return DS_MIN_U32(inst, false, true); case Opcode::DS_MAX_RTN_U32: return DS_MAX_U32(inst, false, true); + case Opcode::DS_AND_RTN_B32: + return DS_AND_B32(inst, true); + case Opcode::DS_OR_RTN_B32: + return DS_OR_B32(inst, true); + case Opcode::DS_XOR_RTN_B32: + return DS_XOR_B32(inst, true); case Opcode::DS_SWIZZLE_B32: return DS_SWIZZLE_B32(inst); case Opcode::DS_READ_B32: @@ -119,6 +132,42 @@ void Translator::DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn) { } } +void Translator::DS_AND_B32(const GcnInst& inst, bool rtn) { + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 data{GetSrc(inst.src[1])}; + const IR::U32 offset = + ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + const IR::Value original_val = ir.SharedAtomicAnd(addr_offset, data); + if (rtn) { + SetDst(inst.dst[0], IR::U32{original_val}); + } +} + +void Translator::DS_OR_B32(const GcnInst& inst, bool rtn) { + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 data{GetSrc(inst.src[1])}; + const IR::U32 offset = + ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + const IR::Value original_val = ir.SharedAtomicOr(addr_offset, data); + if (rtn) { + SetDst(inst.dst[0], IR::U32{original_val}); + } +} + +void Translator::DS_XOR_B32(const GcnInst& inst, bool rtn) { + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 data{GetSrc(inst.src[1])}; + const IR::U32 offset = + ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + const IR::Value original_val = ir.SharedAtomicXor(addr_offset, data); + if (rtn) { + SetDst(inst.dst[0], IR::U32{original_val}); + } +} + void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst) { const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index f04038909..3b89372bd 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -250,6 +250,9 @@ public: void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn); void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst); void DS_SWIZZLE_B32(const GcnInst& inst); + void DS_AND_B32(const GcnInst& inst, bool rtn); + void DS_OR_B32(const GcnInst& inst, bool rtn); + void DS_XOR_B32(const GcnInst& inst, bool rtn); void DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst); void DS_APPEND(const GcnInst& inst); void DS_CONSUME(const GcnInst& inst); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 73b33432b..78e7f2289 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -326,6 +326,18 @@ U32 IREmitter::SharedAtomicIMax(const U32& address, const U32& data, bool is_sig : Inst(Opcode::SharedAtomicUMax32, address, data); } +U32 IREmitter::SharedAtomicAnd(const U32& address, const U32& data) { + return Inst(Opcode::SharedAtomicAnd32, address, data); +} + +U32 IREmitter::SharedAtomicOr(const U32& address, const U32& data) { + return Inst(Opcode::SharedAtomicOr32, address, data); +} + +U32 IREmitter::SharedAtomicXor(const U32& address, const U32& data) { + return Inst(Opcode::SharedAtomicXor32, address, data); +} + U32 IREmitter::ReadConst(const Value& base, const U32& offset) { return Inst(Opcode::ReadConst, base, offset); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index b3f513085..cbd3780de 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -90,6 +90,9 @@ public: [[nodiscard]] U32F32 SharedAtomicIAdd(const U32& address, const U32F32& data); [[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed); [[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed); + [[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data); + [[nodiscard]] U32 SharedAtomicOr(const U32& address, const U32& data); + [[nodiscard]] U32 SharedAtomicXor(const U32& address, const U32& data); [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset); [[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index); diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index f0b4882b3..abd31a728 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -77,6 +77,9 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::SharedAtomicUMin32: case Opcode::SharedAtomicSMax32: case Opcode::SharedAtomicUMax32: + case Opcode::SharedAtomicAnd32: + case Opcode::SharedAtomicOr32: + case Opcode::SharedAtomicXor32: case Opcode::ImageWrite: case Opcode::ImageAtomicIAdd32: case Opcode::ImageAtomicSMin32: diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 51e10fb38..0283ccd0f 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -43,6 +43,9 @@ OPCODE(SharedAtomicSMin32, U32, U32, OPCODE(SharedAtomicUMin32, U32, U32, U32, ) OPCODE(SharedAtomicSMax32, U32, U32, U32, ) OPCODE(SharedAtomicUMax32, U32, U32, U32, ) +OPCODE(SharedAtomicAnd32, U32, U32, U32, ) +OPCODE(SharedAtomicOr32, U32, U32, U32, ) +OPCODE(SharedAtomicXor32, U32, U32, U32, ) // Context getters/setters OPCODE(GetUserData, U32, ScalarReg, )