implement DS_AND_B32, DS_OR_B32, DS_XOR_B32 (#1593)

* implement DS_OR_B32

* implement DS_AND_B32, DS_XOR_B32
This commit is contained in:
Jamie Tong 2024-12-01 04:39:11 +08:00 committed by GitHub
parent 5b6e0ab238
commit b0860d6e8c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 88 additions and 0 deletions

View file

@ -60,6 +60,18 @@ Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin); return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin);
} }
Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd);
}
Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr);
}
Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor);
}
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd); return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
} }

View file

@ -112,6 +112,9 @@ Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value);
Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/ir/reg.h"
namespace Shader::Gcn { namespace Shader::Gcn {
@ -18,6 +19,12 @@ void Translator::EmitDataShare(const GcnInst& inst) {
return DS_MIN_U32(inst, false, false); return DS_MIN_U32(inst, false, false);
case Opcode::DS_MAX_U32: case Opcode::DS_MAX_U32:
return DS_MAX_U32(inst, false, false); return DS_MAX_U32(inst, false, false);
case Opcode::DS_AND_B32:
return DS_AND_B32(inst, false);
case Opcode::DS_OR_B32:
return DS_OR_B32(inst, false);
case Opcode::DS_XOR_B32:
return DS_XOR_B32(inst, false);
case Opcode::DS_WRITE_B32: case Opcode::DS_WRITE_B32:
return DS_WRITE(32, false, false, false, inst); return DS_WRITE(32, false, false, false, inst);
case Opcode::DS_WRITE2_B32: case Opcode::DS_WRITE2_B32:
@ -30,6 +37,12 @@ void Translator::EmitDataShare(const GcnInst& inst) {
return DS_MIN_U32(inst, false, true); return DS_MIN_U32(inst, false, true);
case Opcode::DS_MAX_RTN_U32: case Opcode::DS_MAX_RTN_U32:
return DS_MAX_U32(inst, false, true); return DS_MAX_U32(inst, false, true);
case Opcode::DS_AND_RTN_B32:
return DS_AND_B32(inst, true);
case Opcode::DS_OR_RTN_B32:
return DS_OR_B32(inst, true);
case Opcode::DS_XOR_RTN_B32:
return DS_XOR_B32(inst, true);
case Opcode::DS_SWIZZLE_B32: case Opcode::DS_SWIZZLE_B32:
return DS_SWIZZLE_B32(inst); return DS_SWIZZLE_B32(inst);
case Opcode::DS_READ_B32: case Opcode::DS_READ_B32:
@ -119,6 +132,42 @@ void Translator::DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn) {
} }
} }
void Translator::DS_AND_B32(const GcnInst& inst, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicAnd(addr_offset, data);
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
}
}
void Translator::DS_OR_B32(const GcnInst& inst, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicOr(addr_offset, data);
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
}
}
void Translator::DS_XOR_B32(const GcnInst& inst, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset =
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
const IR::Value original_val = ir.SharedAtomicXor(addr_offset, data);
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
}
}
void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64,
const GcnInst& inst) { const GcnInst& inst) {
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};

View file

@ -250,6 +250,9 @@ public:
void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn); void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn);
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst); void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
void DS_SWIZZLE_B32(const GcnInst& inst); void DS_SWIZZLE_B32(const GcnInst& inst);
void DS_AND_B32(const GcnInst& inst, bool rtn);
void DS_OR_B32(const GcnInst& inst, bool rtn);
void DS_XOR_B32(const GcnInst& inst, bool rtn);
void DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst); void DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
void DS_APPEND(const GcnInst& inst); void DS_APPEND(const GcnInst& inst);
void DS_CONSUME(const GcnInst& inst); void DS_CONSUME(const GcnInst& inst);

View file

@ -326,6 +326,18 @@ U32 IREmitter::SharedAtomicIMax(const U32& address, const U32& data, bool is_sig
: Inst<U32>(Opcode::SharedAtomicUMax32, address, data); : Inst<U32>(Opcode::SharedAtomicUMax32, address, data);
} }
U32 IREmitter::SharedAtomicAnd(const U32& address, const U32& data) {
return Inst<U32>(Opcode::SharedAtomicAnd32, address, data);
}
U32 IREmitter::SharedAtomicOr(const U32& address, const U32& data) {
return Inst<U32>(Opcode::SharedAtomicOr32, address, data);
}
U32 IREmitter::SharedAtomicXor(const U32& address, const U32& data) {
return Inst<U32>(Opcode::SharedAtomicXor32, address, data);
}
U32 IREmitter::ReadConst(const Value& base, const U32& offset) { U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
return Inst<U32>(Opcode::ReadConst, base, offset); return Inst<U32>(Opcode::ReadConst, base, offset);
} }

View file

@ -90,6 +90,9 @@ public:
[[nodiscard]] U32F32 SharedAtomicIAdd(const U32& address, const U32F32& data); [[nodiscard]] U32F32 SharedAtomicIAdd(const U32& address, const U32F32& data);
[[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed); [[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
[[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed); [[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
[[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data);
[[nodiscard]] U32 SharedAtomicOr(const U32& address, const U32& data);
[[nodiscard]] U32 SharedAtomicXor(const U32& address, const U32& data);
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset); [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
[[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index); [[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index);

View file

@ -77,6 +77,9 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::SharedAtomicUMin32: case Opcode::SharedAtomicUMin32:
case Opcode::SharedAtomicSMax32: case Opcode::SharedAtomicSMax32:
case Opcode::SharedAtomicUMax32: case Opcode::SharedAtomicUMax32:
case Opcode::SharedAtomicAnd32:
case Opcode::SharedAtomicOr32:
case Opcode::SharedAtomicXor32:
case Opcode::ImageWrite: case Opcode::ImageWrite:
case Opcode::ImageAtomicIAdd32: case Opcode::ImageAtomicIAdd32:
case Opcode::ImageAtomicSMin32: case Opcode::ImageAtomicSMin32:

View file

@ -43,6 +43,9 @@ OPCODE(SharedAtomicSMin32, U32, U32,
OPCODE(SharedAtomicUMin32, U32, U32, U32, ) OPCODE(SharedAtomicUMin32, U32, U32, U32, )
OPCODE(SharedAtomicSMax32, U32, U32, U32, ) OPCODE(SharedAtomicSMax32, U32, U32, U32, )
OPCODE(SharedAtomicUMax32, U32, U32, U32, ) OPCODE(SharedAtomicUMax32, U32, U32, U32, )
OPCODE(SharedAtomicAnd32, U32, U32, U32, )
OPCODE(SharedAtomicOr32, U32, U32, U32, )
OPCODE(SharedAtomicXor32, U32, U32, U32, )
// Context getters/setters // Context getters/setters
OPCODE(GetUserData, U32, ScalarReg, ) OPCODE(GetUserData, U32, ScalarReg, )