shader_recompiler: BUFFER_ATOMIC & DS_* Opcodes (#428)

* BUFFER_ATOMIC | DS_MINMAX_U32

- Emission of BufferAtomicU32
- Addition of Buffer opcodes to IR
- Translator for BUFFER_ATOMIC Opcode
- Translators for DS_MAXMIN_U32 Opcodes

* Clang Format & UNREACHABLE_MSG

* clang

* no crash on compile

* clang

* Shared Atomics

* reuse

* rm vscode

* resolve

* opcodes

* side effects

* attempt fix shader comp

* failed attempt to fix

* clang

* do correct vdata set (still fails)

* clang

* fixed BUFFER_ATOMIC_ADD, DS_ADD_U32 fails

* data share should work

* clang

* resource tracking for buffer atomic

* clang

* distinguish RTN opcodes

* clean IsBufferInstruction

---------

Co-authored-by: microsoftv <6063922+microsoftv@users.noreply.github.com>
This commit is contained in:
Lizardy 2024-08-17 15:06:06 -04:00 committed by GitHub
parent fa7e7a9963
commit 74d43d059f
11 changed files with 375 additions and 27 deletions

View file

@ -12,6 +12,25 @@ std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
return {scope, semantics}; return {scope, semantics};
} }
Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const Id shift_id{ctx.ConstU32(2U)};
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
const auto [scope, semantics]{AtomicArgs(ctx)};
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
}
Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
auto& buffer = ctx.buffers[handle];
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index);
const auto [scope, semantics]{AtomicArgs(ctx)};
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value);
}
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value, Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const auto& texture = ctx.images[handle & 0xFFFF]; const auto& texture = ctx.images[handle & 0xFFFF];
@ -21,6 +40,72 @@ Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id va
} }
} // Anonymous namespace } // Anonymous namespace
Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd);
}
Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
}
Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax);
}
Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin);
}
Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin);
}
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
}
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
}
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin);
}
Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMax);
}
Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMax);
}
Id EmitBufferAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) {
// TODO
UNREACHABLE_MSG("Unsupported BUFFER_ATOMIC opcode: ", IR::Opcode::BufferAtomicInc32);
}
Id EmitBufferAtomicDec32(EmitContext&, IR::Inst*, u32, Id, Id) {
// TODO
UNREACHABLE_MSG("Unsupported BUFFER_ATOMIC opcode: ", IR::Opcode::BufferAtomicDec32);
}
Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicAnd);
}
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicOr);
}
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicXor);
}
Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange);
}
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) { Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd); return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd);
} }

View file

@ -495,6 +495,7 @@ static void EmitStoreBufferFormatF32xN(EmitContext& ctx, u32 handle, Id address,
case AmdGpu::DataFormat::Format8_8_8_8: case AmdGpu::DataFormat::Format8_8_8_8:
case AmdGpu::DataFormat::Format16: case AmdGpu::DataFormat::Format16:
case AmdGpu::DataFormat::Format32: case AmdGpu::DataFormat::Format32:
case AmdGpu::DataFormat::Format32_32:
case AmdGpu::DataFormat::Format32_32_32_32: { case AmdGpu::DataFormat::Format32_32_32_32: {
ASSERT(N == AmdGpu::NumComponents(format)); ASSERT(N == AmdGpu::NumComponents(format));

View file

@ -81,6 +81,17 @@ void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id
void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp); Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
@ -103,6 +114,11 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value);
Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);

View file

@ -25,6 +25,18 @@ void Translator::EmitDataShare(const GcnInst& inst) {
return DS_WRITE(32, false, true, inst); return DS_WRITE(32, false, true, inst);
case Opcode::DS_WRITE2_B64: case Opcode::DS_WRITE2_B64:
return DS_WRITE(64, false, true, inst); return DS_WRITE(64, false, true, inst);
case Opcode::DS_ADD_U32:
return DS_ADD_U32(inst, false);
case Opcode::DS_MIN_U32:
return DS_MIN_U32(inst, false);
case Opcode::DS_MAX_U32:
return DS_MAX_U32(inst, false);
case Opcode::DS_ADD_RTN_U32:
return DS_ADD_U32(inst, true);
case Opcode::DS_MIN_RTN_U32:
return DS_MIN_U32(inst, true);
case Opcode::DS_MAX_RTN_U32:
return DS_MAX_U32(inst, true);
default: default:
LogMissingOpcode(inst); LogMissingOpcode(inst);
} }
@ -110,6 +122,42 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI
} }
} }
void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
}
}
void Translator::DS_MIN_U32(const GcnInst& inst, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, false);
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
}
}
void Translator::DS_MAX_U32(const GcnInst& inst, bool rtn) {
const IR::U32 addr{GetSrc(inst.src[0])};
const IR::U32 data{GetSrc(inst.src[1])};
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
const IR::U32 addr_offset = ir.IAdd(addr, offset);
IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, false);
if (rtn) {
SetDst(inst.dst[0], IR::U32{original_val});
}
}
void Translator::S_BARRIER() { void Translator::S_BARRIER() {
ir.Barrier(); ir.Barrier();
} }

View file

@ -187,6 +187,7 @@ public:
// Vector Memory // Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst);
// Vector interpolation // Vector interpolation
void V_INTERP_P2_F32(const GcnInst& inst); void V_INTERP_P2_F32(const GcnInst& inst);
@ -196,6 +197,9 @@ public:
void DS_SWIZZLE_B32(const GcnInst& inst); void DS_SWIZZLE_B32(const GcnInst& inst);
void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst); void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void DS_ADD_U32(const GcnInst& inst, bool rtn);
void DS_MIN_U32(const GcnInst& inst, bool rtn);
void DS_MAX_U32(const GcnInst& inst, bool rtn);
void V_READFIRSTLANE_B32(const GcnInst& inst); void V_READFIRSTLANE_B32(const GcnInst& inst);
void V_READLANE_B32(const GcnInst& inst); void V_READLANE_B32(const GcnInst& inst);
void V_WRITELANE_B32(const GcnInst& inst); void V_WRITELANE_B32(const GcnInst& inst);

View file

@ -104,6 +104,10 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
return BUFFER_STORE_FORMAT(3, false, false, inst); return BUFFER_STORE_FORMAT(3, false, false, inst);
case Opcode::BUFFER_STORE_DWORDX4: case Opcode::BUFFER_STORE_DWORDX4:
return BUFFER_STORE_FORMAT(4, false, false, inst); return BUFFER_STORE_FORMAT(4, false, false, inst);
// Buffer atomic operations
case Opcode::BUFFER_ATOMIC_ADD:
return BUFFER_ATOMIC(AtomicOp::Add, inst);
default: default:
LogMissingOpcode(inst); LogMissingOpcode(inst);
} }
@ -435,6 +439,60 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_form
} }
} }
// TODO: U64
void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
const auto& mubuf = inst.control.mubuf;
const IR::VectorReg vaddr{inst.src[0].code};
const IR::VectorReg vdata{inst.src[1].code};
const IR::ScalarReg srsrc{inst.src[2].code * 4};
const IR::U32 soffset{GetSrc(inst.src[3])};
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
IR::BufferInstInfo info{};
info.index_enable.Assign(mubuf.idxen);
info.inst_offset.Assign(mubuf.offset);
info.offset_enable.Assign(mubuf.offen);
IR::Value vdata_val = ir.GetVectorReg<Shader::IR::U32>(vdata);
const IR::U32 address = ir.GetVectorReg(vaddr);
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1),
ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3));
const IR::Value original_val = [&] {
switch (op) {
case AtomicOp::Swap:
return ir.BufferAtomicExchange(handle, address, vdata_val, info);
case AtomicOp::Add:
return ir.BufferAtomicIAdd(handle, address, vdata_val, info);
case AtomicOp::Smin:
return ir.BufferAtomicIMin(handle, address, vdata_val, true, info);
case AtomicOp::Umin:
return ir.BufferAtomicIMin(handle, address, vdata_val, false, info);
case AtomicOp::Smax:
return ir.BufferAtomicIMax(handle, address, vdata_val, true, info);
case AtomicOp::Umax:
return ir.BufferAtomicIMax(handle, address, vdata_val, false, info);
case AtomicOp::And:
return ir.BufferAtomicAnd(handle, address, vdata_val, info);
case AtomicOp::Or:
return ir.BufferAtomicOr(handle, address, vdata_val, info);
case AtomicOp::Xor:
return ir.BufferAtomicXor(handle, address, vdata_val, info);
case AtomicOp::Inc:
return ir.BufferAtomicInc(handle, address, vdata_val, info);
case AtomicOp::Dec:
return ir.BufferAtomicDec(handle, address, vdata_val, info);
default:
UNREACHABLE();
}
}();
if (mubuf.glc) {
ir.SetVectorReg(vdata, IR::U32{original_val});
}
}
void Translator::IMAGE_GET_LOD(const GcnInst& inst) { void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
const auto& mimg = inst.control.mimg; const auto& mimg = inst.control.mimg;
IR::VectorReg dst_reg{inst.dst[0].code}; IR::VectorReg dst_reg{inst.dst[0].code};

View file

@ -286,6 +286,25 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
} }
} }
U32F32 IREmitter::SharedAtomicIAdd(const U32& address, const U32F32& data) {
switch (data.Type()) {
case Type::U32:
return Inst<U32>(Opcode::SharedAtomicIAdd32, address, data);
default:
ThrowInvalidType(data.Type());
}
}
U32 IREmitter::SharedAtomicIMin(const U32& address, const U32& data, bool is_signed) {
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMin32, address, data)
: Inst<U32>(Opcode::SharedAtomicUMin32, address, data);
}
U32 IREmitter::SharedAtomicIMax(const U32& address, const U32& data, bool is_signed) {
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMax32, address, data)
: Inst<U32>(Opcode::SharedAtomicUMax32, address, data);
}
U32 IREmitter::ReadConst(const Value& base, const U32& offset) { U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
return Inst<U32>(Opcode::ReadConst, base, offset); return Inst<U32>(Opcode::ReadConst, base, offset);
} }
@ -347,6 +366,53 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad
} }
} }
Value IREmitter::BufferAtomicIAdd(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicIAdd32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicIMin(const Value& handle, const Value& address, const Value& value,
bool is_signed, BufferInstInfo info) {
return is_signed ? Inst(Opcode::BufferAtomicSMin32, Flags{info}, handle, address, value)
: Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicIMax(const Value& handle, const Value& address, const Value& value,
bool is_signed, BufferInstInfo info) {
return is_signed ? Inst(Opcode::BufferAtomicSMax32, Flags{info}, handle, address, value)
: Inst(Opcode::BufferAtomicUMax32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicInc(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicInc32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicDec(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicDec32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicAnd(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicAnd32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicOr(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicOr32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicXor(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicXor32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicExchange(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicExchange32, Flags{info}, handle, address, value);
}
void IREmitter::StoreBufferFormat(int num_dwords, const Value& handle, const Value& address, void IREmitter::StoreBufferFormat(int num_dwords, const Value& handle, const Value& address,
const Value& data, BufferInstInfo info) { const Value& data, BufferInstInfo info) {
switch (num_dwords) { switch (num_dwords) {

View file

@ -84,6 +84,10 @@ public:
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
void WriteShared(int bit_size, const Value& value, const U32& offset); void WriteShared(int bit_size, const Value& value, const U32& offset);
[[nodiscard]] U32F32 SharedAtomicIAdd(const U32& address, const U32F32& data);
[[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
[[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset); [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
[[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index); [[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index);
@ -96,6 +100,25 @@ public:
void StoreBufferFormat(int num_dwords, const Value& handle, const Value& address, void StoreBufferFormat(int num_dwords, const Value& handle, const Value& address,
const Value& data, BufferInstInfo info); const Value& data, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicIAdd(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicIMin(const Value& handle, const Value& address,
const Value& value, bool is_signed, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicIMax(const Value& handle, const Value& address,
const Value& value, bool is_signed, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicInc(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicDec(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicAnd(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicOr(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicXor(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicExchange(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] U32 LaneId(); [[nodiscard]] U32 LaneId();
[[nodiscard]] U32 WarpId(); [[nodiscard]] U32 WarpId();
[[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index); [[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index);

View file

@ -60,9 +60,25 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::StoreBufferFormatF32x3: case Opcode::StoreBufferFormatF32x3:
case Opcode::StoreBufferFormatF32x4: case Opcode::StoreBufferFormatF32x4:
case Opcode::StoreBufferU32: case Opcode::StoreBufferU32:
case Opcode::BufferAtomicIAdd32:
case Opcode::BufferAtomicSMin32:
case Opcode::BufferAtomicUMin32:
case Opcode::BufferAtomicSMax32:
case Opcode::BufferAtomicUMax32:
case Opcode::BufferAtomicInc32:
case Opcode::BufferAtomicDec32:
case Opcode::BufferAtomicAnd32:
case Opcode::BufferAtomicOr32:
case Opcode::BufferAtomicXor32:
case Opcode::BufferAtomicExchange32:
case Opcode::WriteSharedU128: case Opcode::WriteSharedU128:
case Opcode::WriteSharedU64: case Opcode::WriteSharedU64:
case Opcode::WriteSharedU32: case Opcode::WriteSharedU32:
case Opcode::SharedAtomicIAdd32:
case Opcode::SharedAtomicSMin32:
case Opcode::SharedAtomicUMin32:
case Opcode::SharedAtomicSMax32:
case Opcode::SharedAtomicUMax32:
case Opcode::ImageWrite: case Opcode::ImageWrite:
case Opcode::ImageAtomicIAdd32: case Opcode::ImageAtomicIAdd32:
case Opcode::ImageAtomicSMin32: case Opcode::ImageAtomicSMin32:

View file

@ -33,6 +33,13 @@ OPCODE(WriteSharedU32, Void, U32,
OPCODE(WriteSharedU64, Void, U32, U32x2, ) OPCODE(WriteSharedU64, Void, U32, U32x2, )
OPCODE(WriteSharedU128, Void, U32, U32x4, ) OPCODE(WriteSharedU128, Void, U32, U32x4, )
// Shared atomic operations
OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
OPCODE(SharedAtomicSMin32, U32, U32, U32, )
OPCODE(SharedAtomicUMin32, U32, U32, U32, )
OPCODE(SharedAtomicSMax32, U32, U32, U32, )
OPCODE(SharedAtomicUMax32, U32, U32, U32, )
// Context getters/setters // Context getters/setters
OPCODE(GetUserData, U32, ScalarReg, ) OPCODE(GetUserData, U32, ScalarReg, )
OPCODE(GetThreadBitScalarReg, U1, ScalarReg, ) OPCODE(GetThreadBitScalarReg, U1, ScalarReg, )
@ -88,6 +95,19 @@ OPCODE(StoreBufferFormatF32x3, Void, Opaq
OPCODE(StoreBufferFormatF32x4, Void, Opaque, Opaque, F32x4, ) OPCODE(StoreBufferFormatF32x4, Void, Opaque, Opaque, F32x4, )
OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, ) OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, )
// Buffer atomic operations
OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicExchange32, U32, Opaque, Opaque, U32, )
// Vector utility // Vector utility
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )

View file

@ -20,6 +20,42 @@ struct SharpLocation {
auto operator<=>(const SharpLocation&) const = default; auto operator<=>(const SharpLocation&) const = default;
}; };
bool IsBufferAtomic(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::BufferAtomicIAdd32:
case IR::Opcode::BufferAtomicSMin32:
case IR::Opcode::BufferAtomicUMin32:
case IR::Opcode::BufferAtomicSMax32:
case IR::Opcode::BufferAtomicUMax32:
case IR::Opcode::BufferAtomicInc32:
case IR::Opcode::BufferAtomicDec32:
case IR::Opcode::BufferAtomicAnd32:
case IR::Opcode::BufferAtomicOr32:
case IR::Opcode::BufferAtomicXor32:
case IR::Opcode::BufferAtomicExchange32:
return true;
default:
return false;
}
}
bool IsBufferStore(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::StoreBufferF32:
case IR::Opcode::StoreBufferF32x2:
case IR::Opcode::StoreBufferF32x3:
case IR::Opcode::StoreBufferF32x4:
case IR::Opcode::StoreBufferFormatF32:
case IR::Opcode::StoreBufferFormatF32x2:
case IR::Opcode::StoreBufferFormatF32x3:
case IR::Opcode::StoreBufferFormatF32x4:
case IR::Opcode::StoreBufferU32:
return true;
default:
return IsBufferAtomic(inst);
}
}
bool IsBufferInstruction(const IR::Inst& inst) { bool IsBufferInstruction(const IR::Inst& inst) {
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::LoadBufferF32: case IR::Opcode::LoadBufferF32:
@ -33,18 +69,9 @@ bool IsBufferInstruction(const IR::Inst& inst) {
case IR::Opcode::LoadBufferU32: case IR::Opcode::LoadBufferU32:
case IR::Opcode::ReadConstBuffer: case IR::Opcode::ReadConstBuffer:
case IR::Opcode::ReadConstBufferU32: case IR::Opcode::ReadConstBufferU32:
case IR::Opcode::StoreBufferF32:
case IR::Opcode::StoreBufferF32x2:
case IR::Opcode::StoreBufferF32x3:
case IR::Opcode::StoreBufferF32x4:
case IR::Opcode::StoreBufferFormatF32:
case IR::Opcode::StoreBufferFormatF32x2:
case IR::Opcode::StoreBufferFormatF32x3:
case IR::Opcode::StoreBufferFormatF32x4:
case IR::Opcode::StoreBufferU32:
return true; return true;
default: default:
return false; return IsBufferStore(inst);
} }
} }
@ -108,29 +135,13 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
case IR::Opcode::LoadBufferU32: case IR::Opcode::LoadBufferU32:
case IR::Opcode::ReadConstBufferU32: case IR::Opcode::ReadConstBufferU32:
case IR::Opcode::StoreBufferU32: case IR::Opcode::StoreBufferU32:
case IR::Opcode::BufferAtomicIAdd32:
return IR::Type::U32; return IR::Type::U32;
default: default:
UNREACHABLE(); UNREACHABLE();
} }
} }
bool IsBufferStore(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::StoreBufferF32:
case IR::Opcode::StoreBufferF32x2:
case IR::Opcode::StoreBufferF32x3:
case IR::Opcode::StoreBufferF32x4:
case IR::Opcode::StoreBufferFormatF32:
case IR::Opcode::StoreBufferFormatF32x2:
case IR::Opcode::StoreBufferFormatF32x3:
case IR::Opcode::StoreBufferFormatF32x4:
case IR::Opcode::StoreBufferU32:
return true;
default:
return false;
}
}
bool IsImageInstruction(const IR::Inst& inst) { bool IsImageInstruction(const IR::Inst& inst) {
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::ImageSampleExplicitLod: case IR::Opcode::ImageSampleExplicitLod: