ir_passes: Integrate DS barriers in block (#2020)

This commit is contained in:
TheTurtle 2025-01-02 22:52:10 +02:00 committed by GitHub
parent 67c531298a
commit dcc662ff1a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 52 additions and 26 deletions

View file

@ -205,7 +205,6 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0))); addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)));
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0); ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
} }
emit_ds_read_barrier = true;
} }
void Translator::DS_SWIZZLE_B32(const GcnInst& inst) { void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
@ -222,11 +221,6 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64,
const GcnInst& inst) { const GcnInst& inst) {
if (emit_ds_read_barrier && profile.needs_lds_barriers) {
ir.Barrier();
emit_ds_read_barrier = false;
}
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
IR::VectorReg dst_reg{inst.dst[0].code}; IR::VectorReg dst_reg{inst.dst[0].code};
if (is_pair) { if (is_pair) {

View file

@ -308,7 +308,6 @@ private:
const RuntimeInfo& runtime_info; const RuntimeInfo& runtime_info;
const Profile& profile; const Profile& profile;
bool opcode_missing = false; bool opcode_missing = false;
bool emit_ds_read_barrier = false;
}; };
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info, void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info,

View file

@ -8,6 +8,54 @@
namespace Shader::Optimization { namespace Shader::Optimization {
static void EmitBarrierInBlock(IR::Block* block) {
    // Separates shared memory loads and writes that live in the same basic
    // block with a barrier:
    //   - the first shared load that follows a shared write gets a barrier
    //     placed right before it;
    //   - the first shared write that follows a shared load gets a barrier
    //     placed right before it.
    // This function does not look at branch depth itself; the calling pass
    // only invokes it for blocks at branch depth zero so that no barrier
    // ends up in potentially divergent code.
    bool barrier_due_before_write = false;
    bool barrier_due_before_read = false;

    // Emits a barrier immediately before `at` when `pending` is set, then
    // clears the flag so that only one barrier is emitted per transition.
    const auto flush_pending = [block](bool& pending, IR::Inst& at) {
        if (!pending) {
            return;
        }
        IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(at)};
        ir.Barrier();
        pending = false;
    };

    for (IR::Inst& inst : block->Instructions()) {
        const auto opcode = inst.GetOpcode();
        const bool is_shared_load =
            opcode == IR::Opcode::LoadSharedU32 || opcode == IR::Opcode::LoadSharedU64;
        const bool is_shared_write =
            opcode == IR::Opcode::WriteSharedU32 || opcode == IR::Opcode::WriteSharedU64;

        if (is_shared_load) {
            flush_pending(barrier_due_before_read, inst);
            // A later write must not overtake this load.
            barrier_due_before_write = true;
        } else if (is_shared_write) {
            flush_pending(barrier_due_before_write, inst);
            // A later load must observe this write.
            barrier_due_before_read = true;
        }
    }
}
static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data) {
    // Places a barrier at the top of an if-node's merge block (after any phi
    // instructions) when the branch condition is derived from the local
    // invocation id, i.e. when the conditional is divergent within a workgroup.
    // The intent is to avoid potential softlocks and crashes when some threads
    // initialize shared memory and others read from it.

    // Search predicate: reports a hit on a LocalInvocationId attribute read,
    // otherwise lets the search continue.
    const auto reads_local_invocation_id = [](IR::Inst* inst) -> std::optional<bool> {
        const bool is_attribute_read = inst->GetOpcode() == IR::Opcode::GetAttributeU32;
        if (is_attribute_read && inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) {
            return true;
        }
        return std::nullopt;
    };

    const IR::U1 condition = data.if_node.cond;
    const auto search_hit = IR::BreadthFirstSearch(condition, reads_local_invocation_id);
    if (!search_hit) {
        // Condition does not depend on the local invocation id; nothing to do.
        return;
    }

    IR::Block* const merge_block = data.if_node.merge;
    // The barrier goes before the first instruction that is not a phi.
    auto first_non_phi = std::ranges::find_if_not(merge_block->Instructions(), IR::IsPhi);
    IR::IREmitter ir{*merge_block, first_non_phi};
    ir.Barrier();
}
void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) { void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
if (!program.info.uses_shared || !profile.needs_lds_barriers) { if (!program.info.uses_shared || !profile.needs_lds_barriers) {
return; return;
@ -19,27 +67,12 @@ void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
--branch_depth; --branch_depth;
continue; continue;
} }
if (node.type != Type::If) { if (node.type == Type::If && branch_depth++ == 0) {
EmitBarrierInMergeBlock(node.data);
continue; continue;
} }
u32 curr_depth = branch_depth++; if (node.type == Type::Block && branch_depth == 0) {
if (curr_depth != 0) { EmitBarrierInBlock(node.data.block);
continue;
}
const IR::U1 cond = node.data.if_node.cond;
const auto insert_barrier =
IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional<bool> {
if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) {
return true;
}
return std::nullopt;
});
if (insert_barrier) {
IR::Block* const merge = node.data.if_node.merge;
auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi);
IR::IREmitter ir{*merge, insert_point};
ir.Barrier();
} }
} }
} }