mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-15 11:25:13 +00:00
ir_passes: Integrate DS barriers in block (#2020)
This commit is contained in:
parent
67c531298a
commit
dcc662ff1a
|
@ -205,7 +205,6 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
|
|||
addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)));
|
||||
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
|
||||
}
|
||||
emit_ds_read_barrier = true;
|
||||
}
|
||||
|
||||
void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
|
||||
|
@ -222,11 +221,6 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
|
|||
|
||||
void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64,
|
||||
const GcnInst& inst) {
|
||||
if (emit_ds_read_barrier && profile.needs_lds_barriers) {
|
||||
ir.Barrier();
|
||||
emit_ds_read_barrier = false;
|
||||
}
|
||||
|
||||
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
if (is_pair) {
|
||||
|
|
|
@ -308,7 +308,6 @@ private:
|
|||
const RuntimeInfo& runtime_info;
|
||||
const Profile& profile;
|
||||
bool opcode_missing = false;
|
||||
bool emit_ds_read_barrier = false;
|
||||
};
|
||||
|
||||
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info,
|
||||
|
|
|
@ -8,6 +8,54 @@
|
|||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
static void EmitBarrierInBlock(IR::Block* block) {
    // Handles the case where shared memory is both written and read inside a
    // single basic block: a barrier is emitted at a shared load that follows a
    // shared store, and at a shared store that follows a shared load.
    // This function does not look at branch depth itself; the caller is
    // responsible for only invoking it on blocks where inserting a barrier is
    // acceptable (i.e. not in potentially divergent code).

    // Set by a shared store; consumed by the next shared load.
    bool barrier_due_at_load = false;
    // Set by a shared load; consumed by the next shared store.
    bool barrier_due_at_store = false;

    // Emits a barrier through an emitter positioned at `inst` when `pending`
    // is set, then clears the flag so the same hazard is not handled twice.
    const auto flush_pending = [block](IR::Inst& inst, bool& pending) {
        if (!pending) {
            return;
        }
        IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
        ir.Barrier();
        pending = false;
    };

    for (IR::Inst& inst : block->Instructions()) {
        const auto opcode = inst.GetOpcode();
        const bool is_shared_load =
            opcode == IR::Opcode::LoadSharedU32 || opcode == IR::Opcode::LoadSharedU64;
        const bool is_shared_store =
            opcode == IR::Opcode::WriteSharedU32 || opcode == IR::Opcode::WriteSharedU64;

        if (is_shared_load) {
            flush_pending(inst, barrier_due_at_load);
            barrier_due_at_store = true;
        } else if (is_shared_store) {
            flush_pending(inst, barrier_due_at_store);
            barrier_due_at_load = true;
        }
    }
}
|
||||
|
||||
static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data) {
    // Places a barrier in the merge block of an `if` node whose condition is
    // derived from the local invocation id, i.e. after a divergent conditional.
    // This avoids potential softlocks and crashes when some threads
    // initialize shared memory and others read from it.

    // Search predicate: reports a hit for a GetAttributeU32 instruction whose
    // first argument is the LocalInvocationId attribute, and no result otherwise.
    const auto reads_local_invocation_id = [](IR::Inst* inst) -> std::optional<bool> {
        const bool is_match = inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
                              inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId;
        if (is_match) {
            return true;
        }
        return std::nullopt;
    };

    const IR::U1 cond = data.if_node.cond;
    const auto insert_barrier = IR::BreadthFirstSearch(cond, reads_local_invocation_id);
    if (!insert_barrier) {
        // The search produced no result; leave the merge block untouched.
        return;
    }

    // Phi instructions stay at the head of the block, so the emitter is
    // positioned at the first instruction that is not a phi.
    IR::Block* const merge = data.if_node.merge;
    auto first_non_phi = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi);
    IR::IREmitter ir{*merge, first_non_phi};
    ir.Barrier();
}
|
||||
|
||||
void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
|
||||
if (!program.info.uses_shared || !profile.needs_lds_barriers) {
|
||||
return;
|
||||
|
@ -19,27 +67,12 @@ void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
|
|||
--branch_depth;
|
||||
continue;
|
||||
}
|
||||
if (node.type != Type::If) {
|
||||
if (node.type == Type::If && branch_depth++ == 0) {
|
||||
EmitBarrierInMergeBlock(node.data);
|
||||
continue;
|
||||
}
|
||||
u32 curr_depth = branch_depth++;
|
||||
if (curr_depth != 0) {
|
||||
continue;
|
||||
}
|
||||
const IR::U1 cond = node.data.if_node.cond;
|
||||
const auto insert_barrier =
|
||||
IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional<bool> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
|
||||
inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) {
|
||||
return true;
|
||||
}
|
||||
return std::nullopt;
|
||||
});
|
||||
if (insert_barrier) {
|
||||
IR::Block* const merge = node.data.if_node.merge;
|
||||
auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi);
|
||||
IR::IREmitter ir{*merge, insert_point};
|
||||
ir.Barrier();
|
||||
if (node.type == Type::Block && branch_depth == 0) {
|
||||
EmitBarrierInBlock(node.data.block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue