shader_recompilers: Improvements to SSA phi generation and lane instruction elimination (#1667)

* shader_recompiler: Add use tracking for Insts

* ssa_rewrite: Recursively remove phis

* ssa_rewrite: Correct recursive trivial phi elimination

* ir: Improve read lane folding pass

* control_flow: Avoid adding unnecessary divergent blocks

* clang format

* externals: Update ext-boost

---------

Co-authored-by: Frodo Baggins <baggins31084@proton.me>
This commit is contained in:
TheTurtle 2024-12-05 23:14:16 +02:00 committed by GitHub
parent 874508f8c2
commit 22a2741ea0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 175 additions and 76 deletions

2
externals/ext-boost vendored

@ -1 +1 @@
Subproject commit f2474e1b584fb7a3ed6f85ba875e6eacd742ec8a
Subproject commit ca6f230e67be7cc45fc919057f07b2aee64dadc1

View file

@ -47,6 +47,15 @@ static IR::Condition MakeCondition(const GcnInst& inst) {
}
}
/// Returns true when `opcode` is one of the opcodes listed here as ignoring the exec mask.
/// At present the only such opcode is V_WRITELANE_B32; every other opcode yields false.
static bool IgnoresExecMask(Opcode opcode) {
    return opcode == Opcode::V_WRITELANE_B32;
}
static constexpr size_t LabelReserveSize = 32;
CFG::CFG(Common::ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
@ -133,20 +142,26 @@ void CFG::EmitDivergenceLabels() {
curr_begin = -1;
continue;
}
// Add a label to the instruction right after the open scope call.
// It is the start of a new basic block.
const auto& save_inst = inst_list[curr_begin];
const Label label = index_to_pc[curr_begin] + save_inst.length;
AddLabel(label);
// Add a label to the close scope instruction.
// There are 3 cases where we need to close a scope.
// * Close scope instruction inside the block
// * Close scope instruction at the end of the block (cbranch or endpgm)
// * Normal instruction at the end of the block
// For the last case we must NOT add a label as that would cause
// the instruction to be separated into its own basic block.
if (is_close) {
AddLabel(index_to_pc[index]);
// If all instructions in the scope ignore exec masking, we shouldn't insert a
// scope.
const auto start = inst_list.begin() + curr_begin + 1;
if (!std::ranges::all_of(start, inst_list.begin() + index, IgnoresExecMask,
&GcnInst::opcode)) {
// Add a label to the instruction right after the open scope call.
// It is the start of a new basic block.
const auto& save_inst = inst_list[curr_begin];
const Label label = index_to_pc[curr_begin] + save_inst.length;
AddLabel(label);
// Add a label to the close scope instruction.
// There are 3 cases where we need to close a scope.
// * Close scope instruction inside the block
// * Close scope instruction at the end of the block (cbranch or endpgm)
// * Normal instruction at the end of the block
// For the last case we must NOT add a label as that would cause
// the instruction to be separated into its own basic block.
if (is_close) {
AddLabel(index_to_pc[index]);
}
}
// Reset scope begin.
curr_begin = -1;

View file

@ -19,12 +19,14 @@ void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
/// Creates a new instruction from `base_inst` through `inst_pool`, records this block as its
/// parent and inserts it into `instructions` at `insertion_point`.
/// @return The iterator returned by the list insertion, referring to the new instruction.
Block::iterator Block::PrependNewInst(iterator insertion_point, const Inst& base_inst) {
    Inst& created = *inst_pool->Create(base_inst);
    // The parent must be known so later passes can query GetParent() on this instruction.
    created.SetParent(this);
    return instructions.insert(insertion_point, created);
}
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
std::initializer_list<Value> args, u32 flags) {
Inst* const inst{inst_pool->Create(op, flags)};
inst->SetParent(this);
const auto result_it{instructions.insert(insertion_point, *inst)};
if (inst->NumArgs() != args.size()) {

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <any>
#include <memory>
#include "shader_recompiler/exception.h"
@ -119,10 +120,10 @@ void Inst::SetArg(size_t index, Value value) {
}
const IR::Value arg{Arg(index)};
if (!arg.IsImmediate()) {
UndoUse(arg);
UndoUse(arg.Inst(), index);
}
if (!value.IsImmediate()) {
Use(value);
Use(value.Inst(), index);
}
if (op == Opcode::Phi) {
phi_args[index].second = value;
@ -143,7 +144,7 @@ Block* Inst::PhiBlock(size_t index) const {
void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
if (!value.IsImmediate()) {
Use(value);
Use(value.Inst(), phi_args.size());
}
phi_args.emplace_back(predecessor, value);
}
@ -155,17 +156,19 @@ void Inst::Invalidate() {
void Inst::ClearArgs() {
if (op == Opcode::Phi) {
for (auto& pair : phi_args) {
for (auto i = 0; i < phi_args.size(); i++) {
auto& pair = phi_args[i];
IR::Value& value{pair.second};
if (!value.IsImmediate()) {
UndoUse(value);
UndoUse(value.Inst(), i);
}
}
phi_args.clear();
} else {
for (auto& value : args) {
for (auto i = 0; i < args.size(); i++) {
auto& value = args[i];
if (!value.IsImmediate()) {
UndoUse(value);
UndoUse(value.Inst(), i);
}
}
// Reset arguments to null
@ -174,13 +177,21 @@ void Inst::ClearArgs() {
}
}
void Inst::ReplaceUsesWith(Value replacement) {
Invalidate();
ReplaceOpcode(Opcode::Identity);
if (!replacement.IsImmediate()) {
Use(replacement);
void Inst::ReplaceUsesWith(Value replacement, bool preserve) {
// Copy since user->SetArg will mutate this->uses
// Could also do temp_uses = std::move(uses) but more readable
const auto temp_uses = uses;
for (const auto& [user, operand] : temp_uses) {
DEBUG_ASSERT(user->Arg(operand).Inst() == this);
user->SetArg(operand, replacement);
}
Invalidate();
if (preserve) {
// Still useful to have Identity for indirection.
// SSA pass would be more complicated without it
ReplaceOpcode(Opcode::Identity);
SetArg(0, replacement);
}
args[0] = replacement;
}
void Inst::ReplaceOpcode(IR::Opcode opcode) {
@ -195,14 +206,15 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) {
op = opcode;
}
void Inst::Use(const Value& value) {
Inst* const inst{value.Inst()};
++inst->use_count;
void Inst::Use(Inst* used, u32 operand) {
DEBUG_ASSERT(0 == std::count(used->uses.begin(), used->uses.end(), IR::Use(this, operand)));
used->uses.emplace_front(this, operand);
}
void Inst::UndoUse(const Value& value) {
Inst* const inst{value.Inst()};
--inst->use_count;
void Inst::UndoUse(Inst* used, u32 operand) {
IR::Use use(this, operand);
DEBUG_ASSERT(1 == std::count(used->uses.begin(), used->uses.end(), use));
used->uses.remove(use);
}
} // namespace Shader::IR

View file

@ -43,7 +43,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
if (is_lhs_immediate && is_rhs_immediate) {
const auto result{imm_fn(Arg<T>(lhs), Arg<T>(rhs))};
inst.ReplaceUsesWith(IR::Value{result});
inst.ReplaceUsesWithAndRemove(IR::Value{result});
return false;
}
if (is_lhs_immediate && !is_rhs_immediate) {
@ -75,7 +75,7 @@ bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
return false;
}
using Indices = std::make_index_sequence<Common::LambdaTraits<decltype(func)>::NUM_ARGS>;
inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{}));
inst.ReplaceUsesWithAndRemove(EvalImmediates(inst, func, Indices{}));
return true;
}
@ -83,12 +83,12 @@ template <IR::Opcode op, typename Dest, typename Source>
void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
const IR::Value value{inst.Arg(0)};
if (value.IsImmediate()) {
inst.ReplaceUsesWith(IR::Value{std::bit_cast<Dest>(Arg<Source>(value))});
inst.ReplaceUsesWithAndRemove(IR::Value{std::bit_cast<Dest>(Arg<Source>(value))});
return;
}
IR::Inst* const arg_inst{value.InstRecursive()};
if (arg_inst->GetOpcode() == reverse) {
inst.ReplaceUsesWith(arg_inst->Arg(0));
inst.ReplaceUsesWithAndRemove(arg_inst->Arg(0));
return;
}
}
@ -131,7 +131,7 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser
if (!result) {
return;
}
inst.ReplaceUsesWith(*result);
inst.ReplaceUsesWithAndRemove(*result);
}
void FoldConvert(IR::Inst& inst, IR::Opcode opposite) {
@ -141,7 +141,7 @@ void FoldConvert(IR::Inst& inst, IR::Opcode opposite) {
}
IR::Inst* const producer{value.InstRecursive()};
if (producer->GetOpcode() == opposite) {
inst.ReplaceUsesWith(producer->Arg(0));
inst.ReplaceUsesWithAndRemove(producer->Arg(0));
}
}
@ -152,9 +152,9 @@ void FoldLogicalAnd(IR::Inst& inst) {
const IR::Value rhs{inst.Arg(1)};
if (rhs.IsImmediate()) {
if (rhs.U1()) {
inst.ReplaceUsesWith(inst.Arg(0));
inst.ReplaceUsesWithAndRemove(inst.Arg(0));
} else {
inst.ReplaceUsesWith(IR::Value{false});
inst.ReplaceUsesWithAndRemove(IR::Value{false});
}
}
}
@ -162,7 +162,7 @@ void FoldLogicalAnd(IR::Inst& inst) {
void FoldSelect(IR::Inst& inst) {
const IR::Value cond{inst.Arg(0)};
if (cond.IsImmediate()) {
inst.ReplaceUsesWith(cond.U1() ? inst.Arg(1) : inst.Arg(2));
inst.ReplaceUsesWithAndRemove(cond.U1() ? inst.Arg(1) : inst.Arg(2));
}
}
@ -173,9 +173,9 @@ void FoldLogicalOr(IR::Inst& inst) {
const IR::Value rhs{inst.Arg(1)};
if (rhs.IsImmediate()) {
if (rhs.U1()) {
inst.ReplaceUsesWith(IR::Value{true});
inst.ReplaceUsesWithAndRemove(IR::Value{true});
} else {
inst.ReplaceUsesWith(inst.Arg(0));
inst.ReplaceUsesWithAndRemove(inst.Arg(0));
}
}
}
@ -183,12 +183,12 @@ void FoldLogicalOr(IR::Inst& inst) {
void FoldLogicalNot(IR::Inst& inst) {
const IR::U1 value{inst.Arg(0)};
if (value.IsImmediate()) {
inst.ReplaceUsesWith(IR::Value{!value.U1()});
inst.ReplaceUsesWithAndRemove(IR::Value{!value.U1()});
return;
}
IR::Inst* const arg{value.InstRecursive()};
if (arg->GetOpcode() == IR::Opcode::LogicalNot) {
inst.ReplaceUsesWith(arg->Arg(0));
inst.ReplaceUsesWithAndRemove(arg->Arg(0));
}
}
@ -199,7 +199,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
}
IR::Inst* const arg_inst{value.InstRecursive()};
if (arg_inst->GetOpcode() == reverse) {
inst.ReplaceUsesWith(arg_inst->Arg(0));
inst.ReplaceUsesWithAndRemove(arg_inst->Arg(0));
return;
}
}
@ -211,7 +211,7 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) {
}
const IR::Value rhs{inst.Arg(1)};
if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
inst.ReplaceUsesWith(inst.Arg(0));
inst.ReplaceUsesWithAndRemove(inst.Arg(0));
return;
}
}
@ -226,21 +226,58 @@ void FoldCmpClass(IR::Block& block, IR::Inst& inst) {
} else if ((class_mask & IR::FloatClassFunc::Finite) == IR::FloatClassFunc::Finite) {
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
const IR::F32 value = IR::F32{inst.Arg(0)};
inst.ReplaceUsesWith(ir.LogicalNot(ir.LogicalOr(ir.FPIsInf(value), ir.FPIsInf(value))));
inst.ReplaceUsesWithAndRemove(
ir.LogicalNot(ir.LogicalOr(ir.FPIsInf(value), ir.FPIsInf(value))));
} else {
UNREACHABLE();
}
}
void FoldReadLane(IR::Inst& inst) {
void FoldReadLane(IR::Block& block, IR::Inst& inst) {
const u32 lane = inst.Arg(1).U32();
IR::Inst* prod = inst.Arg(0).InstRecursive();
while (prod->GetOpcode() == IR::Opcode::WriteLane) {
if (prod->Arg(2).U32() == lane) {
inst.ReplaceUsesWith(prod->Arg(1));
const auto search_chain = [lane](const IR::Inst* prod) -> IR::Value {
while (prod->GetOpcode() == IR::Opcode::WriteLane) {
if (prod->Arg(2).U32() == lane) {
return prod->Arg(1);
}
prod = prod->Arg(0).InstRecursive();
}
return {};
};
if (prod->GetOpcode() == IR::Opcode::WriteLane) {
if (const IR::Value value = search_chain(prod); !value.IsEmpty()) {
inst.ReplaceUsesWith(value);
}
return;
}
if (prod->GetOpcode() == IR::Opcode::Phi) {
boost::container::small_vector<IR::Value, 2> phi_args;
for (size_t arg_index = 0; arg_index < prod->NumArgs(); ++arg_index) {
const IR::Inst* arg{prod->Arg(arg_index).InstRecursive()};
if (arg->GetOpcode() != IR::Opcode::WriteLane) {
return;
}
const IR::Value value = search_chain(arg);
if (value.IsEmpty()) {
continue;
}
phi_args.emplace_back(value);
}
if (std::ranges::all_of(phi_args, [&](IR::Value value) { return value == phi_args[0]; })) {
inst.ReplaceUsesWith(phi_args[0]);
return;
}
prod = prod->Arg(0).InstRecursive();
const auto insert_point = IR::Block::InstructionList::s_iterator_to(*prod);
IR::Inst* const new_phi{&*block.PrependNewInst(insert_point, IR::Opcode::Phi)};
new_phi->SetFlags(IR::Type::U32);
for (size_t arg_index = 0; arg_index < phi_args.size(); arg_index++) {
new_phi->AddPhiOperand(prod->PhiBlock(arg_index), phi_args[arg_index]);
}
inst.ReplaceUsesWith(IR::Value{new_phi});
}
}
@ -290,7 +327,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
case IR::Opcode::SelectF64:
return FoldSelect(inst);
case IR::Opcode::ReadLane:
return FoldReadLane(inst);
return FoldReadLane(block, inst);
case IR::Opcode::FPNeg32:
FoldWhenAllImmediates(inst, [](f32 a) { return -a; });
return;

View file

@ -25,7 +25,7 @@ void LowerSharedMemToRegisters(IR::Program& program) {
});
ASSERT(it != ds_writes.end());
// Replace data read with value written.
inst.ReplaceUsesWith((*it)->Arg(1));
inst.ReplaceUsesWithAndRemove((*it)->Arg(1));
}
}
}

View file

@ -596,7 +596,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
}
return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info);
}();
inst.ReplaceUsesWith(new_inst);
inst.ReplaceUsesWithAndRemove(new_inst);
}
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {

View file

@ -164,7 +164,6 @@ IR::Opcode UndefOpcode(const FlagTag) noexcept {
enum class Status {
Start,
SetValue,
PreparePhiArgument,
PushPhiArgument,
};
@ -253,12 +252,10 @@ public:
IR::Inst* const phi{stack.back().phi};
phi->AddPhiOperand(*stack.back().pred_it, stack.back().result);
++stack.back().pred_it;
}
[[fallthrough]];
case Status::PreparePhiArgument:
prepare_phi_operand();
break;
}
}
} while (stack.size() > 1);
return stack.back().result;
}
@ -266,9 +263,7 @@ public:
void SealBlock(IR::Block* block) {
const auto it{incomplete_phis.find(block)};
if (it != incomplete_phis.end()) {
for (auto& pair : it->second) {
auto& variant{pair.first};
auto& phi{pair.second};
for (auto& [variant, phi] : it->second) {
std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant);
}
}
@ -289,7 +284,7 @@ private:
const size_t num_args{phi.NumArgs()};
for (size_t arg_index = 0; arg_index < num_args; ++arg_index) {
const IR::Value& op{phi.Arg(arg_index)};
if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) {
if (op.Resolve() == same.Resolve() || op.Resolve() == IR::Value{&phi}) {
// Unique value or self-reference
continue;
}
@ -314,9 +309,15 @@ private:
++reinsert_point;
}
// Reinsert the phi node and reroute all its uses to the "same" value
const auto users = phi.Uses();
list.insert(reinsert_point, phi);
phi.ReplaceUsesWith(same);
// TODO: Try to recursively remove all phi users, which might have become trivial
// Try to recursively remove all phi users, which might have become trivial
for (const auto& [user, arg_index] : users) {
if (user->GetOpcode() == IR::Opcode::Phi) {
TryRemoveTrivialPhi(*user, user->GetParent(), undef_opcode);
}
}
return same;
}

View file

@ -8,6 +8,7 @@
#include <cstring>
#include <type_traits>
#include <utility>
#include <boost/container/list.hpp>
#include <boost/container/small_vector.hpp>
#include <boost/intrusive/list.hpp>
@ -107,6 +108,16 @@ public:
explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {}
};
/// One use of an instruction's result: the consuming instruction together with the index of the
/// operand through which it references the value.
struct Use {
    /// Instruction that holds the reference. Null for a default-constructed Use.
    Inst* user{};
    /// Operand index inside `user`. Zero for a default-constructed Use.
    u32 operand{};

    // Members carry default initializers so a default-constructed Use never holds indeterminate
    // values that the defaulted operator== would otherwise read.
    Use() = default;
    Use(Inst* user_, u32 operand_) : user(user_), operand(operand_) {}
    Use(const Use&) = default;
    bool operator==(const Use&) const noexcept = default;
};
class Inst : public boost::intrusive::list_base_hook<> {
public:
explicit Inst(IR::Opcode op_, u32 flags_) noexcept;
@ -118,14 +129,22 @@ public:
Inst& operator=(Inst&&) = delete;
Inst(Inst&&) = delete;
/// Returns the block stored in `parent`. Asserts that a parent has been set.
IR::Block* GetParent() const {
ASSERT(parent);
return parent;
}
/// Stores `block` as this instruction's parent, which GetParent() later returns.
void SetParent(IR::Block* block) {
parent = block;
}
/// Get the number of uses this instruction has.
[[nodiscard]] int UseCount() const noexcept {
return use_count;
return uses.size();
}
/// Determines whether this instruction has uses or not.
[[nodiscard]] bool HasUses() const noexcept {
return use_count > 0;
return uses.size() > 0;
}
/// Get the opcode this microinstruction represents.
@ -167,7 +186,13 @@ public:
void Invalidate();
void ClearArgs();
void ReplaceUsesWith(Value replacement);
/// Forwards to the private two-argument ReplaceUsesWith with `preserve` set to false.
void ReplaceUsesWithAndRemove(Value replacement) {
ReplaceUsesWith(replacement, false);
}
/// Forwards to the private two-argument ReplaceUsesWith with `preserve` set to true.
void ReplaceUsesWith(Value replacement) {
ReplaceUsesWith(replacement, true);
}
void ReplaceOpcode(IR::Opcode opcode);
@ -197,25 +222,32 @@ public:
return std::bit_cast<DefinitionType>(definition);
}
/// Returns the recorded uses of this instruction.
/// The deduced return type is a value, not a reference, so the caller receives a copy of the
/// list that stays unchanged if the instruction's own use list is modified afterwards.
const auto Uses() const {
return uses;
}
private:
struct NonTriviallyDummy {
NonTriviallyDummy() noexcept {}
};
void Use(const Value& value);
void UndoUse(const Value& value);
void Use(Inst* used, u32 operand);
void UndoUse(Inst* used, u32 operand);
void ReplaceUsesWith(Value replacement, bool preserve);
IR::Opcode op{};
int use_count{};
u32 flags{};
u32 definition{};
IR::Block* parent{};
union {
NonTriviallyDummy dummy{};
boost::container::small_vector<std::pair<Block*, Value>, 2> phi_args;
std::array<Value, 6> args;
};
boost::container::list<IR::Use> uses;
};
static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
static_assert(sizeof(Inst) <= 160, "Inst size unintentionally increased");
using U1 = TypedValue<Type::U1>;
using U8 = TypedValue<Type::U8>;
@ -373,4 +405,4 @@ template <>
struct hash<Shader::IR::Value> {
std::size_t operator()(const Shader::IR::Value& v) const;
};
} // namespace std
} // namespace std

View file

@ -715,7 +715,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
false);
} else if (dma_data->src_sel == DmaDataSrc::Gds &&
dma_data->dst_sel == DmaDataDst::Memory) {
LOG_WARNING(Render_Vulkan, "GDS memory read");
// LOG_WARNING(Render_Vulkan, "GDS memory read");
} else if (dma_data->src_sel == DmaDataSrc::Memory &&
dma_data->dst_sel == DmaDataDst::Memory) {
rasterizer->InlineData(dma_data->DstAddress<VAddr>(),