Mirror of https://github.com/shadps4-emu/shadPS4.git
ir: Add heuristic based LDS barrier pass (#1801)
* ir: Add heuristic based LDS barrier pass
* Attempts to insert barriers after zero-depth divergent conditional blocks in shaders that use shared memory
* lds_barriers: Limit to Nvidia
* Intel has historically had problems with cs barriers; will debug another time
parent adf4b635f7
commit 188eebb92a
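For context on the heuristic: it targets compute shaders where a branch guarded by the local invocation id produces a value in shared memory (LDS) that the rest of the workgroup consumes after the branch. Below is a rough host-side C++ analogy of that pattern, not emulator or shader code; the names tid, lds, and sync are illustrative stand-ins, and the arrive_and_wait() call plays the role of the barrier the pass inserts at the merge point of the divergent branch.

// Host-side analogy only: "tid" stands in for LocalInvocationId, "lds" for a
// shared-memory buffer, and sync.arrive_and_wait() for the inserted barrier.
#include <array>
#include <barrier>
#include <cstdio>
#include <thread>
#include <vector>

int main() {
    constexpr int group_size = 8;
    std::array<int, group_size> lds{}; // stand-in for shared (LDS) memory
    std::barrier sync{group_size};     // stand-in for the inserted barrier

    auto worker = [&](int tid) {
        if (tid == 0) {         // zero-depth branch on the invocation id
            lds[0] = 42;        // only one "thread" produces the value
        }
        sync.arrive_and_wait(); // without this, readers may race the write
        std::printf("tid %d sees %d\n", tid, lds[0]);
    };

    std::vector<std::thread> threads;
    for (int tid = 0; tid < group_size; ++tid) {
        threads.emplace_back(worker, tid);
    }
    for (auto& t : threads) {
        t.join();
    }
}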
@@ -671,6 +671,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
     src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
     src/shader_recompiler/ir/passes/ring_access_elimination.cpp
     src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
+    src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp
     src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
     src/shader_recompiler/ir/abstract_syntax_list.h
     src/shader_recompiler/ir/attribute.cpp
@@ -6,6 +6,10 @@
 #include "shader_recompiler/ir/basic_block.h"
 #include "shader_recompiler/ir/program.h"
 
+namespace Shader {
+struct Profile;
+}
+
 namespace Shader::Optimization {
 
 void SsaRewritePass(IR::BlockList& program);
@@ -21,5 +25,6 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
 void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info);
 void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
 void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
+void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile);
 
 } // namespace Shader::Optimization
@@ -0,0 +1,46 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/ir/breadth_first_search.h"
+#include "shader_recompiler/ir/ir_emitter.h"
+#include "shader_recompiler/ir/program.h"
+#include "shader_recompiler/profile.h"
+
+namespace Shader::Optimization {
+
+void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
+    if (!program.info.uses_shared || !profile.needs_lds_barriers) {
+        return;
+    }
+    using Type = IR::AbstractSyntaxNode::Type;
+    u32 branch_depth{};
+    for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
+        if (node.type == Type::EndIf) {
+            --branch_depth;
+            continue;
+        }
+        if (node.type != Type::If) {
+            continue;
+        }
+        u32 curr_depth = branch_depth++;
+        if (curr_depth != 0) {
+            continue;
+        }
+        const IR::U1 cond = node.data.if_node.cond;
+        const auto insert_barrier = IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional<bool> {
+            if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
+                inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) {
+                return true;
+            }
+            return std::nullopt;
+        });
+        if (insert_barrier) {
+            IR::Block* const merge = node.data.if_node.merge;
+            auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi);
+            IR::IREmitter ir{*merge, insert_point};
+            ir.Barrier();
+        }
+    }
+}
+
+} // namespace Shader::Optimization
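To make the "zero-depth" part of the heuristic concrete, here is a small, self-contained toy restatement of the depth bookkeeping above. The Node and NodeType types are hypothetical, not the emulator's IR, and the breadth-first search over the condition is folded into a precomputed flag: only If nodes entered at depth 0 whose condition reads the local invocation id are flagged, nested Ifs are skipped, and each EndIf pops the depth again.

// Toy sketch with made-up types; mirrors the control flow of the pass above.
#include <cstddef>
#include <cstdio>
#include <vector>

enum class NodeType { If, EndIf, Other };

struct Node {
    NodeType type;
    bool cond_uses_local_invocation_id; // result of the condition search above
};

int main() {
    // Structure: If(tid) { If(x) { } }  ...  If(tid) { }
    std::vector<Node> syntax_list{
        {NodeType::If, true},     {NodeType::If, false}, {NodeType::EndIf, false},
        {NodeType::EndIf, false}, {NodeType::Other, false},
        {NodeType::If, true},     {NodeType::EndIf, false},
    };

    unsigned branch_depth = 0;
    for (std::size_t i = 0; i < syntax_list.size(); ++i) {
        const Node& node = syntax_list[i];
        if (node.type == NodeType::EndIf) {
            --branch_depth;
            continue;
        }
        if (node.type != NodeType::If) {
            continue;
        }
        const unsigned curr_depth = branch_depth++;
        if (curr_depth != 0 || !node.cond_uses_local_invocation_id) {
            continue; // nested or non-divergent: leave it alone
        }
        std::printf("would insert barrier at merge of If node %zu\n", i);
    }
}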
@@ -27,6 +27,7 @@ struct Profile {
     bool has_broken_spirv_clamp{};
     bool lower_left_origin_mode{};
     bool needs_manual_interpolation{};
+    bool needs_lds_barriers{};
     u64 min_ssbo_alignment{};
 };
 
@@ -91,6 +91,7 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
     Shader::Optimization::IdentityRemovalPass(program.blocks);
     Shader::Optimization::DeadCodeEliminationPass(program);
     Shader::Optimization::CollectShaderInfoPass(program);
+    Shader::Optimization::SharedMemoryBarrierPass(program, profile);
 
     return program;
 }
@@ -204,6 +204,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
         .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
         .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
                                       instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
+        .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
     };
     auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({});
     ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}",