From 875428e784cf4b09506fc71abf53b54780d46891 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Thu, 19 Dec 2024 01:41:01 +0200 Subject: [PATCH] lds_barriers: Limit to nvidia * Intel has historically had problems with cs barriers, will debug another time --- src/shader_recompiler/ir/passes/ir_passes.h | 6 +++++- .../ir/passes/shared_memory_barrier_pass.cpp | 5 +++-- src/shader_recompiler/profile.h | 1 + src/shader_recompiler/recompiler.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 5 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index ab5eb0fd7..8a71d9e1f 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -6,6 +6,10 @@ #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/program.h" +namespace Shader { +struct Profile; +} + namespace Shader::Optimization { void SsaRewritePass(IR::BlockList& program); @@ -21,6 +25,6 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info); void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); -void SharedMemoryBarrierPass(IR::Program& program); +void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp index fe0847187..cae001e96 100644 --- a/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp +++ b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp @@ -4,11 +4,12 @@ #include "shader_recompiler/ir/breadth_first_search.h" #include 
"shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/program.h" +#include "shader_recompiler/profile.h" namespace Shader::Optimization { -void SharedMemoryBarrierPass(IR::Program& program) { - if (!program.info.uses_shared) { +void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) { + if (!program.info.uses_shared || !profile.needs_lds_barriers) { return; } using Type = IR::AbstractSyntaxNode::Type; diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c00e37f9c..fc8c5956e 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -27,6 +27,7 @@ struct Profile { bool has_broken_spirv_clamp{}; bool lower_left_origin_mode{}; bool needs_manual_interpolation{}; + bool needs_lds_barriers{}; u64 min_ssbo_alignment{}; }; diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 60e0b7df2..bb027a11e 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -91,7 +91,7 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info Shader::Optimization::IdentityRemovalPass(program.blocks); Shader::Optimization::DeadCodeEliminationPass(program); Shader::Optimization::CollectShaderInfoPass(program); - Shader::Optimization::SharedMemoryBarrierPass(program); + Shader::Optimization::SharedMemoryBarrierPass(program, profile); return program; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 50396287b..4b88bd374 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -204,6 +204,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(), .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() && instance.GetDriverID() == 
vk::DriverId::eNvidiaProprietary, + .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, }; auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({}); ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}",