From 46dc7cafa1b9873b7b1f842c6558440839ee28e9 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Tue, 24 Dec 2024 16:04:30 +0200 Subject: [PATCH] data_share: Emit barrier before reads * Fixes artifacts in TLG when using NVIDIA GPUs. When LDS is written and read in the same basic block, the barrier pass won't handle it properly, so insert a barrier before reads --- src/shader_recompiler/frontend/translate/data_share.cpp | 8 ++++++++ src/shader_recompiler/frontend/translate/translate.h | 1 + 2 files changed, 9 insertions(+) diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 116935b9..4408cae2 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -1,7 +1,9 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later + #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/ir/reg.h" +#include "shader_recompiler/profile.h" #include "shader_recompiler/runtime_info.h" namespace Shader::Gcn { @@ -203,6 +205,7 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0))); ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0); } + emit_ds_read_barrier = true; } void Translator::DS_SWIZZLE_B32(const GcnInst& inst) { @@ -219,6 +222,11 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) { void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst) { + if (emit_ds_read_barrier && profile.needs_lds_barriers) { + ir.Barrier(); + emit_ds_read_barrier = false; + } + const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; IR::VectorReg dst_reg{inst.dst[0].code}; if (is_pair) { diff --git 
a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 218b66d7..fd4d8d86 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -306,6 +306,7 @@ private: const RuntimeInfo& runtime_info; const Profile& profile; bool opcode_missing = false; + bool emit_ds_read_barrier = false; }; void Translate(IR::Block* block, u32 block_base, std::span inst_list, Info& info,