data_share: Emit barrier before reads

* Fixes artifacts in TLG when using NVIDIA gpus. When LDS is written and read in the same basic block, the barrier pass wont handle it properly, so insert a barrier before reads
This commit is contained in:
IndecisiveTurtle 2024-12-24 16:04:30 +02:00
parent a7220577e9
commit 46dc7cafa1
2 changed files with 9 additions and 0 deletions

View file

@ -1,7 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader::Gcn {
@ -203,6 +205,7 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)));
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
}
emit_ds_read_barrier = true;
}
void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
@ -219,6 +222,11 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64,
const GcnInst& inst) {
if (emit_ds_read_barrier && profile.needs_lds_barriers) {
ir.Barrier();
emit_ds_read_barrier = false;
}
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
IR::VectorReg dst_reg{inst.dst[0].code};
if (is_pair) {

View file

@ -306,6 +306,7 @@ private:
const RuntimeInfo& runtime_info;
const Profile& profile;
bool opcode_missing = false;
bool emit_ds_read_barrier = false;
};
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info,