From 5aa3a4d4a00c720b1285c05a1b5bf6c85820c690 Mon Sep 17 00:00:00 2001 From: raphaelthegreat <47210458+raphaelthegreat@users.noreply.github.com> Date: Thu, 6 Jun 2024 02:24:30 +0300 Subject: [PATCH] shader: Fix block processing order in dead code elimination pass --- .../frontend/control_flow_graph.h | 1 + .../frontend/structured_control_flow.cpp | 10 ++- .../frontend/translate/scalar_alu.cpp | 9 ++- .../frontend/translate/translate.cpp | 19 +---- .../frontend/translate/translate.h | 4 +- .../ir/passes/dead_code_elimination_pass.cpp | 6 +- src/shader_recompiler/ir/passes/ir_passes.h | 2 +- src/shader_recompiler/ir/value.h | 1 - src/shader_recompiler/recompiler.cpp | 2 +- .../texture_cache/texture_cache.cpp | 81 +++++++++---------- 10 files changed, 60 insertions(+), 75 deletions(-) diff --git a/src/shader_recompiler/frontend/control_flow_graph.h b/src/shader_recompiler/frontend/control_flow_graph.h index b9eb12aa..d343ca7d 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.h +++ b/src/shader_recompiler/frontend/control_flow_graph.h @@ -41,6 +41,7 @@ struct Block : Hook { EndClass end_class{}; Block* branch_true{}; Block* branch_false{}; + bool is_dummy{}; }; class CFG { diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index 79be146a..49fe2052 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -630,9 +630,11 @@ private: break; case StatementType::Code: { ensure_block(); - const u32 start = stmt.block->begin_index; - const u32 size = stmt.block->end_index - start + 1; - Translate(current_block, inst_list.subspan(start, size), info); + if (!stmt.block->is_dummy) { + const u32 start = stmt.block->begin_index; + const u32 size = stmt.block->end_index - start + 1; + Translate(current_block, inst_list.subspan(start, size), info); + } break; } case StatementType::SetVariable: { @@ -808,7 +810,7 @@ private: ObjectPool& inst_pool; ObjectPool& block_pool; IR::AbstractSyntaxList& syntax_list; - const Block dummy_flow_block{}; + const Block dummy_flow_block{.is_dummy = true}; std::span inst_list; Info& info; }; diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index bdb6acc7..8c4c90be 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -104,18 +104,21 @@ void Translator::S_MOV_B64(const GcnInst& inst) { if (inst.src[0].field == OperandField::VccLo || inst.dst[0].field == OperandField::VccLo) { return; } - const IR::U1 src0{GetSrc(inst.src[0])}; if (inst.dst[0].field == OperandField::ScalarGPR && inst.src[0].field == OperandField::ExecLo) { // Exec context push exec_contexts[inst.dst[0].code] = true; + ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), ir.GetExec()); } else if (inst.dst[0].field == OperandField::ExecLo && inst.src[0].field == OperandField::ScalarGPR) { // Exec context pop exec_contexts[inst.src[0].code] = false; - } else if (inst.src[0].field != OperandField::ConstZero) { + ir.SetExec(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code))); + } else if (inst.dst[0].field == OperandField::ExecLo && + inst.src[0].field == OperandField::ConstZero) { + ir.SetExec(ir.Imm1(false)); + } else { UNREACHABLE(); } - SetDst(inst.dst[0], src0); } void Translator::S_OR_B64(bool negate, const GcnInst& inst) { diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 73e3a98b..2abc87a6 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -58,16 +58,13 @@ void Translator::EmitPrologue() { } } -IR::U1U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { +IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { // Input modifiers work on float values. force_flt |= operand.input_modifier.abs | operand.input_modifier.neg; - IR::U1U32F32 value{}; + IR::U32F32 value{}; switch (operand.field) { case OperandField::ScalarGPR: - if (exec_contexts[operand.code]) { - value = ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code)); - } if (operand.type == ScalarType::Float32 || force_flt) { value = ir.GetScalarReg(IR::ScalarReg(operand.code)); } else { @@ -124,9 +121,6 @@ IR::U1U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { case OperandField::ConstFloatNeg_2_0: value = ir.Imm32(-2.0f); break; - case OperandField::ExecLo: - value = ir.GetExec(); - break; case OperandField::VccLo: if (force_flt) { value = ir.BitCast(ir.GetVccLo()); @@ -150,8 +144,8 @@ IR::U1U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { return value; } -void Translator::SetDst(const InstOperand& operand, const IR::U1U32F32& value) { - IR::U1U32F32 result = value; +void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) { + IR::U32F32 result = value; if (operand.output_modifier.multiplier != 0.f) { result = ir.FPMul(result, ir.Imm32(operand.output_modifier.multiplier)); } @@ -160,14 +154,9 @@ void Translator::SetDst(const InstOperand& operand, const IR::U1U32F32& value) { } switch (operand.field) { case OperandField::ScalarGPR: - if (value.Type() == IR::Type::U1) { - return ir.SetThreadBitScalarReg(IR::ScalarReg(operand.code), result); - } return ir.SetScalarReg(IR::ScalarReg(operand.code), result); case OperandField::VectorGPR: return ir.SetVectorReg(IR::VectorReg(operand.code), result); - case OperandField::ExecLo: - return ir.SetExec(result); case OperandField::VccLo: return ir.SetVccLo(result); case OperandField::VccHi: diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index c5f34cc6..6fd8e3f5 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -124,8 +124,8 @@ public: void EXP(const GcnInst& inst); private: - IR::U1U32F32 GetSrc(const InstOperand& operand, bool flt_zero = false); - void SetDst(const InstOperand& operand, const IR::U1U32F32& value); + IR::U32F32 GetSrc(const InstOperand& operand, bool flt_zero = false); + void SetDst(const InstOperand& operand, const IR::U32F32& value); private: IR::IREmitter ir; diff --git a/src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp index 24c6b548..32479730 100644 --- a/src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp +++ b/src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp @@ -5,10 +5,10 @@ namespace Shader::Optimization { -void DeadCodeEliminationPass(IR::BlockList& program) { +void DeadCodeEliminationPass(IR::Program& program) { // We iterate over the instructions in reverse order. // This is because removing an instruction reduces the number of uses for earlier instructions. - for (IR::Block* const block : program) { + for (IR::Block* const block : program.post_order_blocks) { auto it{block->end()}; while (it != block->begin()) { --it; @@ -20,4 +20,4 @@ void DeadCodeEliminationPass(IR::BlockList& program) { } } -} // namespace Shader::Optimization \ No newline at end of file +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 915bb80e..bf2ba4d6 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -10,7 +10,7 @@ namespace Shader::Optimization { void SsaRewritePass(IR::BlockList& program); void IdentityRemovalPass(IR::BlockList& program); -void DeadCodeEliminationPass(IR::BlockList& program); +void DeadCodeEliminationPass(IR::Program& program); void ConstantPropagationPass(IR::BlockList& program); void ResourceTrackingPass(IR::Program& program); void CollectShaderInfoPass(IR::Program& program); diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h index 82d40a9b..8c97f495 100644 --- a/src/shader_recompiler/ir/value.h +++ b/src/shader_recompiler/ir/value.h @@ -219,7 +219,6 @@ using U64 = TypedValue; using F16 = TypedValue; using F32 = TypedValue; using F64 = TypedValue; -using U1U32F32 = TypedValue; using U32F32 = TypedValue; using U32U64 = TypedValue; using F32F64 = TypedValue; diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 189a2ab1..f2834abf 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -58,7 +58,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool(image.cpu_addr); - for (u32 l = 0; l < image.info.resources.layers; l++) { + for (u32 m = 0; m < image.info.resources.levels; m++) { + const u32 width = image.info.size.width >> m; + const u32 height = image.info.size.height >> m; + const u32 map_size = width * height * image.info.resources.layers; + // Upload data to the staging buffer. - for (u32 m = 0; m < image.info.resources.levels; m++) { - const u32 width = image.info.size.width >> m; - const u32 height = image.info.size.height >> m; - const u32 map_size = width * height; - const auto [data, offset, _] = staging.Map(map_size, 16); - if (image.info.is_tiled) { - ConvertTileToLinear(data, image_data, width, height, Config::isNeoMode()); - } else { - std::memcpy(data, image_data, map_size); - } - staging.Commit(map_size); - image_data += map_size; - - // Copy to the image. - const vk::BufferImageCopy image_copy = { - .bufferOffset = offset, - .bufferRowLength = 0, - .bufferImageHeight = 0, - .imageSubresource{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .mipLevel = m, - .baseArrayLayer = l, - .layerCount = 1, - }, - .imageOffset = {0, 0, 0}, - .imageExtent = {width, height, 1}, - }; - - const auto cmdbuf = scheduler.CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); - - cmdbuf.copyBufferToImage(staging.Handle(), image.image, - vk::ImageLayout::eTransferDstOptimal, image_copy); - - image.Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); + const auto [data, offset, _] = staging.Map(map_size, 16); + if (image.info.is_tiled) { + ConvertTileToLinear(data, image_data, width, height, Config::isNeoMode()); + } else { + std::memcpy(data, image_data, map_size); } + staging.Commit(map_size); + image_data += map_size; + + // Copy to the image. + const vk::BufferImageCopy image_copy = { + .bufferOffset = offset, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = m, + .baseArrayLayer = 0, + .layerCount = u32(image.info.resources.layers), + }, + .imageOffset = {0, 0, 0}, + .imageExtent = {width, height, 1}, + }; + + const auto cmdbuf = scheduler.CommandBuffer(); + image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); + + cmdbuf.copyBufferToImage(staging.Handle(), image.image, + vk::ImageLayout::eTransferDstOptimal, image_copy); + + image.Transit(vk::ImageLayout::eGeneral, + vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); } }