diff --git a/CMakeLists.txt b/CMakeLists.txt index b49a6c8d..f9024b99 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -419,7 +419,10 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/profile.h src/shader_recompiler/recompiler.cpp src/shader_recompiler/recompiler.h + src/shader_recompiler/info.h + src/shader_recompiler/params.h src/shader_recompiler/runtime_info.h + src/shader_recompiler/specialization.h src/shader_recompiler/backend/spirv/emit_spirv.cpp src/shader_recompiler/backend/spirv/emit_spirv.h src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -533,8 +536,6 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/renderer_vulkan/vk_resource_pool.h src/video_core/renderer_vulkan/vk_scheduler.cpp src/video_core/renderer_vulkan/vk_scheduler.h - src/video_core/renderer_vulkan/vk_shader_cache.cpp - src/video_core/renderer_vulkan/vk_shader_cache.h src/video_core/renderer_vulkan/vk_shader_util.cpp src/video_core/renderer_vulkan/vk_shader_util.h src/video_core/renderer_vulkan/vk_swapchain.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 98eac081..c681be97 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -210,7 +210,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { } switch (program.info.stage) { case Stage::Compute: { - const std::array workgroup_size{program.info.workgroup_size}; + const std::array workgroup_size{ctx.runtime_info.cs_info.workgroup_size}; execution_model = spv::ExecutionModel::GLCompute; ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], workgroup_size[2]); @@ -258,8 +258,9 @@ void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) { } } // Anonymous namespace -std::vector EmitSPIRV(const Profile& profile, const IR::Program& program, u32& binding) { - EmitContext ctx{profile, program.info, binding}; +std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, + const IR::Program& program, u32& binding) { + EmitContext ctx{profile, runtime_info, program.info, binding}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(program, ctx, main); if (program.info.stage == Stage::Vertex) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 4c862185..aada0ff6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -9,7 +9,7 @@ namespace Shader::Backend::SPIRV { -[[nodiscard]] std::vector EmitSPIRV(const Profile& profile, const IR::Program& program, - u32& binding); +[[nodiscard]] std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, + const IR::Program& program, u32& binding); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 7bdc98de..39a214fa 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -59,7 +59,7 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { case IR::Attribute::Position2: case IR::Attribute::Position3: { const u32 index = u32(attr) - u32(IR::Attribute::Position1); - return VsOutputAttrPointer(ctx, ctx.info.vs_outputs[index][element]); + return VsOutputAttrPointer(ctx, ctx.runtime_info.vs_info.outputs[index][element]); } case IR::Attribute::RenderTarget0: case IR::Attribute::RenderTarget1: diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 51315139..b65cbdf4 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -41,9 +41,10 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar } // Anonymous namespace -EmitContext::EmitContext(const Profile& profile_, const Shader::Info& info_, u32& binding_) - : Sirit::Module(profile_.supported_spirv), info{info_}, profile{profile_}, stage{info.stage}, - binding{binding_} { +EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, + const Info& info_, u32& binding_) + : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_}, + profile{profile_}, stage{info.stage}, binding{binding_} { AddCapability(spv::Capability::Shader); DefineArithmeticTypes(); DefineInterfaces(); @@ -168,7 +169,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f void EmitContext::DefineBufferOffsets() { for (auto& buffer : buffers) { const u32 binding = buffer.binding; - const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4); + const u32 half = PushData::BufOffsetIndex + (binding >> 4); const u32 comp = (binding & 0xf) >> 2; const u32 offset = (binding & 0x3) << 3; const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]), @@ -179,7 +180,7 @@ void EmitContext::DefineBufferOffsets() { } for (auto& tex_buffer : texture_buffers) { const u32 binding = tex_buffer.binding; - const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4); + const u32 half = PushData::BufOffsetIndex + (binding >> 4); const u32 comp = (binding & 0xf) >> 2; const u32 offset = (binding & 0x3) << 3; const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]), @@ -247,7 +248,7 @@ void EmitContext::DefineInputs() { frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input); frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output); front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); - for (const auto& input : info.ps_inputs) { + for (const auto& input : runtime_info.fs_info.inputs) { const u32 semantic = input.param_index; if (input.is_default && !input.is_flat) { input_params[semantic] = {MakeDefaultValue(*this, input.default_value), F32[1], @@ -554,7 +555,7 @@ void EmitContext::DefineSharedMemory() { if (!info.uses_shared) { return; } - u32 shared_memory_size = info.shared_memory_size; + u32 shared_memory_size = runtime_info.cs_info.shared_memory_size; if (shared_memory_size == 0) { shared_memory_size = DefaultSharedMemSize; } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index d3646382..0908b7f8 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -6,9 +6,9 @@ #include #include +#include "shader_recompiler/info.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/profile.h" -#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::SPIRV { @@ -36,7 +36,8 @@ struct VectorIds { class EmitContext final : public Sirit::Module { public: - explicit EmitContext(const Profile& profile, const Shader::Info& info, u32& binding); + explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, const Info& info, + u32& binding); ~EmitContext(); Id Def(const IR::Value& value); @@ -125,6 +126,7 @@ public: } const Info& info; + const RuntimeInfo& runtime_info; const Profile& profile; Stage stage{}; diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index fefc623f..bf5ba6bc 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -602,13 +602,14 @@ public: Common::ObjectPool& block_pool_, Common::ObjectPool& stmt_pool_, Statement& root_stmt, IR::AbstractSyntaxList& syntax_list_, std::span inst_list_, - Info& info_, const Profile& profile_) + Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, - syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_}, profile{profile_} { + syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_}, + runtime_info{runtime_info_}, profile{profile_} { Visit(root_stmt, nullptr, nullptr); IR::Block& first_block{*syntax_list.front().data.block}; - Translator{&first_block, info, profile}.EmitPrologue(); + Translator{&first_block, info, runtime_info, profile}.EmitPrologue(); } private: @@ -637,7 +638,7 @@ private: const u32 start = stmt.block->begin_index; const u32 size = stmt.block->end_index - start + 1; Translate(current_block, stmt.block->begin, inst_list.subspan(start, size), - info, profile); + info, runtime_info, profile); } break; } @@ -817,19 +818,20 @@ private: const Block dummy_flow_block{.is_dummy = true}; std::span inst_list; Info& info; + const RuntimeInfo& runtime_info; const Profile& profile; }; } // Anonymous namespace IR::AbstractSyntaxList BuildASL(Common::ObjectPool& inst_pool, Common::ObjectPool& block_pool, CFG& cfg, Info& info, - const Profile& profile) { + const RuntimeInfo& runtime_info, const Profile& profile) { Common::ObjectPool stmt_pool{64}; GotoPass goto_pass{cfg, stmt_pool}; Statement& root{goto_pass.RootStatement()}; IR::AbstractSyntaxList syntax_list; - TranslatePass{inst_pool, block_pool, stmt_pool, root, - syntax_list, cfg.inst_list, info, profile}; + TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, + cfg.inst_list, info, runtime_info, profile}; ASSERT_MSG(!info.translation_failed, "Shader translation has failed"); return syntax_list; } diff --git a/src/shader_recompiler/frontend/structured_control_flow.h b/src/shader_recompiler/frontend/structured_control_flow.h index f5a54051..2119484e 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.h +++ b/src/shader_recompiler/frontend/structured_control_flow.h @@ -11,12 +11,14 @@ namespace Shader { struct Info; struct Profile; +struct RuntimeInfo; } // namespace Shader namespace Shader::Gcn { [[nodiscard]] IR::AbstractSyntaxList BuildASL(Common::ObjectPool& inst_pool, Common::ObjectPool& block_pool, CFG& cfg, - Info& info, const Profile& profile); + Info& info, const RuntimeInfo& runtime_info, + const Profile& profile); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index d80de002..d4db09a6 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/frontend/translate/translate.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Gcn { @@ -19,12 +20,28 @@ void Translator::EmitExport(const GcnInst& inst) { IR::VectorReg(inst.src[3].code), }; + const auto swizzle = [&](u32 comp) { + if (!IR::IsMrt(attrib)) { + return comp; + } + const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0); + switch (runtime_info.fs_info.mrt_swizzles[index]) { + case MrtSwizzle::Identity: + return comp; + case MrtSwizzle::Alt: + static constexpr std::array AltSwizzle = {2, 1, 0, 3}; + return AltSwizzle[comp]; + default: + UNREACHABLE(); + } + }; + const auto unpack = [&](u32 idx) { const IR::Value value = ir.UnpackHalf2x16(ir.GetVectorReg(vsrc[idx])); const IR::F32 r = IR::F32{ir.CompositeExtract(value, 0)}; const IR::F32 g = IR::F32{ir.CompositeExtract(value, 1)}; - ir.SetAttribute(attrib, r, idx * 2); - ir.SetAttribute(attrib, g, idx * 2 + 1); + ir.SetAttribute(attrib, r, swizzle(idx * 2)); + ir.SetAttribute(attrib, g, swizzle(idx * 2 + 1)); }; // Components are float16 packed into a VGPR @@ -45,7 +62,7 @@ void Translator::EmitExport(const GcnInst& inst) { continue; } const IR::F32 comp = ir.GetVectorReg(vsrc[i]); - ir.SetAttribute(attrib, comp, i); + ir.SetAttribute(attrib, comp, swizzle(i)); } } } diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index eb86310b..b33746c7 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -7,6 +7,7 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/translate/translate.h" +#include "shader_recompiler/info.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" @@ -16,8 +17,9 @@ namespace Shader::Gcn { -Translator::Translator(IR::Block* block_, Info& info_, const Profile& profile_) - : ir{*block_, block_->begin()}, info{info_}, profile{profile_} {} +Translator::Translator(IR::Block* block_, Info& info_, const RuntimeInfo& runtime_info_, + const Profile& profile_) + : ir{*block_, block_->begin()}, info{info_}, runtime_info{runtime_info_}, profile{profile_} {} void Translator::EmitPrologue() { ir.Prologue(); @@ -25,7 +27,7 @@ void Translator::EmitPrologue() { // Initialize user data. IR::ScalarReg dst_sreg = IR::ScalarReg::S0; - for (u32 i = 0; i < info.num_user_data; i++) { + for (u32 i = 0; i < runtime_info.num_user_data; i++) { ir.SetScalarReg(dst_sreg, ir.GetUserData(dst_sreg)); ++dst_sreg; } @@ -36,15 +38,15 @@ void Translator::EmitPrologue() { // v0: vertex ID, always present ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId)); // v1: instance ID, step rate 0 - if (info.num_input_vgprs > 0) { + if (runtime_info.num_input_vgprs > 0) { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId0)); } // v2: instance ID, step rate 1 - if (info.num_input_vgprs > 1) { + if (runtime_info.num_input_vgprs > 1) { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId1)); } // v3: instance ID, plain - if (info.num_input_vgprs > 2) { + if (runtime_info.num_input_vgprs > 2) { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId)); } break; @@ -64,13 +66,13 @@ void Translator::EmitPrologue() { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1)); ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2)); - if (info.tgid_enable[0]) { + if (runtime_info.cs_info.tgid_enable[0]) { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 0)); } - if (info.tgid_enable[1]) { + if (runtime_info.cs_info.tgid_enable[1]) { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 1)); } - if (info.tgid_enable[2]) { + if (runtime_info.cs_info.tgid_enable[2]) { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2)); } break; @@ -445,7 +447,6 @@ void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) { } void Translator::LogMissingOpcode(const GcnInst& inst) { - const u32 opcode = u32(inst.opcode); LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({}, category = {})", magic_enum::enum_name(inst.opcode), u32(inst.opcode), magic_enum::enum_name(inst.category)); @@ -453,11 +454,11 @@ void Translator::LogMissingOpcode(const GcnInst& inst) { } void Translate(IR::Block* block, u32 pc, std::span inst_list, Info& info, - const Profile& profile) { + const RuntimeInfo& runtime_info, const Profile& profile) { if (inst_list.empty()) { return; } - Translator translator{block, info, profile}; + Translator translator{block, info, runtime_info, profile}; for (const auto& inst : inst_list) { pc += inst.length; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index f1619e81..0c1f3a58 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -5,9 +5,9 @@ #include #include "shader_recompiler/frontend/instruction.h" +#include "shader_recompiler/info.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/ir_emitter.h" -#include "shader_recompiler/runtime_info.h" namespace Shader { struct Info; @@ -55,7 +55,8 @@ enum class NegateMode : u32 { class Translator { public: - explicit Translator(IR::Block* block_, Info& info, const Profile& profile); + explicit Translator(IR::Block* block_, Info& info, const RuntimeInfo& runtime_info, + const Profile& profile); // Instruction categories void EmitPrologue(); @@ -237,12 +238,13 @@ private: private: IR::IREmitter ir; Info& info; + const RuntimeInfo& runtime_info; const Profile& profile; IR::U32 m0_value; bool opcode_missing = false; }; void Translate(IR::Block* block, u32 block_base, std::span inst_list, Info& info, - const Profile& profile); + const RuntimeInfo& runtime_info, const Profile& profile); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 7fef9137..5d306b29 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -479,10 +479,11 @@ void Translator::V_ADD_F32(const GcnInst& inst) { void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg( - dst_reg, - ir.FPMul(ir.ConvertUToF(32, 32, ir.ISub(ir.BitwiseAnd(src0, ir.Imm32(0xF)), ir.Imm32(8))), - ir.Imm32(1.f / 16.f))); + ASSERT(src0.IsImmediate()); + static constexpr std::array IntToFloat = { + 0.0f, 0.0625f, 0.1250f, 0.1875f, 0.2500f, 0.3125f, 0.3750f, 0.4375f, + -0.5000f, -0.4375f, -0.3750f, -0.3125f, -0.2500f, -0.1875f, -0.1250f, -0.0625f}; + ir.SetVectorReg(dst_reg, ir.Imm32(IntToFloat[src0.U32()])); } void Translator::V_MED3_F32(const GcnInst& inst) { diff --git a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp index 4ff846cf..c12ae8f5 100644 --- a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp +++ b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp @@ -7,14 +7,14 @@ namespace Shader::Gcn { void Translator::V_INTERP_P2_F32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; - auto& attr = info.ps_inputs.at(inst.control.vintrp.attr); + auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr); const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index}; ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan)); } void Translator::V_INTERP_MOV_F32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; - auto& attr = info.ps_inputs.at(inst.control.vintrp.attr); + auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr); const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index}; ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan)); } diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h new file mode 100644 index 00000000..cdc17304 --- /dev/null +++ b/src/shader_recompiler/info.h @@ -0,0 +1,232 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include "common/assert.h" +#include "common/types.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/reg.h" +#include "shader_recompiler/ir/type.h" +#include "shader_recompiler/params.h" +#include "shader_recompiler/runtime_info.h" +#include "video_core/amdgpu/resource.h" + +namespace Shader { + +static constexpr size_t NumUserDataRegs = 16; + +enum class TextureType : u32 { + Color1D, + ColorArray1D, + Color2D, + ColorArray2D, + Color3D, + ColorCube, + Buffer, +}; +constexpr u32 NUM_TEXTURE_TYPES = 7; + +struct Info; + +struct BufferResource { + u32 sgpr_base; + u32 dword_offset; + IR::Type used_types; + AmdGpu::Buffer inline_cbuf; + bool is_instance_data{}; + bool is_written{}; + + bool IsStorage(AmdGpu::Buffer buffer) const noexcept { + static constexpr size_t MaxUboSize = 65536; + return buffer.GetSize() > MaxUboSize || is_written; + } + + constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; +}; +using BufferResourceList = boost::container::small_vector; + +struct TextureBufferResource { + u32 sgpr_base; + u32 dword_offset; + AmdGpu::NumberFormat nfmt; + bool is_written{}; + + constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; +}; +using TextureBufferResourceList = boost::container::small_vector; + +struct ImageResource { + u32 sgpr_base; + u32 dword_offset; + AmdGpu::ImageType type; + AmdGpu::NumberFormat nfmt; + bool is_storage; + bool is_depth; + bool is_atomic{}; + + constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; +}; +using ImageResourceList = boost::container::small_vector; + +struct SamplerResource { + u32 sgpr_base; + u32 dword_offset; + AmdGpu::Sampler inline_sampler{}; + u32 associated_image : 4; + u32 disable_aniso : 1; + + constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept; +}; +using SamplerResourceList = boost::container::small_vector; + +struct PushData { + static constexpr size_t BufOffsetIndex = 2; + + u32 step0; + u32 step1; + std::array buf_offsets; + + void AddOffset(u32 binding, u32 offset) { + ASSERT(offset < 256 && binding < buf_offsets.size()); + buf_offsets[binding] = offset; + } +}; + +/** + * Contains general information generated by the shader recompiler for an input program. + */ +struct Info { + struct VsInput { + enum InstanceIdType : u8 { + None = 0, + OverStepRate0 = 1, + OverStepRate1 = 2, + Plain = 3, + }; + + AmdGpu::NumberFormat fmt; + u16 binding; + u16 num_components; + u8 sgpr_base; + u8 dword_offset; + InstanceIdType instance_step_rate; + s32 instance_data_buf; + }; + boost::container::static_vector vs_inputs{}; + + struct AttributeFlags { + bool Get(IR::Attribute attrib, u32 comp = 0) const { + return flags[Index(attrib)] & (1 << comp); + } + + bool GetAny(IR::Attribute attrib) const { + return flags[Index(attrib)]; + } + + void Set(IR::Attribute attrib, u32 comp = 0) { + flags[Index(attrib)] |= (1 << comp); + } + + u32 NumComponents(IR::Attribute attrib) const { + return 4; + } + + static size_t Index(IR::Attribute attrib) { + return static_cast(attrib); + } + + std::array flags; + }; + AttributeFlags loads{}; + AttributeFlags stores{}; + + s8 vertex_offset_sgpr = -1; + s8 instance_offset_sgpr = -1; + + BufferResourceList buffers; + TextureBufferResourceList texture_buffers; + ImageResourceList images; + SamplerResourceList samplers; + + std::span user_data; + Stage stage; + + u64 pgm_hash{}; + VAddr pgm_base; + bool has_storage_images{}; + bool has_image_buffers{}; + bool has_texel_buffers{}; + bool has_discard{}; + bool has_image_gather{}; + bool has_image_query{}; + bool uses_lane_id{}; + bool uses_group_quad{}; + bool uses_shared{}; + bool uses_fp16{}; + bool uses_step_rates{}; + bool translation_failed{}; // indicates that shader has unsupported instructions + + explicit Info(Stage stage_, ShaderParams params) + : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, + user_data{params.user_data} {} + + template + T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept { + T data; + const u32* base = user_data.data(); + if (ptr_index != IR::NumScalarRegs) { + std::memcpy(&base, &user_data[ptr_index], sizeof(base)); + } + std::memcpy(&data, base + dword_offset, sizeof(T)); + return data; + } + + size_t NumBindings() const noexcept { + return buffers.size() + texture_buffers.size() + images.size() + samplers.size(); + } + + [[nodiscard]] std::pair GetDrawOffsets() const noexcept { + u32 vertex_offset = 0; + u32 instance_offset = 0; + if (vertex_offset_sgpr != -1) { + vertex_offset = user_data[vertex_offset_sgpr]; + } + if (instance_offset_sgpr != -1) { + instance_offset = user_data[instance_offset_sgpr]; + } + return {vertex_offset, instance_offset}; + } +}; + +constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept { + return inline_cbuf ? inline_cbuf : info.ReadUd(sgpr_base, dword_offset); +} + +constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const noexcept { + return info.ReadUd(sgpr_base, dword_offset); +} + +constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept { + return info.ReadUd(sgpr_base, dword_offset); +} + +constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept { + return inline_sampler ? inline_sampler : info.ReadUd(sgpr_base, dword_offset); +} + +} // namespace Shader + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(const Shader::Stage stage, format_context& ctx) const { + constexpr static std::array names = {"fs", "vs", "gs", "es", "hs", "ls", "cs"}; + return fmt::format_to(ctx.out(), "{}", names[static_cast(stage)]); + } +}; diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index f446ac47..025bb98c 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -4,11 +4,11 @@ #include #include #include "common/alignment.h" +#include "shader_recompiler/info.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/breadth_first_search.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/program.h" -#include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" namespace Shader::Optimization { @@ -471,14 +471,11 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip // Read image sharp. const auto tsharp = TrackSharp(tsharp_handle); - const auto image = info.ReadUd(tsharp.sgpr_base, tsharp.dword_offset); const auto inst_info = inst.Flags(); + auto image = info.ReadUd(tsharp.sgpr_base, tsharp.dword_offset); if (!image.Valid()) { LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!"); - IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - inst.ReplaceUsesWith( - ir.CompositeConstruct(ir.Imm32(0.f), ir.Imm32(0.f), ir.Imm32(0.f), ir.Imm32(0.f))); - return; + image = AmdGpu::Image::Null(); } ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); const bool is_storage = IsImageStorageInstruction(inst); diff --git a/src/shader_recompiler/ir/program.h b/src/shader_recompiler/ir/program.h index f7abba64..84a1a2d4 100644 --- a/src/shader_recompiler/ir/program.h +++ b/src/shader_recompiler/ir/program.h @@ -5,9 +5,9 @@ #include #include "shader_recompiler/frontend/instruction.h" +#include "shader_recompiler/info.h" #include "shader_recompiler/ir/abstract_syntax_list.h" #include "shader_recompiler/ir/basic_block.h" -#include "shader_recompiler/runtime_info.h" namespace Shader::IR { diff --git a/src/shader_recompiler/params.h b/src/shader_recompiler/params.h new file mode 100644 index 00000000..0dce9a0f --- /dev/null +++ b/src/shader_recompiler/params.h @@ -0,0 +1,26 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "common/types.h" + +namespace Shader { + +/** + * Compilation parameters used to identify and locate a guest shader program. + */ +struct ShaderParams { + static constexpr u32 NumShaderUserData = 16; + + std::span user_data; + std::span code; + u64 hash; + + VAddr Base() const noexcept { + return reinterpret_cast(code.data()); + } +}; + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index dfcf9ed1..12dbc6c1 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/frontend/structured_control_flow.h" #include "shader_recompiler/ir/passes/ir_passes.h" #include "shader_recompiler/ir/post_order.h" +#include "shader_recompiler/recompiler.h" namespace Shader { @@ -27,29 +28,32 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { return blocks; } -IR::Program TranslateProgram(Common::ObjectPool& inst_pool, - Common::ObjectPool& block_pool, std::span token, - Info& info, const Profile& profile) { +IR::Program TranslateProgram(std::span code, Pools& pools, Info& info, + const RuntimeInfo& runtime_info, const Profile& profile) { // Ensure first instruction is expected. constexpr u32 token_mov_vcchi = 0xBEEB03FF; - ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm"); + ASSERT_MSG(code[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm"); - Gcn::GcnCodeSlice slice(token.data(), token.data() + token.size()); + Gcn::GcnCodeSlice slice(code.data(), code.data() + code.size()); Gcn::GcnDecodeContext decoder; // Decode and save instructions IR::Program program{info}; - program.ins_list.reserve(token.size()); + program.ins_list.reserve(code.size()); while (!slice.atEnd()) { program.ins_list.emplace_back(decoder.decodeInstruction(slice)); } + // Clear any previous pooled data. + pools.ReleaseContents(); + // Create control flow graph Common::ObjectPool gcn_block_pool{64}; Gcn::CFG cfg{gcn_block_pool, program.ins_list}; // Structurize control flow graph and create program. - program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info, profile); + program.syntax_list = Shader::Gcn::BuildASL(pools.inst_pool, pools.block_pool, cfg, + program.info, runtime_info, profile); program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); @@ -63,7 +67,6 @@ IR::Program TranslateProgram(Common::ObjectPool& inst_pool, Shader::Optimization::IdentityRemovalPass(program.blocks); Shader::Optimization::DeadCodeEliminationPass(program); Shader::Optimization::CollectShaderInfoPass(program); - LOG_DEBUG(Render_Vulkan, "{}", Shader::IR::DumpProgram(program)); return program; } diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index 3a229518..f8acf6c9 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -10,10 +10,24 @@ namespace Shader { struct Profile; +struct RuntimeInfo; -[[nodiscard]] IR::Program TranslateProgram(Common::ObjectPool& inst_pool, - Common::ObjectPool& block_pool, - std::span code, Info& info, - const Profile& profile); +struct Pools { + static constexpr u32 InstPoolSize = 8192; + static constexpr u32 BlockPoolSize = 32; + + Common::ObjectPool inst_pool; + Common::ObjectPool block_pool; + + explicit Pools() : inst_pool{InstPoolSize}, block_pool{BlockPoolSize} {} + + void ReleaseContents() { + inst_pool.ReleaseContents(); + block_pool.ReleaseContents(); + } +}; + +[[nodiscard]] IR::Program TranslateProgram(std::span code, Pools& pools, Info& info, + const RuntimeInfo& runtime_info, const Profile& profile); } // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 77c57e94..37fd64bb 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -3,20 +3,14 @@ #pragma once -#include -#include +#include #include + #include "common/assert.h" #include "common/types.h" -#include "shader_recompiler/ir/attribute.h" -#include "shader_recompiler/ir/reg.h" -#include "shader_recompiler/ir/type.h" -#include "video_core/amdgpu/resource.h" namespace Shader { -static constexpr size_t NumUserDataRegs = 16; - enum class Stage : u32 { Fragment, Vertex, @@ -29,21 +23,18 @@ enum class Stage : u32 { constexpr u32 MaxStageTypes = 6; [[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { - return static_cast(static_cast(Stage::Vertex) + index); + return static_cast(index); } -enum class TextureType : u32 { - Color1D, - ColorArray1D, - Color2D, - ColorArray2D, - Color3D, - ColorCube, - Buffer, +enum class MrtSwizzle : u8 { + Identity = 0, + Alt = 1, + Reverse = 2, + ReverseAlt = 3, }; -constexpr u32 NUM_TEXTURE_TYPES = 7; +static constexpr u32 MaxColorBuffers = 8; -enum class VsOutput : u32 { +enum class VsOutput : u8 { None, PointSprite, EdgeFlag, @@ -70,211 +61,69 @@ enum class VsOutput : u32 { }; using VsOutputMap = std::array; -struct Info; +struct VertexRuntimeInfo { + boost::container::static_vector outputs; -struct BufferResource { - u32 sgpr_base; - u32 dword_offset; - IR::Type used_types; - AmdGpu::Buffer inline_cbuf; - bool is_instance_data{}; - bool is_written{}; - - bool IsStorage(AmdGpu::Buffer buffer) const noexcept { - static constexpr size_t MaxUboSize = 65536; - return buffer.GetSize() > MaxUboSize || is_written; - } - - constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; -}; -using BufferResourceList = boost::container::small_vector; - -struct TextureBufferResource { - u32 sgpr_base; - u32 dword_offset; - AmdGpu::NumberFormat nfmt; - bool is_written{}; - - constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; -}; -using TextureBufferResourceList = boost::container::small_vector; - -struct ImageResource { - u32 sgpr_base; - u32 dword_offset; - AmdGpu::ImageType type; - AmdGpu::NumberFormat nfmt; - bool is_storage; - bool is_depth; - bool is_atomic{}; - - constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; -}; -using ImageResourceList = boost::container::small_vector; - -struct SamplerResource { - u32 sgpr_base; - u32 dword_offset; - AmdGpu::Sampler inline_sampler{}; - u32 associated_image : 4; - u32 disable_aniso : 1; - - constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept; -}; -using SamplerResourceList = boost::container::small_vector; - -struct PushData { - static constexpr size_t BufOffsetIndex = 2; - - u32 step0; - u32 step1; - std::array buf_offsets; - - void AddOffset(u32 binding, u32 offset) { - ASSERT(offset < 256 && binding < buf_offsets.size()); - buf_offsets[binding] = offset; + bool operator==(const VertexRuntimeInfo& other) const noexcept { + return true; } }; -struct Info { - struct VsInput { - enum InstanceIdType : u8 { - None = 0, - OverStepRate0 = 1, - OverStepRate1 = 2, - Plain = 3, - }; - - AmdGpu::NumberFormat fmt; - u16 binding; - u16 num_components; - u8 sgpr_base; - u8 dword_offset; - InstanceIdType instance_step_rate; - s32 instance_data_buf; - }; - boost::container::static_vector vs_inputs{}; - +struct FragmentRuntimeInfo { struct PsInput { - u32 param_index; + u8 param_index; bool is_default; bool is_flat; - u32 default_value; + u8 default_value; + + auto operator<=>(const PsInput&) const noexcept = default; }; - boost::container::static_vector ps_inputs{}; + boost::container::static_vector inputs; + std::array mrt_swizzles; - struct AttributeFlags { - bool Get(IR::Attribute attrib, u32 comp = 0) const { - return flags[Index(attrib)] & (1 << comp); - } + bool operator==(const FragmentRuntimeInfo& other) const noexcept { + return std::ranges::equal(mrt_swizzles, other.mrt_swizzles) && + std::ranges::equal(inputs, other.inputs); + } +}; - bool GetAny(IR::Attribute attrib) const { - return flags[Index(attrib)]; - } - - void Set(IR::Attribute attrib, u32 comp = 0) { - flags[Index(attrib)] |= (1 << comp); - } - - u32 NumComponents(IR::Attribute attrib) const { - return 4; - } - - static size_t Index(IR::Attribute attrib) { - return static_cast(attrib); - } - - std::array flags; - }; - AttributeFlags loads{}; - AttributeFlags stores{}; - boost::container::static_vector vs_outputs; - - s8 vertex_offset_sgpr = -1; - s8 instance_offset_sgpr = -1; - - BufferResourceList buffers; - TextureBufferResourceList texture_buffers; - ImageResourceList images; - SamplerResourceList samplers; - - std::array workgroup_size{}; +struct ComputeRuntimeInfo { + u32 shared_memory_size; + std::array workgroup_size; std::array tgid_enable; + bool operator==(const ComputeRuntimeInfo& other) const noexcept { + return workgroup_size == other.workgroup_size && tgid_enable == other.tgid_enable; + } +}; + +/** + * Stores information relevant to shader compilation sourced from liverpool registers. + * It may potentially differ with the same shader module so must be checked. + * It's also possible to store any other custom information that needs to be part of shader key. + */ +struct RuntimeInfo { + Stage stage; u32 num_user_data; u32 num_input_vgprs; - std::span user_data; - Stage stage; + VertexRuntimeInfo vs_info; + FragmentRuntimeInfo fs_info; + ComputeRuntimeInfo cs_info; - uintptr_t pgm_base{}; - u64 pgm_hash{}; - u32 shared_memory_size{}; - bool has_storage_images{}; - bool has_image_buffers{}; - bool has_texel_buffers{}; - bool has_discard{}; - bool has_image_gather{}; - bool has_image_query{}; - bool uses_lane_id{}; - bool uses_group_quad{}; - bool uses_shared{}; - bool uses_fp16{}; - bool uses_step_rates{}; - bool translation_failed{}; // indicates that shader has unsupported instructions + RuntimeInfo(Stage stage_) : stage{stage_} {} - template - T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept { - T data; - const u32* base = user_data.data(); - if (ptr_index != IR::NumScalarRegs) { - std::memcpy(&base, &user_data[ptr_index], sizeof(base)); + bool operator==(const RuntimeInfo& other) const noexcept { + switch (stage) { + case Stage::Fragment: + return fs_info == other.fs_info; + case Stage::Vertex: + return vs_info == other.vs_info; + case Stage::Compute: + return cs_info == other.cs_info; + default: + return true; } - std::memcpy(&data, base + dword_offset, sizeof(T)); - return data; - } - - size_t NumBindings() const noexcept { - return buffers.size() + texture_buffers.size() + images.size() + samplers.size(); - } - - [[nodiscard]] std::pair GetDrawOffsets() const noexcept { - u32 vertex_offset = 0; - u32 instance_offset = 0; - if (vertex_offset_sgpr != -1) { - vertex_offset = user_data[vertex_offset_sgpr]; - } - if (instance_offset_sgpr != -1) { - instance_offset = user_data[instance_offset_sgpr]; - } - return {vertex_offset, instance_offset}; } }; -constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept { - return inline_cbuf ? inline_cbuf : info.ReadUd(sgpr_base, dword_offset); -} - -constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const noexcept { - return info.ReadUd(sgpr_base, dword_offset); -} - -constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept { - return info.ReadUd(sgpr_base, dword_offset); -} - -constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept { - return inline_sampler ? inline_sampler : info.ReadUd(sgpr_base, dword_offset); -} - } // namespace Shader - -template <> -struct fmt::formatter { - constexpr auto parse(format_parse_context& ctx) { - return ctx.begin(); - } - auto format(const Shader::Stage stage, format_context& ctx) const { - constexpr static std::array names = {"fs", "vs", "gs", "es", "hs", "ls", "cs"}; - return fmt::format_to(ctx.out(), "{}", names[static_cast(stage)]); - } -}; diff --git a/src/video_core/renderer_vulkan/vk_shader_cache.h b/src/shader_recompiler/specialization.h similarity index 56% rename from src/video_core/renderer_vulkan/vk_shader_cache.h rename to src/shader_recompiler/specialization.h index 191e1b08..3dd75dbd 100644 --- a/src/video_core/renderer_vulkan/vk_shader_cache.h +++ b/src/shader_recompiler/specialization.h @@ -4,18 +4,11 @@ #pragma once #include -#include -#include -#include "common/object_pool.h" -#include "shader_recompiler/ir/basic_block.h" -#include "shader_recompiler/profile.h" -#include "shader_recompiler/runtime_info.h" -#include "video_core/amdgpu/liverpool.h" -#include "video_core/renderer_vulkan/vk_common.h" -namespace Vulkan { +#include "common/types.h" +#include "shader_recompiler/info.h" -class Instance; +namespace Shader { struct BufferSpecialization { u16 stride : 14; @@ -25,43 +18,38 @@ struct BufferSpecialization { }; struct TextureBufferSpecialization { - bool is_integer; + bool is_integer = false; auto operator<=>(const TextureBufferSpecialization&) const = default; }; struct ImageSpecialization { - AmdGpu::ImageType type; - bool is_integer; + AmdGpu::ImageType type = AmdGpu::ImageType::Color2D; + bool is_integer = false; auto operator<=>(const ImageSpecialization&) const = default; }; +/** + * Alongside runtime information, this structure also checks bound resources + * for compatibility. Can be used as a key for storing shader permutations. + * Is separate from runtime information, because resource layout can only be deduced + * after the first compilation of a module. + */ struct StageSpecialization { static constexpr size_t MaxStageResources = 32; const Shader::Info* info; + RuntimeInfo runtime_info; std::bitset bitset{}; boost::container::small_vector buffers; boost::container::small_vector tex_buffers; boost::container::small_vector images; u32 start_binding{}; - void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) { - for (const auto& desc : desc_list) { - auto& spec = spec_list.emplace_back(); - const auto sharp = desc.GetSharp(*info); - if (!sharp) { - binding++; - continue; - } - bitset.set(binding++); - func(spec, desc, sharp); - } - } - - StageSpecialization(const Shader::Info& info_, u32 start_binding_) - : info{&info_}, start_binding{start_binding_} { + explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_, + u32 start_binding_) + : info{&info_}, runtime_info{runtime_info_}, start_binding{start_binding_} { u32 binding{}; ForEachSharp(binding, buffers, info->buffers, [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { @@ -79,10 +67,26 @@ struct StageSpecialization { }); } + void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) { + for (const auto& desc : desc_list) { + auto& spec = spec_list.emplace_back(); + const auto sharp = desc.GetSharp(*info); + if (!sharp) { + binding++; + continue; + } + bitset.set(binding++); + func(spec, desc, sharp); + } + } + bool operator==(const StageSpecialization& other) const { if (start_binding != other.start_binding) { return false; } + if (runtime_info != other.runtime_info) { + return false; + } u32 binding{}; for (u32 i = 0; i < buffers.size(); i++) { if (other.bitset[binding++] && buffers[i] != other.buffers[i]) { @@ -103,54 +107,4 @@ struct StageSpecialization { } }; -struct Program { - struct Module { - vk::ShaderModule module; - StageSpecialization spec; - }; - - Shader::Info info; - boost::container::small_vector modules; - - explicit Program(const Shader::Info& info_) : info{info_} {} -}; - -struct GuestProgram { - Shader::Stage stage; - std::span user_data; - std::span code; - u64 hash; - - explicit GuestProgram(const auto* pgm, Shader::Stage stage_) - : stage{stage_}, user_data{pgm->user_data}, code{pgm->Code()} { - const auto* bininfo = AmdGpu::Liverpool::GetBinaryInfo(*pgm); - hash = bininfo->shader_hash; - } -}; - -class ShaderCache { -public: - explicit ShaderCache(const Instance& instance, AmdGpu::Liverpool* liverpool); - ~ShaderCache() = default; - - std::tuple GetProgram(const GuestProgram& pgm, - u32& binding); - -private: - void DumpShader(std::span code, u64 hash, Shader::Stage stage, size_t perm_idx, - std::string_view ext); - vk::ShaderModule CompileModule(Shader::Info& info, std::span code, size_t perm_idx, - u32& binding); - Program* CreateProgram(const GuestProgram& pgm, u32& binding); - -private: - const Instance& instance; - AmdGpu::Liverpool* liverpool; - Shader::Profile profile{}; - tsl::robin_map program_cache; - Common::ObjectPool inst_pool; - Common::ObjectPool block_pool; - Common::ObjectPool program_pool; -}; - -} // namespace Vulkan +} // namespace Shader diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 7f262e1f..37720168 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -18,6 +18,7 @@ #include "common/polyfill_thread.h" #include "common/types.h" #include "common/unique_function.h" +#include "shader_recompiler/params.h" #include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/resource.h" @@ -171,6 +172,15 @@ struct Liverpool { return bininfo; } + static constexpr Shader::ShaderParams GetParams(const auto& sh) { + auto* bininfo = GetBinaryInfo(sh); + return { + .user_data = sh.user_data, + .code = sh.Code(), + .hash = bininfo->shader_hash, + }; + } + union PsInputControl { u32 raw; BitField<0, 5, u32> input_offset; diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index b85a3788..1721c1ae 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -176,6 +176,18 @@ struct Image { u64 lod_hw_cnt_en : 1; u64 : 43; + static constexpr Image Null() { + Image image{}; + image.data_format = u64(DataFormat::Format8_8_8_8); + image.dst_sel_x = 4; + image.dst_sel_y = 5; + image.dst_sel_z = 6; + image.dst_sel_w = 7; + image.tiling_index = u64(TilingMode::Texture_MicroTiled); + image.type = u64(ImageType::Color2D); + return image; + } + bool Valid() const { return (type & 0x8u) != 0; } diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 71228786..93e05085 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -4,7 +4,7 @@ #include #include "common/alignment.h" #include "common/scope_exit.h" -#include "shader_recompiler/runtime_info.h" +#include "shader_recompiler/info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 358a0044..40a1124a 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -600,6 +600,8 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format, return is_vo_surface ? vk::Format::eB8G8R8A8Unorm : vk::Format::eB8G8R8A8Srgb; case vk::Format::eB8G8R8A8Srgb: return is_vo_surface ? vk::Format::eR8G8B8A8Unorm : vk::Format::eR8G8B8A8Srgb; + case vk::Format::eA2B10G10R10UnormPack32: + return vk::Format::eA2R10G10B10UnormPack32; default: break; } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 0132066c..54eaf653 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,7 +4,7 @@ #pragma once #include -#include "shader_recompiler/runtime_info.h" +#include "shader_recompiler/info.h" #include "video_core/renderer_vulkan/vk_common.h" namespace VideoCore { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 3e51e652..c06ddd20 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -25,6 +25,7 @@ using Liverpool = AmdGpu::Liverpool; struct GraphicsPipelineKey { std::array stage_hashes; std::array color_formats; + std::array mrt_swizzles; vk::Format depth_format; vk::Format stencil_format; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f8de5ffe..364c2b4f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1,21 +1,124 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "shader_recompiler/runtime_info.h" +#include + +#include "common/config.h" +#include "common/io_file.h" +#include "common/path_util.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/info.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_cache.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" extern std::unique_ptr renderer; namespace Vulkan { +using Shader::VsOutput; + +[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) { + return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2)); +} + +void GatherVertexOutputs(Shader::VertexRuntimeInfo& info, + const AmdGpu::Liverpool::VsOutputControl& ctl) { + const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) { + if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None || + w != VsOutput::None) { + info.outputs.emplace_back(Shader::VsOutputMap{x, y, z, w}); + } + }; + // VS_OUT_MISC_VEC + add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None, + ctl.use_vtx_edge_flag + ? VsOutput::EdgeFlag + : (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None), + ctl.use_vtx_kill_flag + ? VsOutput::KillFlag + : (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None), + ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None); + // VS_OUT_CCDIST0 + add_output(ctl.IsClipDistEnabled(0) + ? VsOutput::ClipDist0 + : (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None), + ctl.IsClipDistEnabled(1) + ? VsOutput::ClipDist1 + : (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None), + ctl.IsClipDistEnabled(2) + ? VsOutput::ClipDist2 + : (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None), + ctl.IsClipDistEnabled(3) + ? VsOutput::ClipDist3 + : (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None)); + // VS_OUT_CCDIST1 + add_output(ctl.IsClipDistEnabled(4) + ? VsOutput::ClipDist4 + : (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None), + ctl.IsClipDistEnabled(5) + ? VsOutput::ClipDist5 + : (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None), + ctl.IsClipDistEnabled(6) + ? VsOutput::ClipDist6 + : (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None), + ctl.IsClipDistEnabled(7) + ? VsOutput::ClipDist7 + : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None)); +} + +Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, const GraphicsPipelineKey& key, + const AmdGpu::Liverpool::Regs& regs) { + auto info = Shader::RuntimeInfo{stage}; + switch (stage) { + case Shader::Stage::Vertex: { + info.num_user_data = regs.vs_program.settings.num_user_regs; + info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt; + GatherVertexOutputs(info.vs_info, regs.vs_output_control); + break; + } + case Shader::Stage::Fragment: { + info.num_user_data = regs.ps_program.settings.num_user_regs; + std::ranges::transform(key.mrt_swizzles, info.fs_info.mrt_swizzles.begin(), + [](Liverpool::ColorBuffer::SwapMode mode) { + return static_cast(mode); + }); + for (u32 i = 0; i < regs.num_interp; i++) { + info.fs_info.inputs.push_back({ + .param_index = u8(regs.ps_inputs[i].input_offset.Value()), + .is_default = bool(regs.ps_inputs[i].use_default), + .is_flat = bool(regs.ps_inputs[i].flat_shade), + .default_value = u8(regs.ps_inputs[i].default_value), + }); + } + break; + } + case Shader::Stage::Compute: { + const auto& cs_pgm = regs.cs_program; + info.num_user_data = cs_pgm.settings.num_user_regs; + info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, + cs_pgm.num_thread_z.full}; + info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1), + cs_pgm.IsTgidEnabled(2)}; + info.cs_info.shared_memory_size = cs_pgm.SharedMemSize(); + break; + } + default: + break; + } + return info; +} + PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, AmdGpu::Liverpool* liverpool_) - : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, - shader_cache{std::make_unique(instance, liverpool)} { + : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_} { + profile = Shader::Profile{ + .supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 0x00010600U : 0x00010500U, + .subgroup_size = instance.SubgroupSize(), + .support_explicit_workgroup_layout = true, + }; pipeline_cache = instance.GetDevice().createPipelineCacheUnique({}); } @@ -134,6 +237,7 @@ bool PipelineCache::RefreshGraphicsKey() { key.color_formats.fill(vk::Format::eUndefined); key.blend_controls.fill({}); key.write_masks.fill({}); + key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); int remapped_cb{}; for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { auto const& col_buf = regs.color_buffers[cb]; @@ -142,9 +246,12 @@ bool PipelineCache::RefreshGraphicsKey() { } const auto base_format = LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); - const auto is_vo_surface = renderer->IsVideoOutSurface(col_buf); + const bool is_vo_surface = renderer->IsVideoOutSurface(col_buf); key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat( base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/); + if (base_format == key.color_formats[remapped_cb]) { + key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value(); + } key.blend_controls[remapped_cb] = regs.blend_control[cb]; key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && !col_buf.info.blend_bypass); @@ -169,6 +276,7 @@ bool PipelineCache::RefreshGraphicsKey() { } const auto* bininfo = Liverpool::GetBinaryInfo(*pgm); if (!bininfo->Valid()) { + LOG_WARNING(Render_Vulkan, "Invalid binary info structure!"); key.stage_hashes[i] = 0; infos[i] = nullptr; continue; @@ -176,10 +284,9 @@ bool PipelineCache::RefreshGraphicsKey() { if (ShouldSkipShader(bininfo->shader_hash, "graphics")) { return false; } - const auto stage = Shader::Stage{i}; - const GuestProgram guest_pgm{pgm, stage}; - std::tie(infos[i], modules[i], key.stage_hashes[i]) = - shader_cache->GetProgram(guest_pgm, binding); + const auto stage = Shader::StageFromIndex(i); + const auto params = Liverpool::GetParams(*pgm); + std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(stage, params, binding); } return true; } @@ -187,12 +294,80 @@ bool PipelineCache::RefreshGraphicsKey() { bool PipelineCache::RefreshComputeKey() { u32 binding{}; const auto* cs_pgm = &liverpool->regs.cs_program; - const GuestProgram guest_pgm{cs_pgm, Shader::Stage::Compute}; - if (ShouldSkipShader(guest_pgm.hash, "compute")) { + const auto cs_params = Liverpool::GetParams(*cs_pgm); + if (ShouldSkipShader(cs_params.hash, "compute")) { return false; } - std::tie(infos[0], modules[0], compute_key) = shader_cache->GetProgram(guest_pgm, binding); + std::tie(infos[0], modules[0], compute_key) = + GetProgram(Shader::Stage::Compute, cs_params, binding); return true; } +vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, + const Shader::RuntimeInfo& runtime_info, + std::span code, size_t perm_idx, + u32& binding) { + LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, + perm_idx != 0 ? "(permutation)" : ""); + if (Config::dumpShaders()) { + DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin"); + } + + const auto ir_program = Shader::TranslateProgram(code, pools, info, runtime_info, profile); + const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, runtime_info, ir_program, binding); + if (Config::dumpShaders()) { + DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv"); + } + + const auto module = CompileSPV(spv, instance.GetDevice()); + const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx); + Vulkan::SetObjectName(instance.GetDevice(), module, name); + return module; +} + +std::tuple PipelineCache::GetProgram( + Shader::Stage stage, Shader::ShaderParams params, u32& binding) { + const auto runtime_info = BuildRuntimeInfo(stage, graphics_key, liverpool->regs); + auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); + if (new_program) { + Program* program = program_pool.Create(stage, params); + u32 start_binding = binding; + const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); + const auto spec = Shader::StageSpecialization(program->info, runtime_info, start_binding); + program->AddPermut(module, std::move(spec)); + it_pgm.value() = program; + return std::make_tuple(&program->info, module, HashCombine(params.hash, 0)); + } + + Program* program = it_pgm->second; + const auto& info = program->info; + const auto spec = Shader::StageSpecialization(info, runtime_info, binding); + size_t perm_idx = program->modules.size(); + vk::ShaderModule module{}; + + const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec); + if (it == program->modules.end()) { + auto new_info = Shader::Info(stage, params); + module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding); + program->AddPermut(module, std::move(spec)); + } else { + binding += info.NumBindings(); + module = it->module; + perm_idx = std::distance(program->modules.begin(), it); + } + return std::make_tuple(&info, module, HashCombine(params.hash, perm_idx)); +} + +void PipelineCache::DumpShader(std::span code, u64 hash, Shader::Stage stage, + size_t perm_idx, std::string_view ext) { + using namespace Common::FS; + const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps"; + if (!std::filesystem::exists(dump_dir)) { + std::filesystem::create_directories(dump_dir); + } + const auto filename = fmt::format("{}_{:#018x}_{}.{}", stage, hash, perm_idx, ext); + const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; + file.WriteSpan(code); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 40853b74..26130994 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -4,6 +4,9 @@ #pragma once #include +#include "shader_recompiler/profile.h" +#include "shader_recompiler/recompiler.h" +#include "shader_recompiler/specialization.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" @@ -17,6 +20,22 @@ class Instance; class Scheduler; class ShaderCache; +struct Program { + struct Module { + vk::ShaderModule module; + Shader::StageSpecialization spec; + }; + + Shader::Info info; + boost::container::small_vector modules; + + explicit Program(Shader::Stage stage, Shader::ShaderParams params) : info{stage, params} {} + + void AddPermut(vk::ShaderModule module, const Shader::StageSpecialization&& spec) { + modules.emplace_back(module, std::move(spec)); + } +}; + class PipelineCache { static constexpr size_t MaxShaderStages = 5; @@ -29,17 +48,29 @@ public: const ComputePipeline* GetComputePipeline(); + std::tuple GetProgram(Shader::Stage stage, + Shader::ShaderParams params, + u32& binding); + private: bool RefreshGraphicsKey(); bool RefreshComputeKey(); + void DumpShader(std::span code, u64 hash, Shader::Stage stage, size_t perm_idx, + std::string_view ext); + vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info, + std::span code, size_t perm_idx, u32& binding); + private: const Instance& instance; Scheduler& scheduler; AmdGpu::Liverpool* liverpool; vk::UniquePipelineCache pipeline_cache; vk::UniquePipelineLayout pipeline_layout; - std::unique_ptr shader_cache; + Shader::Profile profile{}; + Shader::Pools pools; + tsl::robin_map program_cache; + Common::ObjectPool program_pool; tsl::robin_map> compute_pipelines; tsl::robin_map> graphics_pipelines; std::array infos{}; diff --git a/src/video_core/renderer_vulkan/vk_shader_cache.cpp b/src/video_core/renderer_vulkan/vk_shader_cache.cpp deleted file mode 100644 index 9250f84c..00000000 --- a/src/video_core/renderer_vulkan/vk_shader_cache.cpp +++ /dev/null @@ -1,192 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "common/config.h" -#include "common/io_file.h" -#include "common/path_util.h" -#include "shader_recompiler/backend/spirv/emit_spirv.h" -#include "shader_recompiler/recompiler.h" -#include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_platform.h" -#include "video_core/renderer_vulkan/vk_shader_cache.h" -#include "video_core/renderer_vulkan/vk_shader_util.h" - -namespace Vulkan { - -using Shader::VsOutput; - -void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) { - const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) { - if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None || - w != VsOutput::None) { - info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w}); - } - }; - // VS_OUT_MISC_VEC - add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None, - ctl.use_vtx_edge_flag - ? VsOutput::EdgeFlag - : (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None), - ctl.use_vtx_kill_flag - ? VsOutput::KillFlag - : (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None), - ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None); - // VS_OUT_CCDIST0 - add_output(ctl.IsClipDistEnabled(0) - ? VsOutput::ClipDist0 - : (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None), - ctl.IsClipDistEnabled(1) - ? VsOutput::ClipDist1 - : (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None), - ctl.IsClipDistEnabled(2) - ? VsOutput::ClipDist2 - : (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None), - ctl.IsClipDistEnabled(3) - ? VsOutput::ClipDist3 - : (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None)); - // VS_OUT_CCDIST1 - add_output(ctl.IsClipDistEnabled(4) - ? VsOutput::ClipDist4 - : (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None), - ctl.IsClipDistEnabled(5) - ? VsOutput::ClipDist5 - : (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None), - ctl.IsClipDistEnabled(6) - ? VsOutput::ClipDist6 - : (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None), - ctl.IsClipDistEnabled(7) - ? VsOutput::ClipDist7 - : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None)); -} - -Shader::Info MakeShaderInfo(const GuestProgram& pgm, const AmdGpu::Liverpool::Regs& regs) { - Shader::Info info{}; - info.user_data = pgm.user_data; - info.pgm_base = VAddr(pgm.code.data()); - info.pgm_hash = pgm.hash; - info.stage = pgm.stage; - switch (pgm.stage) { - case Shader::Stage::Vertex: { - info.num_user_data = regs.vs_program.settings.num_user_regs; - info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt; - BuildVsOutputs(info, regs.vs_output_control); - break; - } - case Shader::Stage::Fragment: { - info.num_user_data = regs.ps_program.settings.num_user_regs; - for (u32 i = 0; i < regs.num_interp; i++) { - info.ps_inputs.push_back({ - .param_index = regs.ps_inputs[i].input_offset.Value(), - .is_default = bool(regs.ps_inputs[i].use_default), - .is_flat = bool(regs.ps_inputs[i].flat_shade), - .default_value = regs.ps_inputs[i].default_value, - }); - } - break; - } - case Shader::Stage::Compute: { - const auto& cs_pgm = regs.cs_program; - info.num_user_data = cs_pgm.settings.num_user_regs; - info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, - cs_pgm.num_thread_z.full}; - info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1), - cs_pgm.IsTgidEnabled(2)}; - info.shared_memory_size = cs_pgm.SharedMemSize(); - break; - } - default: - break; - } - return info; -} - -[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) { - return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2)); -} - -ShaderCache::ShaderCache(const Instance& instance_, AmdGpu::Liverpool* liverpool_) - : instance{instance_}, liverpool{liverpool_}, inst_pool{8192}, block_pool{512} { - profile = Shader::Profile{ - .supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 0x00010600U : 0x00010500U, - .subgroup_size = instance.SubgroupSize(), - .support_explicit_workgroup_layout = true, - }; -} - -vk::ShaderModule ShaderCache::CompileModule(Shader::Info& info, std::span code, - size_t perm_idx, u32& binding) { - LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, - perm_idx != 0 ? "(permutation)" : ""); - - if (Config::dumpShaders()) { - DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin"); - } - - block_pool.ReleaseContents(); - inst_pool.ReleaseContents(); - const auto ir_program = Shader::TranslateProgram(inst_pool, block_pool, code, info, profile); - - // Compile IR to SPIR-V - const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, ir_program, binding); - if (Config::dumpShaders()) { - DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv"); - } - - // Create module and set name to hash in renderdoc - const auto module = CompileSPV(spv, instance.GetDevice()); - ASSERT(module != VK_NULL_HANDLE); - const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx); - Vulkan::SetObjectName(instance.GetDevice(), module, name); - return module; -} - -Program* ShaderCache::CreateProgram(const GuestProgram& pgm, u32& binding) { - Program* program = program_pool.Create(MakeShaderInfo(pgm, liverpool->regs)); - u32 start_binding = binding; - const auto module = CompileModule(program->info, pgm.code, 0, binding); - program->modules.emplace_back(module, StageSpecialization{program->info, start_binding}); - return program; -} - -std::tuple ShaderCache::GetProgram( - const GuestProgram& pgm, u32& binding) { - auto [it_pgm, new_program] = program_cache.try_emplace(pgm.hash); - if (new_program) { - auto program = CreateProgram(pgm, binding); - const auto module = program->modules.back().module; - it_pgm.value() = program; - return std::make_tuple(&program->info, module, HashCombine(pgm.hash, 0)); - } - - Program* program = it_pgm->second; - const auto& info = program->info; - size_t perm_idx = program->modules.size(); - StageSpecialization spec{info, binding}; - vk::ShaderModule module{}; - - const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec); - if (it == program->modules.end()) { - auto new_info = MakeShaderInfo(pgm, liverpool->regs); - module = CompileModule(new_info, pgm.code, perm_idx, binding); - program->modules.emplace_back(module, std::move(spec)); - } else { - binding += info.NumBindings(); - module = it->module; - perm_idx = std::distance(program->modules.begin(), it); - } - return std::make_tuple(&info, module, HashCombine(pgm.hash, perm_idx)); -} - -void ShaderCache::DumpShader(std::span code, u64 hash, Shader::Stage stage, - size_t perm_idx, std::string_view ext) { - using namespace Common::FS; - const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps"; - if (!std::filesystem::exists(dump_dir)) { - std::filesystem::create_directories(dump_dir); - } - const auto filename = fmt::format("{}_{:#018x}_{}.{}", stage, hash, perm_idx, ext); - const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; - file.WriteSpan(code); -} - -} // namespace Vulkan