From c7adad8bf217104ce61575c6eebb0b6fdcd74e8b Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 9 Oct 2024 01:45:16 +0300 Subject: [PATCH] spirv: Flush denormals if possible --- .../backend/spirv/emit_spirv.cpp | 50 ++++++++++------ .../backend/spirv/spirv_emit_context.cpp | 7 +-- .../frontend/copy_shader.cpp | 2 +- src/shader_recompiler/frontend/copy_shader.h | 2 +- src/shader_recompiler/info.h | 4 +- .../ir/passes/ring_access_elimination.cpp | 15 +++-- src/shader_recompiler/ir/reg.h | 21 ------- src/shader_recompiler/profile.h | 5 -- src/shader_recompiler/runtime_info.h | 34 ++++++----- src/video_core/amdgpu/liverpool.h | 6 ++ src/video_core/amdgpu/types.h | 16 +++++- .../renderer_vulkan/vk_instance.cpp | 3 +- src/video_core/renderer_vulkan/vk_instance.h | 6 ++ .../renderer_vulkan/vk_pipeline_cache.cpp | 57 ++++++++++--------- 14 files changed, 130 insertions(+), 98 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index f90e9db7..e84908a5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -206,10 +206,7 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) { return main; } -void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { - const auto& info = program.info; - const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); - spv::ExecutionModel execution_model{}; +void SetupCapabilities(const Info& info, EmitContext& ctx) { ctx.AddCapability(spv::Capability::Image1D); ctx.AddCapability(spv::Capability::Sampled1D); ctx.AddCapability(spv::Capability::ImageQuery); @@ -247,6 +244,19 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { if (info.uses_group_ballot) { ctx.AddCapability(spv::Capability::GroupNonUniformBallot); } + if (info.stage == Stage::Export || info.stage == Stage::Vertex) { + ctx.AddExtension("SPV_KHR_shader_draw_parameters"); + ctx.AddCapability(spv::Capability::DrawParameters); + } + if (info.stage == Stage::Geometry) { + ctx.AddCapability(spv::Capability::Geometry); + } +} + +void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { + const auto& info = program.info; + const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); + spv::ExecutionModel execution_model{}; switch (program.info.stage) { case Stage::Compute: { const std::array workgroup_size{ctx.runtime_info.cs_info.workgroup_size}; @@ -290,6 +300,24 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.AddEntryPoint(execution_model, main, "main", interfaces); } +void SetupFloatMode(EmitContext& ctx, const Profile& profile, const RuntimeInfo& runtime_info, + Id main_func) { + ctx.AddExtension("SPV_KHR_float_controls"); + const auto fp_denorm_mode = runtime_info.fp_denorm_mode32; + if (fp_denorm_mode == AmdGpu::FpDenormMode::InOutFlush) { + if (profile.support_fp32_denorm_flush) { + ctx.AddCapability(spv::Capability::DenormFlushToZero); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U); + } + } else { + LOG_WARNING(Render_Vulkan, "Unknown FP denorm mode {}", u32(fp_denorm_mode)); + } + const auto fp_round_mode = runtime_info.fp_round_mode32; + if (fp_round_mode != AmdGpu::FpRoundMode::NearestEven) { + LOG_WARNING(Render_Vulkan, "Unknown FP rounding mode {}", u32(fp_round_mode)); + } +} + void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) { auto inst{program.blocks.front()->begin()}; size_t block_index{0}; @@ -314,18 +342,8 @@ std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in EmitContext ctx{profile, runtime_info, program.info, binding}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(program, ctx, main); - switch (program.info.stage) { - case Stage::Export: - case Stage::Vertex: - ctx.AddExtension("SPV_KHR_shader_draw_parameters"); - ctx.AddCapability(spv::Capability::DrawParameters); - break; - case Stage::Geometry: - ctx.AddCapability(spv::Capability::Geometry); - break; - default: - break; - } + SetupCapabilities(program.info, ctx); + SetupFloatMode(ctx, profile, runtime_info, main); PatchPhiNodes(program, ctx); binding.user_data += program.info.ud_mask.NumRegs(); return ctx.Assemble(); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 5eee656d..6581a7a5 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -284,7 +284,8 @@ void EmitContext::DefineInputs() { frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input); frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output); front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); - for (const auto& input : runtime_info.fs_info.inputs) { + for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) { + const auto& input = runtime_info.fs_info.inputs[i]; const u32 semantic = input.param_index; ASSERT(semantic < IR::NumParams); if (input.is_default && !input.is_flat) { @@ -333,7 +334,6 @@ void EmitContext::DefineInputs() { const auto num_params = runtime_info.gs_info.in_vertex_data_size / 4 - 1u; for (int param_id = 0; param_id < num_params; ++param_id) { - const IR::Attribute param{IR::Attribute::Param0 + param_id}; const Id type{TypeArray(F32[4], ConstU32(num_verts_in))}; const Id id{DefineInput(type, param_id)}; Name(id, fmt::format("in_attr{}", param_id)); @@ -394,8 +394,7 @@ void EmitContext::DefineOutputs() { case Stage::Geometry: { output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); - for (u32 attr_id = 0; attr_id < runtime_info.gs_info.copy_data.num_attrs; attr_id++) { - const IR::Attribute param{IR::Attribute::Param0 + attr_id}; + for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) { const Id id{DefineOutput(F32[4], attr_id)}; Name(id, fmt::format("out_attr{}", attr_id)); output_params[attr_id] = {id, output_f32, F32[1], 4u}; diff --git a/src/shader_recompiler/frontend/copy_shader.cpp b/src/shader_recompiler/frontend/copy_shader.cpp index 363c1c82..b2c79566 100644 --- a/src/shader_recompiler/frontend/copy_shader.cpp +++ b/src/shader_recompiler/frontend/copy_shader.cpp @@ -7,7 +7,7 @@ namespace Shader { -CopyShaderData ParseCopyShader(const std::span& code) { +CopyShaderData ParseCopyShader(std::span code) { Gcn::GcnCodeSlice code_slice{code.data(), code.data() + code.size()}; Gcn::GcnDecodeContext decoder; diff --git a/src/shader_recompiler/frontend/copy_shader.h b/src/shader_recompiler/frontend/copy_shader.h index ca3e1ac3..55cc31eb 100644 --- a/src/shader_recompiler/frontend/copy_shader.h +++ b/src/shader_recompiler/frontend/copy_shader.h @@ -16,6 +16,6 @@ struct CopyShaderData { u32 num_attrs{0}; }; -CopyShaderData ParseCopyShader(const std::span& code); +CopyShaderData ParseCopyShader(std::span code); } // namespace Shader diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 78a6805f..e727c8a0 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -3,12 +3,12 @@ #pragma once #include -#include #include #include #include "common/assert.h" #include "common/types.h" #include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/frontend/copy_shader.h" #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/type.h" @@ -170,6 +170,8 @@ struct Info { }; UserDataMask ud_mask{}; + CopyShaderData gs_copy_data; + s8 vertex_offset_sgpr = -1; s8 instance_offset_sgpr = -1; diff --git a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp index 857921b1..eb1be296 100644 --- a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp +++ b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "shader_recompiler/frontend/translate/translate.h" +#include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/opcodes.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/reg.h" @@ -11,6 +11,8 @@ namespace Shader::Optimization { void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info, Stage stage) { + auto& info = program.info; + const auto& ForEachInstruction = [&](auto func) { for (IR::Block* block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { @@ -52,6 +54,9 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim break; } case Stage::Geometry: { + const auto& gs_info = runtime_info.gs_info; + info.gs_copy_data = Shader::ParseCopyShader(gs_info.vs_copy); + ForEachInstruction([&](IR::IREmitter& ir, IR::Inst& inst) { const auto opcode = inst.GetOpcode(); switch (opcode) { @@ -81,12 +86,12 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim const auto offset = inst.Flags().inst_offset.Value(); const auto data = ir.BitCast(IR::U32{inst.Arg(2)}); - const auto comp_ofs = runtime_info.gs_info.output_vertices * 4u; - const auto output_size = comp_ofs * runtime_info.gs_info.out_vertex_data_size; + const auto comp_ofs = gs_info.output_vertices * 4u; + const auto output_size = comp_ofs * gs_info.out_vertex_data_size; const auto vc_read_ofs = (((offset / comp_ofs) * comp_ofs) % output_size) * 16u; - const auto& it = runtime_info.gs_info.copy_data.attr_map.find(vc_read_ofs); - ASSERT(it != runtime_info.gs_info.copy_data.attr_map.cend()); + const auto& it = info.gs_copy_data.attr_map.find(vc_read_ofs); + ASSERT(it != info.gs_copy_data.attr_map.cend()); const auto& [attr, comp] = it->second; inst.ReplaceOpcode(IR::Opcode::SetAttribute); diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index 9ec77e5f..5facaf5c 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -10,20 +10,6 @@ namespace Shader::IR { -enum class FpRoundMode : u32 { - NearestEven = 0, - PlusInf = 1, - MinInf = 2, - ToZero = 3, -}; - -enum class FpDenormMode : u32 { - InOutFlush = 0, - InAllowOutFlush = 1, - InFlushOutAllow = 2, - InOutAllow = 3, -}; - enum class FloatClassFunc : u32 { SignalingNan = 1 << 0, QuietNan = 1 << 1, @@ -41,13 +27,6 @@ enum class FloatClassFunc : u32 { }; DECLARE_ENUM_FLAG_OPERATORS(FloatClassFunc) -union Mode { - BitField<0, 4, FpRoundMode> fp_round; - BitField<4, 2, FpDenormMode> fp_denorm_single; - BitField<6, 2, FpDenormMode> fp_denorm_double; - BitField<8, 1, u32> dx10_clamp; -}; - union TextureInstInfo { u32 raw; BitField<0, 1, u32> is_depth; diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index badd5455..bbda731e 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -19,13 +19,8 @@ struct Profile { bool support_float_controls{}; bool support_separate_denorm_behavior{}; bool support_separate_rounding_mode{}; - bool support_fp16_denorm_preserve{}; bool support_fp32_denorm_preserve{}; - bool support_fp16_denorm_flush{}; bool support_fp32_denorm_flush{}; - bool support_fp16_signed_zero_nan_preserve{}; - bool support_fp32_signed_zero_nan_preserve{}; - bool support_fp64_signed_zero_nan_preserve{}; bool support_explicit_workgroup_layout{}; bool has_broken_spirv_clamp{}; bool lower_left_origin_mode{}; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 8c0838c9..4d15c207 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -4,11 +4,9 @@ #pragma once #include +#include #include - -#include "common/assert.h" #include "common/types.h" -#include "frontend/copy_shader.h" #include "video_core/amdgpu/types.h" namespace Shader { @@ -62,7 +60,8 @@ enum class VsOutput : u8 { using VsOutputMap = std::array; struct VertexRuntimeInfo { - boost::container::static_vector outputs; + u32 num_outputs; + std::array outputs; bool emulate_depth_negative_one_to_one{}; bool operator==(const VertexRuntimeInfo& other) const noexcept { @@ -79,13 +78,13 @@ struct GeometryRuntimeInfo { u32 out_vertex_data_size{}; AmdGpu::PrimitiveType in_primitive; GsOutputPrimTypes out_primitive; - CopyShaderData copy_data; + std::span vs_copy; + u64 vs_copy_hash; bool operator==(const GeometryRuntimeInfo& other) const noexcept { return num_invocations && other.num_invocations && output_vertices == other.output_vertices && in_primitive == other.in_primitive && - std::ranges::equal(out_primitive, other.out_primitive) && - std::ranges::equal(copy_data.attr_map, other.copy_data.attr_map); + std::ranges::equal(out_primitive, other.out_primitive); } }; @@ -106,7 +105,8 @@ struct FragmentRuntimeInfo { auto operator<=>(const PsInput&) const noexcept = default; }; - boost::container::static_vector inputs; + u32 num_inputs; + std::array inputs; struct PsColorBuffer { AmdGpu::NumberFormat num_format; MrtSwizzle mrt_swizzle; @@ -117,7 +117,9 @@ struct FragmentRuntimeInfo { bool operator==(const FragmentRuntimeInfo& other) const noexcept { return std::ranges::equal(color_buffers, other.color_buffers) && - std::ranges::equal(inputs, other.inputs); + num_inputs == other.num_inputs && + std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(), + other.inputs.begin() + num_inputs); } }; @@ -141,11 +143,15 @@ struct RuntimeInfo { u32 num_user_data; u32 num_input_vgprs; u32 num_allocated_vgprs; - ExportRuntimeInfo es_info; - VertexRuntimeInfo vs_info; - GeometryRuntimeInfo gs_info; - FragmentRuntimeInfo fs_info; - ComputeRuntimeInfo cs_info; + AmdGpu::FpDenormMode fp_denorm_mode32; + AmdGpu::FpRoundMode fp_round_mode32; + union { + ExportRuntimeInfo es_info; + VertexRuntimeInfo vs_info; + GeometryRuntimeInfo gs_info; + FragmentRuntimeInfo fs_info; + ComputeRuntimeInfo cs_info; + }; RuntimeInfo(Stage stage_) : stage{stage_} {} diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 508420bc..1c994d0a 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -92,6 +92,12 @@ struct Liverpool { union { BitField<0, 6, u64> num_vgprs; BitField<6, 4, u64> num_sgprs; + BitField<10, 2, u64> priority; + BitField<12, 2, FpRoundMode> fp_round_mode32; + BitField<14, 2, FpRoundMode> fp_round_mode64; + BitField<16, 2, FpDenormMode> fp_denorm_mode32; + BitField<18, 2, FpDenormMode> fp_denorm_mode64; + BitField<12, 8, u64> float_mode; BitField<24, 2, u64> vgpr_comp_cnt; // SPI provided per-thread inputs BitField<33, 5, u64> num_user_regs; } settings; diff --git a/src/video_core/amdgpu/types.h b/src/video_core/amdgpu/types.h index 8cc023a7..6b95ed91 100644 --- a/src/video_core/amdgpu/types.h +++ b/src/video_core/amdgpu/types.h @@ -7,6 +7,20 @@ namespace AmdGpu { +enum class FpRoundMode : u32 { + NearestEven = 0, + PlusInf = 1, + MinInf = 2, + ToZero = 3, +}; + +enum class FpDenormMode : u32 { + InOutFlush = 0, + InAllowOutFlush = 1, + InFlushOutAllow = 2, + InOutAllow = 3, +}; + // See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide] enum class PrimitiveType : u32 { None = 0, @@ -103,4 +117,4 @@ enum class NumberFormat : u32 { Ubscaled = 13, }; -} // namespace AmdGpu \ No newline at end of file +} // namespace AmdGpu diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 7a1d784f..d3843c76 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -217,9 +217,10 @@ bool Instance::CreateDevice() { const vk::StructureChain properties_chain = physical_device.getProperties2< vk::PhysicalDeviceProperties2, vk::PhysicalDevicePortabilitySubsetPropertiesKHR, vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties, - vk::PhysicalDevicePushDescriptorPropertiesKHR>(); + vk::PhysicalDevicePushDescriptorPropertiesKHR, vk::PhysicalDeviceVulkan12Properties>(); subgroup_size = properties_chain.get().subgroupSize; push_descriptor_props = properties_chain.get(); + vk12_props = properties_chain.get(); LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", subgroup_size); features = feature_chain.get().features; diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index d77d0c20..474b86e9 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -242,6 +242,11 @@ public: return push_descriptor_props.maxPushDescriptors; } + /// Returns the vulkan 1.2 physical device properties. + const vk::PhysicalDeviceVulkan12Properties& GetVk12Properties() const noexcept { + return vk12_props; + } + /// Returns true if shaders can declare the ClipDistance attribute bool IsShaderClipDistanceSupported() const { return features.shaderClipDistance; @@ -279,6 +284,7 @@ private: vk::UniqueDevice device; vk::PhysicalDeviceProperties properties; vk::PhysicalDevicePushDescriptorPropertiesKHR push_descriptor_props; + vk::PhysicalDeviceVulkan12Properties vk12_props; vk::PhysicalDeviceFeatures features; vk::DriverIdKHR driver_id; vk::UniqueDebugUtilsMessengerEXT debug_callback{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 929fa9cc..a06d82eb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -7,7 +7,6 @@ #include "common/io_file.h" #include "common/path_util.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" -#include "shader_recompiler/frontend/copy_shader.h" #include "shader_recompiler/info.h" #include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" @@ -41,7 +40,7 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info, const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) { if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None || w != VsOutput::None) { - info.outputs.emplace_back(Shader::VsOutputMap{x, y, z, w}); + info.outputs[info.num_outputs++] = Shader::VsOutputMap{x, y, z, w}; } }; // VS_OUT_MISC_VEC @@ -84,18 +83,21 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info, Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { auto info = Shader::RuntimeInfo{stage}; const auto& regs = liverpool->regs; + const auto BuildCommon = [&](const auto& program) { + info.num_user_data = program.settings.num_user_regs; + info.num_input_vgprs = program.settings.vgpr_comp_cnt; + info.num_allocated_vgprs = program.settings.num_vgprs * 4; + info.fp_denorm_mode32 = program.settings.fp_denorm_mode32; + info.fp_round_mode32 = program.settings.fp_round_mode32; + }; switch (stage) { case Shader::Stage::Export: { - info.num_user_data = regs.es_program.settings.num_user_regs; - info.num_input_vgprs = regs.es_program.settings.vgpr_comp_cnt; - info.num_allocated_vgprs = regs.es_program.settings.num_vgprs * 4; + BuildCommon(regs.es_program); info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize; break; } case Shader::Stage::Vertex: { - info.num_user_data = regs.vs_program.settings.num_user_regs; - info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt; - info.num_allocated_vgprs = regs.vs_program.settings.num_vgprs * 4; + BuildCommon(regs.vs_program); GatherVertexOutputs(info.vs_info, regs.vs_output_control); info.vs_info.emulate_depth_negative_one_to_one = !instance.IsDepthClipControlSupported() && @@ -103,39 +105,35 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { break; } case Shader::Stage::Geometry: { - info.num_user_data = regs.gs_program.settings.num_user_regs; - info.num_input_vgprs = regs.gs_program.settings.vgpr_comp_cnt; - info.num_allocated_vgprs = regs.gs_program.settings.num_vgprs * 4; - info.gs_info.output_vertices = regs.vgt_gs_max_vert_out; - info.gs_info.num_invocations = + BuildCommon(regs.gs_program); + auto& gs_info = info.gs_info; + gs_info.output_vertices = regs.vgt_gs_max_vert_out; + gs_info.num_invocations = regs.vgt_gs_instance_cnt.IsEnabled() ? regs.vgt_gs_instance_cnt.count : 1; - info.gs_info.in_primitive = regs.primitive_type; + gs_info.in_primitive = regs.primitive_type; for (u32 stream_id = 0; stream_id < Shader::GsMaxOutputStreams; ++stream_id) { - info.gs_info.out_primitive[stream_id] = + gs_info.out_primitive[stream_id] = regs.vgt_gs_out_prim_type.GetPrimitiveType(stream_id); } - info.gs_info.in_vertex_data_size = regs.vgt_esgs_ring_itemsize; - info.gs_info.out_vertex_data_size = regs.vgt_gs_vert_itemsize[0]; - - // Extract semantics offsets from a copy shader - const auto vc_stage = Shader::Stage::Vertex; - const auto* pgm_vc = regs.ProgramForStage(static_cast(vc_stage)); - const auto params_vc = Liverpool::GetParams(*pgm_vc); - DumpShader(params_vc.code, params_vc.hash, Shader::Stage::Vertex, 0, "copy.bin"); - info.gs_info.copy_data = Shader::ParseCopyShader(params_vc.code); + gs_info.in_vertex_data_size = regs.vgt_esgs_ring_itemsize; + gs_info.out_vertex_data_size = regs.vgt_gs_vert_itemsize[0]; + const auto params_vc = Liverpool::GetParams(regs.vs_program); + gs_info.vs_copy = params_vc.code; + gs_info.vs_copy_hash = params_vc.hash; + DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin"); break; } case Shader::Stage::Fragment: { - info.num_user_data = regs.ps_program.settings.num_user_regs; - info.num_allocated_vgprs = regs.ps_program.settings.num_vgprs * 4; + BuildCommon(regs.ps_program); const auto& ps_inputs = regs.ps_inputs; + info.fs_info.num_inputs = regs.num_interp; for (u32 i = 0; i < regs.num_interp; i++) { - info.fs_info.inputs.push_back({ + info.fs_info.inputs[i] = { .param_index = u8(ps_inputs[i].input_offset.Value()), .is_default = bool(ps_inputs[i].use_default), .is_flat = bool(ps_inputs[i].flat_shade), .default_value = u8(ps_inputs[i].default_value), - }); + }; } for (u32 i = 0; i < Shader::MaxColorBuffers; i++) { info.fs_info.color_buffers[i] = { @@ -166,9 +164,12 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, AmdGpu::Liverpool* liverpool_) : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, desc_heap{instance, scheduler.GetMasterSemaphore(), DescriptorHeapSizes} { + const auto& vk12_props = instance.GetVk12Properties(); profile = Shader::Profile{ .supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 0x00010600U : 0x00010500U, .subgroup_size = instance.SubgroupSize(), + .support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32), + .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32), .support_explicit_workgroup_layout = true, }; auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({});