From b09b28c7f3a2191a60d356eb3b7db87afa6adbcb Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sun, 15 Sep 2024 23:42:14 +0300 Subject: [PATCH] graphics_pipeline: Move some depth configuration to dynamic state (#931) * graphics_pipeline: More proper masking * pipeline_cache: Skip setting depth/stencil fields when test is disabled * pipeline_cache: More fixes to depth stencil state * vk_rasterizer: Use dynamic state for depth bounds and bias * pipeline_cache: Missed depth bias enable * vk_rasterizer: Add stencil dynamic states * thread: Reduce spammy log * Remove some leftover state * pipeline_cache: Allocate pipelines from pools * vk_graphics_pipeline: Remove bindings member Saves about 1KB from each pipeline --- .../libraries/kernel/thread_management.cpp | 2 +- src/video_core/amdgpu/liverpool.h | 10 +++ .../renderer_vulkan/vk_graphics_pipeline.cpp | 43 ++++------- .../renderer_vulkan/vk_graphics_pipeline.h | 16 +--- .../renderer_vulkan/vk_pipeline_cache.cpp | 77 ++++++++----------- .../renderer_vulkan/vk_pipeline_cache.h | 6 +- .../renderer_vulkan/vk_rasterizer.cpp | 37 +++++++++ 7 files changed, 104 insertions(+), 87 deletions(-) diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index 0da51425..2a44f853 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -654,7 +654,7 @@ int PS4_SYSV_ABI scePthreadCondInit(ScePthreadCond* cond, const ScePthreadCondat int result = pthread_cond_init(&(*cond)->cond, &(*attr)->cond_attr); if (name != nullptr) { - LOG_INFO(Kernel_Pthread, "name={}, result={}", (*cond)->name, result); + LOG_TRACE(Kernel_Pthread, "name={}, result={}", (*cond)->name, result); } switch (result) { diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 89723ccb..411b25ed 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -473,6 +473,11 @@ struct Liverpool { CullMode CullingMode() const { return static_cast(cull_front | cull_back << 1); } + + bool NeedsBias() const { + return enable_polygon_offset_back || enable_polygon_offset_front || + enable_polygon_offset_para; + } }; union VsOutputConfig { @@ -506,6 +511,11 @@ struct Liverpool { u32 GetMask(int buf_id) const { return (raw >> (buf_id * 4)) & 0xfu; } + + void SetMask(int buf_id, u32 mask) { + raw &= ~(0xf << (buf_id * 4)); + raw |= (mask << (buf_id * 4)); + } }; struct IndexBufferBase { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 283c8536..f7474b24 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -95,9 +95,6 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul ? vk::FrontFace::eClockwise : vk::FrontFace::eCounterClockwise, .depthBiasEnable = bool(key.depth_bias_enable), - .depthBiasConstantFactor = key.depth_bias_const_factor, - .depthBiasClamp = key.depth_bias_clamp, - .depthBiasSlopeFactor = key.depth_bias_slope_factor, .lineWidth = 1.0f, }; @@ -134,9 +131,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul }; boost::container::static_vector dynamic_states = { - vk::DynamicState::eViewport, - vk::DynamicState::eScissor, - vk::DynamicState::eBlendConstants, + vk::DynamicState::eViewport, vk::DynamicState::eScissor, + vk::DynamicState::eBlendConstants, vk::DynamicState::eDepthBounds, + vk::DynamicState::eDepthBias, vk::DynamicState::eStencilReference, + vk::DynamicState::eStencilCompareMask, vk::DynamicState::eStencilWriteMask, }; if (instance.IsColorWriteEnableSupported()) { @@ -153,39 +151,31 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul }; const vk::PipelineDepthStencilStateCreateInfo depth_info = { - .depthTestEnable = key.depth.depth_enable, - .depthWriteEnable = key.depth.depth_write_enable, - .depthCompareOp = LiverpoolToVK::CompareOp(key.depth.depth_func), - .depthBoundsTestEnable = key.depth.depth_bounds_enable, - .stencilTestEnable = key.depth.stencil_enable, + .depthTestEnable = key.depth_stencil.depth_enable, + .depthWriteEnable = key.depth_stencil.depth_write_enable, + .depthCompareOp = LiverpoolToVK::CompareOp(key.depth_stencil.depth_func), + .depthBoundsTestEnable = key.depth_stencil.depth_bounds_enable, + .stencilTestEnable = key.depth_stencil.stencil_enable, .front{ .failOp = LiverpoolToVK::StencilOp(key.stencil.stencil_fail_front), .passOp = LiverpoolToVK::StencilOp(key.stencil.stencil_zpass_front), .depthFailOp = LiverpoolToVK::StencilOp(key.stencil.stencil_zfail_front), - .compareOp = LiverpoolToVK::CompareOp(key.depth.stencil_ref_func), - .compareMask = key.stencil_ref_front.stencil_mask, - .writeMask = key.stencil_ref_front.stencil_write_mask, - .reference = key.stencil_ref_front.stencil_test_val, + .compareOp = LiverpoolToVK::CompareOp(key.depth_stencil.stencil_ref_func), }, .back{ - .failOp = LiverpoolToVK::StencilOp(key.depth.backface_enable + .failOp = LiverpoolToVK::StencilOp(key.depth_stencil.backface_enable ? key.stencil.stencil_fail_back.Value() : key.stencil.stencil_fail_front.Value()), - .passOp = LiverpoolToVK::StencilOp(key.depth.backface_enable + .passOp = LiverpoolToVK::StencilOp(key.depth_stencil.backface_enable ? key.stencil.stencil_zpass_back.Value() : key.stencil.stencil_zpass_front.Value()), - .depthFailOp = LiverpoolToVK::StencilOp(key.depth.backface_enable + .depthFailOp = LiverpoolToVK::StencilOp(key.depth_stencil.backface_enable ? key.stencil.stencil_zfail_back.Value() : key.stencil.stencil_zfail_front.Value()), - .compareOp = LiverpoolToVK::CompareOp(key.depth.backface_enable - ? key.depth.stencil_bf_func.Value() - : key.depth.stencil_ref_func.Value()), - .compareMask = key.stencil_ref_back.stencil_mask, - .writeMask = key.stencil_ref_back.stencil_write_mask, - .reference = key.stencil_ref_back.stencil_test_val, + .compareOp = LiverpoolToVK::CompareOp(key.depth_stencil.backface_enable + ? key.depth_stencil.stencil_bf_func.Value() + : key.depth_stencil.stencil_ref_func.Value()), }, - .minDepthBounds = key.depth_bounds_min, - .maxDepthBounds = key.depth_bounds_max, }; auto stage = u32(Shader::Stage::Vertex); @@ -302,6 +292,7 @@ GraphicsPipeline::~GraphicsPipeline() = default; void GraphicsPipeline::BuildDescSetLayout() { u32 binding{}; + boost::container::small_vector bindings; for (const auto* stage : stages) { if (!stage) { continue; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 7778c417..c8a08b4f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -30,17 +30,10 @@ struct GraphicsPipelineKey { vk::Format depth_format; vk::Format stencil_format; - Liverpool::DepthControl depth; - float depth_bounds_min; - float depth_bounds_max; - float depth_bias_const_factor; - float depth_bias_slope_factor; - float depth_bias_clamp; + Liverpool::DepthControl depth_stencil; u32 depth_bias_enable; - u32 num_samples = 1; + u32 num_samples; Liverpool::StencilControl stencil; - Liverpool::StencilRefMask stencil_ref_front; - Liverpool::StencilRefMask stencil_ref_back; Liverpool::PrimitiveType prim_type; u32 enable_primitive_restart; u32 primitive_restart_index; @@ -48,7 +41,7 @@ struct GraphicsPipelineKey { Liverpool::CullMode cull_mode; Liverpool::FrontFace front_face; Liverpool::ClipSpace clip_space; - Liverpool::ColorBufferMask cb_shader_mask{}; + Liverpool::ColorBufferMask cb_shader_mask; std::array blend_controls; std::array write_masks; @@ -91,7 +84,7 @@ public: } bool IsDepthEnabled() const { - return key.depth.depth_enable.Value(); + return key.depth_stencil.depth_enable.Value(); } private: @@ -107,7 +100,6 @@ private: std::array stages{}; GraphicsPipelineKey key; bool uses_push_descriptors{}; - boost::container::small_vector bindings; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index fc42d12f..7f6079a5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -167,11 +167,10 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() { } const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); if (is_new) { - it.value() = std::make_unique( - instance, scheduler, desc_heap, graphics_key, *pipeline_cache, infos, modules); + it.value() = graphics_pipeline_pool.Create(instance, scheduler, desc_heap, graphics_key, + *pipeline_cache, infos, modules); } - const GraphicsPipeline* pipeline = it->second.get(); - return pipeline; + return it->second; } const ComputePipeline* PipelineCache::GetComputePipeline() { @@ -180,11 +179,10 @@ const ComputePipeline* PipelineCache::GetComputePipeline() { } const auto [it, is_new] = compute_pipelines.try_emplace(compute_key); if (is_new) { - it.value() = std::make_unique( - instance, scheduler, desc_heap, *pipeline_cache, compute_key, *infos[0], modules[0]); + it.value() = compute_pipeline_pool.Create(instance, scheduler, desc_heap, *pipeline_cache, + compute_key, *infos[0], modules[0]); } - const ComputePipeline* pipeline = it->second.get(); - return pipeline; + return it->second; } bool ShouldSkipShader(u64 shader_hash, const char* shader_type) { @@ -197,28 +195,36 @@ bool ShouldSkipShader(u64 shader_hash, const char* shader_type) { } bool PipelineCache::RefreshGraphicsKey() { + std::memset(&graphics_key, 0, sizeof(GraphicsPipelineKey)); + auto& regs = liverpool->regs; auto& key = graphics_key; - key.depth = regs.depth_control; - key.depth.depth_write_enable.Assign(regs.depth_control.depth_write_enable.Value() && - !regs.depth_render_control.depth_clear_enable); - key.depth_bounds_min = regs.depth_bounds_min; - key.depth_bounds_max = regs.depth_bounds_max; - key.depth_bias_enable = regs.polygon_control.enable_polygon_offset_back || - regs.polygon_control.enable_polygon_offset_front || - regs.polygon_control.enable_polygon_offset_para; - if (regs.polygon_control.enable_polygon_offset_front) { - key.depth_bias_const_factor = regs.poly_offset.front_offset; - key.depth_bias_slope_factor = regs.poly_offset.front_scale; + key.depth_stencil = regs.depth_control; + key.depth_stencil.depth_write_enable.Assign(regs.depth_control.depth_write_enable.Value() && + !regs.depth_render_control.depth_clear_enable); + key.depth_bias_enable = regs.polygon_control.NeedsBias(); + + const auto& db = regs.depth_buffer; + const auto ds_format = LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format); + if (db.z_info.format != AmdGpu::Liverpool::DepthBuffer::ZFormat::Invalid) { + key.depth_format = ds_format; } else { - key.depth_bias_const_factor = regs.poly_offset.back_offset; - key.depth_bias_slope_factor = regs.poly_offset.back_scale; + key.depth_format = vk::Format::eUndefined; + } + if (regs.depth_control.depth_enable) { + key.depth_stencil.depth_enable.Assign(key.depth_format != vk::Format::eUndefined); } - key.depth_bias_clamp = regs.poly_offset.depth_bias; key.stencil = regs.stencil_control; - key.stencil_ref_front = regs.stencil_ref_front; - key.stencil_ref_back = regs.stencil_ref_back; + + if (db.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid) { + key.stencil_format = key.depth_format; + } else { + key.stencil_format = vk::Format::eUndefined; + } + if (key.depth_stencil.stencil_enable) { + key.depth_stencil.stencil_enable.Assign(key.stencil_format != vk::Format::eUndefined); + } key.prim_type = regs.primitive_type; key.enable_primitive_restart = regs.enable_primitive_restart & 1; key.primitive_restart_index = regs.primitive_restart_index; @@ -228,27 +234,6 @@ bool PipelineCache::RefreshGraphicsKey() { key.front_face = regs.polygon_control.front_face; key.num_samples = regs.aa_config.NumSamples(); - const auto& db = regs.depth_buffer; - const auto ds_format = LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format); - - if (db.z_info.format != AmdGpu::Liverpool::DepthBuffer::ZFormat::Invalid) { - key.depth_format = ds_format; - } else { - key.depth_format = vk::Format::eUndefined; - } - if (key.depth.depth_enable) { - key.depth.depth_enable.Assign(key.depth_format != vk::Format::eUndefined); - } - - if (db.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid) { - key.stencil_format = key.depth_format; - } else { - key.stencil_format = vk::Format::eUndefined; - } - if (key.depth.stencil_enable) { - key.depth.stencil_enable.Assign(key.stencil_format != vk::Format::eUndefined); - } - const auto skip_cb_binding = regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; @@ -277,7 +262,7 @@ bool PipelineCache::RefreshGraphicsKey() { key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && !col_buf.info.blend_bypass); key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; - key.cb_shader_mask = regs.color_shader_mask; + key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb)); ++remapped_cb; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 92dcf826..7f0064fb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -74,8 +74,10 @@ private: Shader::Pools pools; tsl::robin_map program_cache; Common::ObjectPool program_pool; - tsl::robin_map> compute_pipelines; - tsl::robin_map> graphics_pipelines; + Common::ObjectPool graphics_pipeline_pool; + Common::ObjectPool compute_pipeline_pool; + tsl::robin_map compute_pipelines; + tsl::robin_map graphics_pipelines; std::array infos{}; std::array modules{}; GraphicsPipelineKey graphics_key{}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 6344315a..23f60da1 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -297,6 +297,43 @@ void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) { cmdbuf.setColorWriteEnableEXT(write_ens); cmdbuf.setColorWriteMaskEXT(0, write_masks); } + if (regs.depth_control.depth_bounds_enable) { + cmdbuf.setDepthBounds(regs.depth_bounds_min, regs.depth_bounds_max); + } + if (regs.polygon_control.NeedsBias()) { + if (regs.polygon_control.enable_polygon_offset_front) { + cmdbuf.setDepthBias(regs.poly_offset.front_offset, regs.poly_offset.depth_bias, + regs.poly_offset.front_scale); + } else { + cmdbuf.setDepthBias(regs.poly_offset.back_offset, regs.poly_offset.depth_bias, + regs.poly_offset.back_scale); + } + } + if (regs.depth_control.stencil_enable) { + const auto front = regs.stencil_ref_front; + const auto back = regs.stencil_ref_back; + if (front.stencil_test_val == back.stencil_test_val) { + cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, + front.stencil_test_val); + } else { + cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFront, front.stencil_test_val); + cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eBack, back.stencil_test_val); + } + if (front.stencil_write_mask == back.stencil_write_mask) { + cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, + front.stencil_write_mask); + } else { + cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFront, front.stencil_write_mask); + cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eBack, back.stencil_write_mask); + } + if (front.stencil_mask == back.stencil_mask) { + cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, + front.stencil_mask); + } else { + cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFront, front.stencil_mask); + cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back.stencil_mask); + } + } } void Rasterizer::UpdateViewportScissorState() {