From 5040be164090a10f79a505d2c709ca426fec69a4 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 14 Jan 2025 21:48:40 -0800 Subject: [PATCH] renderer_vulkan: Handle depth-stencil copies through depth render overrides. (#2134) --- src/core/devtools/widget/reg_popup.cpp | 3 +- src/core/devtools/widget/reg_view.cpp | 4 +- src/video_core/amdgpu/liverpool.h | 65 ++++++++++++++++- .../renderer_vulkan/vk_rasterizer.cpp | 73 +++++++++++++++++++ .../renderer_vulkan/vk_rasterizer.h | 1 + src/video_core/texture_cache/image_info.cpp | 7 +- src/video_core/texture_cache/image_info.h | 2 +- src/video_core/texture_cache/texture_cache.h | 4 +- 8 files changed, 146 insertions(+), 13 deletions(-) diff --git a/src/core/devtools/widget/reg_popup.cpp b/src/core/devtools/widget/reg_popup.cpp index fae620901..7bb38df24 100644 --- a/src/core/devtools/widget/reg_popup.cpp +++ b/src/core/devtools/widget/reg_popup.cpp @@ -105,7 +105,8 @@ void RegPopup::DrawDepthBuffer(const DepthBuffer& depth_data) { "DEPTH_SLICE.TILE_MAX", depth_buffer.depth_slice.tile_max, "Pitch()", depth_buffer.Pitch(), "Height()", depth_buffer.Height(), - "Address()", depth_buffer.Address(), + "DepthAddress()", depth_buffer.DepthAddress(), + "StencilAddress()", depth_buffer.StencilAddress(), "NumSamples()", depth_buffer.NumSamples(), "NumBits()", depth_buffer.NumBits(), "GetDepthSliceSize()", depth_buffer.GetDepthSliceSize() diff --git a/src/core/devtools/widget/reg_view.cpp b/src/core/devtools/widget/reg_view.cpp index a1b7937df..fa3c5e3e6 100644 --- a/src/core/devtools/widget/reg_view.cpp +++ b/src/core/devtools/widget/reg_view.cpp @@ -155,7 +155,7 @@ void RegView::DrawGraphicsRegs() { TableNextColumn(); TextUnformatted("Depth buffer"); TableNextColumn(); - if (regs.depth_buffer.Address() == 0 || !regs.depth_control.depth_enable) { + if (regs.depth_buffer.DepthAddress() == 0 || !regs.depth_control.depth_enable) { TextUnformatted("N/A"); } else { const char* text = 
last_selected_cb == depth_id && default_reg_popup.open ? "x" : "->"; @@ -241,7 +241,7 @@ void RegView::SetData(DebugStateType::RegDump _data, const std::string& base_tit default_reg_popup.open = false; if (last_selected_cb == depth_id) { const auto& has_depth = - regs.depth_buffer.Address() != 0 && regs.depth_control.depth_enable; + regs.depth_buffer.DepthAddress() != 0 && regs.depth_control.depth_enable; if (has_depth) { default_reg_popup.SetData(title, regs.depth_buffer, regs.depth_control); default_reg_popup.open = true; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 070253ca9..a29bde4ce 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -429,11 +429,19 @@ struct Liverpool { } depth_slice; bool DepthValid() const { - return Address() != 0 && z_info.format != ZFormat::Invalid; + return DepthAddress() != 0 && z_info.format != ZFormat::Invalid; } bool StencilValid() const { - return Address() != 0 && stencil_info.format != StencilFormat::Invalid; + return StencilAddress() != 0 && stencil_info.format != StencilFormat::Invalid; + } + + bool DepthWriteValid() const { /* True when DepthWriteAddress() is non-zero and z_info.format is not ZFormat::Invalid; write-side counterpart of DepthValid(). */ + return DepthWriteAddress() != 0 && z_info.format != ZFormat::Invalid; + } + + bool StencilWriteValid() const { /* True when StencilWriteAddress() is non-zero and stencil_info.format is not StencilFormat::Invalid; write-side counterpart of StencilValid(). */ + return StencilWriteAddress() != 0 && stencil_info.format != StencilFormat::Invalid; } u32 Pitch() const { @@ -444,7 +452,7 @@ struct Liverpool { return (depth_size.height_tile_max + 1) << 3; } - u64 Address() const { + u64 DepthAddress() const { return u64(z_read_base) << 8; } @@ -452,6 +460,14 @@ struct Liverpool { return u64(stencil_read_base) << 8; } + u64 DepthWriteAddress() const { /* z_write_base widened to u64 and shifted left by 8, the same scaling DepthAddress() applies to z_read_base. */ + return u64(z_write_base) << 8; + } + + u64 StencilWriteAddress() const { /* stencil_write_base widened to u64 and shifted left by 8, the same scaling applied to stencil_read_base above. */ + return u64(stencil_write_base) << 8; + } + u32 NumSamples() const { return 1u << z_info.num_samples; // spec doesn't say it is a log2 } @@ -1008,6 +1024,46 @@ struct Liverpool { } }; + enum class ForceEnable : u32 { /* Value type of the 2-bit force_hiz_enable, force_his_enable0, force_his_enable1 and force_full_z_range fields of DepthRenderOverride. */ + Off = 0, + Enable = 1, + Disable = 2, 
+ }; + + enum class ForceSumm : u32 { /* Value type of the 2-bit force_z_limit_summ field of DepthRenderOverride. */ + Off = 0, + MinZ = 1, + MaxZ = 2, + Both = 3, + }; + + union DepthRenderOverride { /* One 32-bit word: raw shares storage with the bit fields below, which together cover bits 0 through 31 with no gaps. */ + u32 raw; + BitField<0, 2, ForceEnable> force_hiz_enable; + BitField<2, 2, ForceEnable> force_his_enable0; + BitField<4, 2, ForceEnable> force_his_enable1; + BitField<6, 1, u32> force_shader_z_order; + BitField<7, 1, u32> fast_z_disable; + BitField<8, 1, u32> fast_stencil_disable; + BitField<9, 1, u32> noop_cull_disable; + BitField<10, 1, u32> force_color_kill; + BitField<11, 1, u32> force_z_read; + BitField<12, 1, u32> force_stencil_read; + BitField<13, 2, ForceEnable> force_full_z_range; + BitField<15, 1, u32> force_qc_smask_conflict; + BitField<16, 1, u32> disable_viewport_clamp; + BitField<17, 1, u32> ignore_sc_zrange; + BitField<18, 1, u32> disable_fully_covered; + BitField<19, 2, ForceSumm> force_z_limit_summ; + BitField<21, 5, u32> max_tiles_in_dtt; + BitField<26, 1, u32> disable_tile_rate_tiles; + BitField<27, 1, u32> force_z_dirty; + BitField<28, 1, u32> force_stencil_dirty; + BitField<29, 1, u32> force_z_valid; + BitField<30, 1, u32> force_stencil_valid; + BitField<31, 1, u32> preserve_compression; + }; + union AaConfig { BitField<0, 3, u32> msaa_num_samples; BitField<4, 1, u32> aa_mask_centroid_dtmn; @@ -1209,7 +1265,8 @@ struct Liverpool { DepthRenderControl depth_render_control; INSERT_PADDING_WORDS(1); DepthView depth_view; - INSERT_PADDING_WORDS(2); + DepthRenderOverride depth_render_override; /* Takes the first of the two padding words that used to follow depth_view; one padding word remains. */ + INSERT_PADDING_WORDS(1); Address depth_htile_data_base; INSERT_PADDING_WORDS(2); float depth_bounds_min; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 920e09131..06cfbedac 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -70,6 +70,26 @@ bool Rasterizer::FilterDraw() { return false; } + const bool cb_disabled = + regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; + 
const auto depth_copy = + regs.depth_render_override.force_z_dirty && regs.depth_render_override.force_z_valid && + regs.depth_buffer.DepthValid() && regs.depth_buffer.DepthWriteValid() && + regs.depth_buffer.DepthAddress() != regs.depth_buffer.DepthWriteAddress(); + const auto stencil_copy = + regs.depth_render_override.force_stencil_dirty && + regs.depth_render_override.force_stencil_valid && regs.depth_buffer.StencilValid() && + regs.depth_buffer.StencilWriteValid() && + regs.depth_buffer.StencilAddress() != regs.depth_buffer.StencilWriteAddress(); + if (cb_disabled && (depth_copy || stencil_copy)) { + // Games may disable color buffer and enable force depth/stencil dirty and valid to + // do a copy from one depth-stencil surface to another, without a pixel shader. + // We need to detect this case and perform the copy, otherwise it will have no effect. + LOG_TRACE(Render_Vulkan, "Performing depth-stencil override copy"); + DepthStencilCopy(depth_copy, stencil_copy); + return false; /* The copy stands in for the draw, so FilterDraw reports false just like the early-out above. */ + } + return true; } @@ -899,6 +919,59 @@ void Rasterizer::Resolve() { } } +void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) { /* Copies the depth and/or stencil aspect (chosen by is_depth and is_stencil) from the image found for the depth read addresses to the image found for the write addresses, over the layers selected by depth_view. FilterDraw only calls this when at least one of the two flags is true. */ + auto& regs = liverpool->regs; + + auto read_desc = VideoCore::TextureCache::DepthTargetDesc( + regs.depth_buffer, regs.depth_view, regs.depth_control, + regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, false); /* write_buffer = false: descriptor built from the read addresses. */ + auto write_desc = VideoCore::TextureCache::DepthTargetDesc( + regs.depth_buffer, regs.depth_view, regs.depth_control, + regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, true); /* write_buffer = true: same registers, but descriptor built from the write addresses. */ + + auto& read_image = texture_cache.GetImage(texture_cache.FindImage(read_desc)); + auto& write_image = texture_cache.GetImage(texture_cache.FindImage(write_desc)); + + VideoCore::SubresourceRange sub_range; + sub_range.base.layer = liverpool->regs.depth_view.slice_start; + sub_range.extent.layers = liverpool->regs.depth_view.NumSlices() - sub_range.base.layer; /* Layer count is measured from slice_start, not from layer 0. */ + + read_image.Transit(vk::ImageLayout::eTransferSrcOptimal, 
vk::AccessFlagBits2::eTransferRead, + sub_range); + write_image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, + sub_range); /* Transit is asked for the same source and destination layouts that copyImage is given below. */ + + auto aspect_mask = vk::ImageAspectFlags(0); /* Starts empty; each requested aspect is OR-ed in below. */ + if (is_depth) { + aspect_mask |= vk::ImageAspectFlagBits::eDepth; + } + if (is_stencil) { + aspect_mask |= vk::ImageAspectFlagBits::eStencil; + } + vk::ImageCopy region = { /* Same aspects and layer range on source and destination, mip level 0, zero offsets; the extent is the width and height of write_image with depth 1. */ + .srcSubresource = + { + .aspectMask = aspect_mask, + .mipLevel = 0, + .baseArrayLayer = sub_range.base.layer, + .layerCount = sub_range.extent.layers, + }, + .srcOffset = {0, 0, 0}, + .dstSubresource = + { + .aspectMask = aspect_mask, + .mipLevel = 0, + .baseArrayLayer = sub_range.base.layer, + .layerCount = sub_range.extent.layers, + }, + .dstOffset = {0, 0, 0}, + .extent = {write_image.info.size.width, write_image.info.size.height, 1}, + }; + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.copyImage(read_image.image, vk::ImageLayout::eTransferSrcOptimal, write_image.image, + vk::ImageLayout::eTransferDstOptimal, region); +} + void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) { buffer_cache.InlineData(address, value, num_bytes, is_gds); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2905c5ddb..1e4a210bb 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -71,6 +71,7 @@ private: RenderState PrepareRenderState(u32 mrt_mask); void BeginRendering(const GraphicsPipeline& pipeline, RenderState& state); void Resolve(); + void DepthStencilCopy(bool is_depth, bool is_stencil); /* Called from FilterDraw; the flags choose which aspects are copied. */ void EliminateFastClear(); void UpdateDynamicState(const GraphicsPipeline& pipeline); diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 1992f1fb7..07a0488f3 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -98,7 +98,8 @@ 
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, } ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, - VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint) noexcept { + VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint, + bool write_buffer) noexcept { /* write_buffer selects the write addresses for stencil_addr and guest_address below; otherwise the read addresses are used. */ props.is_tiled = false; pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format); type = vk::ImageType::e2D; @@ -111,10 +112,10 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice resources.layers = num_slices; meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0; - stencil_addr = buffer.StencilAddress(); + stencil_addr = write_buffer ? buffer.StencilWriteAddress() : buffer.StencilAddress(); stencil_size = pitch * size.height * sizeof(u8); - guest_address = buffer.Address(); + guest_address = write_buffer ? buffer.DepthWriteAddress() : buffer.DepthAddress(); const auto depth_slice_sz = buffer.GetDepthSliceSize(); guest_size = depth_slice_sz * num_slices; mips_layout.emplace_back(depth_slice_sz, pitch, 0); diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index 123540c1e..dad0e751e 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -19,7 +19,7 @@ struct ImageInfo { ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address, - const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; + const AmdGpu::Liverpool::CbDbExtent& hint = {}, bool write_buffer = false) noexcept; /* write_buffer defaults to false, so callers that omit it keep passing four arguments. */ ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept; bool IsTiled() const { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 69907f000..343a510e6 100644 --- 
a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -79,9 +79,9 @@ public: DepthTargetDesc(const AmdGpu::Liverpool::DepthBuffer& buffer, const AmdGpu::Liverpool::DepthView& view, const AmdGpu::Liverpool::DepthControl& ctl, VAddr htile_address, - const AmdGpu::Liverpool::CbDbExtent& hint = {}) + const AmdGpu::Liverpool::CbDbExtent& hint = {}, bool write_buffer = false) : BaseDesc{BindingType::DepthTarget, - ImageInfo{buffer, view.NumSlices(), htile_address, hint}, + ImageInfo{buffer, view.NumSlices(), htile_address, hint, write_buffer}, /* write_buffer is forwarded unchanged to the ImageInfo depth-buffer constructor. */ ImageViewInfo{buffer, view, ctl}} {} };