renderer_vulkan: Handle depth-stencil copies through depth render overrides. (#2134)

This commit is contained in:
squidbus 2025-01-14 21:48:40 -08:00 committed by GitHub
parent d94abffd9a
commit 5040be1640
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 146 additions and 13 deletions

View file

@ -105,7 +105,8 @@ void RegPopup::DrawDepthBuffer(const DepthBuffer& depth_data) {
"DEPTH_SLICE.TILE_MAX", depth_buffer.depth_slice.tile_max,
"Pitch()", depth_buffer.Pitch(),
"Height()", depth_buffer.Height(),
"Address()", depth_buffer.Address(),
"DepthAddress()", depth_buffer.DepthAddress(),
"StencilAddress()", depth_buffer.StencilAddress(),
"NumSamples()", depth_buffer.NumSamples(),
"NumBits()", depth_buffer.NumBits(),
"GetDepthSliceSize()", depth_buffer.GetDepthSliceSize()

View file

@ -155,7 +155,7 @@ void RegView::DrawGraphicsRegs() {
TableNextColumn();
TextUnformatted("Depth buffer");
TableNextColumn();
if (regs.depth_buffer.Address() == 0 || !regs.depth_control.depth_enable) {
if (regs.depth_buffer.DepthAddress() == 0 || !regs.depth_control.depth_enable) {
TextUnformatted("N/A");
} else {
const char* text = last_selected_cb == depth_id && default_reg_popup.open ? "x" : "->";
@ -241,7 +241,7 @@ void RegView::SetData(DebugStateType::RegDump _data, const std::string& base_tit
default_reg_popup.open = false;
if (last_selected_cb == depth_id) {
const auto& has_depth =
regs.depth_buffer.Address() != 0 && regs.depth_control.depth_enable;
regs.depth_buffer.DepthAddress() != 0 && regs.depth_control.depth_enable;
if (has_depth) {
default_reg_popup.SetData(title, regs.depth_buffer, regs.depth_control);
default_reg_popup.open = true;

View file

@ -429,11 +429,19 @@ struct Liverpool {
} depth_slice;
bool DepthValid() const {
return Address() != 0 && z_info.format != ZFormat::Invalid;
return DepthAddress() != 0 && z_info.format != ZFormat::Invalid;
}
bool StencilValid() const {
return Address() != 0 && stencil_info.format != StencilFormat::Invalid;
return StencilAddress() != 0 && stencil_info.format != StencilFormat::Invalid;
}
// True when the depth write surface is usable: the Z format is not Invalid and the
// depth write base address is non-zero.
bool DepthWriteValid() const {
    if (z_info.format == ZFormat::Invalid) {
        return false;
    }
    return DepthWriteAddress() != 0;
}
// True when the stencil write surface is usable: the stencil format is not Invalid and
// the stencil write base address is non-zero.
bool StencilWriteValid() const {
    if (stencil_info.format == StencilFormat::Invalid) {
        return false;
    }
    return StencilWriteAddress() != 0;
}
u32 Pitch() const {
@ -444,7 +452,7 @@ struct Liverpool {
return (depth_size.height_tile_max + 1) << 3;
}
u64 Address() const {
u64 DepthAddress() const {
return u64(z_read_base) << 8;
}
@ -452,6 +460,14 @@ struct Liverpool {
return u64(stencil_read_base) << 8;
}
// Address of the depth write surface: z_write_base widened to 64 bits and shifted
// left by 8 (the register holds the address without its low 8 bits).
u64 DepthWriteAddress() const {
    const u64 base = static_cast<u64>(z_write_base);
    return base << 8;
}
// Address of the stencil write surface: stencil_write_base widened to 64 bits and
// shifted left by 8 (the register holds the address without its low 8 bits).
u64 StencilWriteAddress() const {
    const u64 base = static_cast<u64>(stencil_write_base);
    return base << 8;
}
// Sample count derived from z_info.num_samples, which is interpreted as a power-of-two
// exponent (the spec doesn't say it is a log2).
u32 NumSamples() const {
    const u32 log2_samples = z_info.num_samples;
    return 1u << log2_samples;
}
@ -1008,6 +1024,46 @@ struct Liverpool {
}
};
// Selector type of the two-bit force_hiz_enable, force_his_enable0, force_his_enable1 and
// force_full_z_range fields of DepthRenderOverride. The fourth two-bit encoding (3) has no
// enumerator here.
enum class ForceEnable : u32 {
Off = 0,
Enable = 1,
Disable = 2,
};
// Selector type of the two-bit DepthRenderOverride::force_z_limit_summ field; all four
// encodings are named.
enum class ForceSumm : u32 {
Off = 0,
MinZ = 1,
MaxZ = 2,
Both = 3,
};
// Bit-level view of the 32-bit depth render override register. Every member aliases the
// same word: `raw` exposes the whole value and the BitField members cover bits 0..31
// without gaps or overlap.
union DepthRenderOverride {
// Whole register value.
u32 raw;
BitField<0, 2, ForceEnable> force_hiz_enable;
BitField<2, 2, ForceEnable> force_his_enable0;
BitField<4, 2, ForceEnable> force_his_enable1;
BitField<6, 1, u32> force_shader_z_order;
BitField<7, 1, u32> fast_z_disable;
BitField<8, 1, u32> fast_stencil_disable;
BitField<9, 1, u32> noop_cull_disable;
BitField<10, 1, u32> force_color_kill;
BitField<11, 1, u32> force_z_read;
BitField<12, 1, u32> force_stencil_read;
BitField<13, 2, ForceEnable> force_full_z_range;
BitField<15, 1, u32> force_qc_smask_conflict;
BitField<16, 1, u32> disable_viewport_clamp;
BitField<17, 1, u32> ignore_sc_zrange;
BitField<18, 1, u32> disable_fully_covered;
BitField<19, 2, ForceSumm> force_z_limit_summ;
BitField<21, 5, u32> max_tiles_in_dtt;
BitField<26, 1, u32> disable_tile_rate_tiles;
// force_z_dirty + force_z_valid (and the stencil pair below) are what
// Rasterizer::FilterDraw checks, together with a disabled color buffer and differing
// read/write addresses, to recognise a depth-stencil copy.
BitField<27, 1, u32> force_z_dirty;
BitField<28, 1, u32> force_stencil_dirty;
BitField<29, 1, u32> force_z_valid;
BitField<30, 1, u32> force_stencil_valid;
BitField<31, 1, u32> preserve_compression;
};
union AaConfig {
BitField<0, 3, u32> msaa_num_samples;
BitField<4, 1, u32> aa_mask_centroid_dtmn;
@ -1209,7 +1265,8 @@ struct Liverpool {
DepthRenderControl depth_render_control;
INSERT_PADDING_WORDS(1);
DepthView depth_view;
INSERT_PADDING_WORDS(2);
DepthRenderOverride depth_render_override;
INSERT_PADDING_WORDS(1);
Address depth_htile_data_base;
INSERT_PADDING_WORDS(2);
float depth_bounds_min;

View file

@ -70,6 +70,26 @@ bool Rasterizer::FilterDraw() {
return false;
}
const bool cb_disabled =
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
const auto depth_copy =
regs.depth_render_override.force_z_dirty && regs.depth_render_override.force_z_valid &&
regs.depth_buffer.DepthValid() && regs.depth_buffer.DepthWriteValid() &&
regs.depth_buffer.DepthAddress() != regs.depth_buffer.DepthWriteAddress();
const auto stencil_copy =
regs.depth_render_override.force_stencil_dirty &&
regs.depth_render_override.force_stencil_valid && regs.depth_buffer.StencilValid() &&
regs.depth_buffer.StencilWriteValid() &&
regs.depth_buffer.StencilAddress() != regs.depth_buffer.StencilWriteAddress();
if (cb_disabled && (depth_copy || stencil_copy)) {
// Games may disable color buffer and enable force depth/stencil dirty and valid to
// do a copy from one depth-stencil surface to another, without a pixel shader.
// We need to detect this case and perform the copy, otherwise it will have no effect.
LOG_TRACE(Render_Vulkan, "Performing depth-stencil override copy");
DepthStencilCopy(depth_copy, stencil_copy);
return false;
}
return true;
}
@ -899,6 +919,59 @@ void Rasterizer::Resolve() {
}
}
void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) {
auto& regs = liverpool->regs;
auto read_desc = VideoCore::TextureCache::DepthTargetDesc(
regs.depth_buffer, regs.depth_view, regs.depth_control,
regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, false);
auto write_desc = VideoCore::TextureCache::DepthTargetDesc(
regs.depth_buffer, regs.depth_view, regs.depth_control,
regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, true);
auto& read_image = texture_cache.GetImage(texture_cache.FindImage(read_desc));
auto& write_image = texture_cache.GetImage(texture_cache.FindImage(write_desc));
VideoCore::SubresourceRange sub_range;
sub_range.base.layer = liverpool->regs.depth_view.slice_start;
sub_range.extent.layers = liverpool->regs.depth_view.NumSlices() - sub_range.base.layer;
read_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead,
sub_range);
write_image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite,
sub_range);
auto aspect_mask = vk::ImageAspectFlags(0);
if (is_depth) {
aspect_mask |= vk::ImageAspectFlagBits::eDepth;
}
if (is_stencil) {
aspect_mask |= vk::ImageAspectFlagBits::eStencil;
}
vk::ImageCopy region = {
.srcSubresource =
{
.aspectMask = aspect_mask,
.mipLevel = 0,
.baseArrayLayer = sub_range.base.layer,
.layerCount = sub_range.extent.layers,
},
.srcOffset = {0, 0, 0},
.dstSubresource =
{
.aspectMask = aspect_mask,
.mipLevel = 0,
.baseArrayLayer = sub_range.base.layer,
.layerCount = sub_range.extent.layers,
},
.dstOffset = {0, 0, 0},
.extent = {write_image.info.size.width, write_image.info.size.height, 1},
};
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.copyImage(read_image.image, vk::ImageLayout::eTransferSrcOptimal, write_image.image,
vk::ImageLayout::eTransferDstOptimal, region);
}
// Forwards an inline data write to the buffer cache with all arguments unchanged.
// NOTE(review): presumably `value` points at `num_bytes` bytes destined for guest address
// `address`, and `is_gds` selects GDS as the target - confirm against BufferCache::InlineData.
void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
buffer_cache.InlineData(address, value, num_bytes, is_gds);
}

View file

@ -71,6 +71,7 @@ private:
RenderState PrepareRenderState(u32 mrt_mask);
void BeginRendering(const GraphicsPipeline& pipeline, RenderState& state);
void Resolve();
void DepthStencilCopy(bool is_depth, bool is_stencil);
void EliminateFastClear();
void UpdateDynamicState(const GraphicsPipeline& pipeline);

View file

@ -98,7 +98,8 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
}
ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices,
VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint) noexcept {
VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint,
bool write_buffer) noexcept {
props.is_tiled = false;
pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format);
type = vk::ImageType::e2D;
@ -111,10 +112,10 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
resources.layers = num_slices;
meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
stencil_addr = buffer.StencilAddress();
stencil_addr = write_buffer ? buffer.StencilWriteAddress() : buffer.StencilAddress();
stencil_size = pitch * size.height * sizeof(u8);
guest_address = buffer.Address();
guest_address = write_buffer ? buffer.DepthWriteAddress() : buffer.DepthAddress();
const auto depth_slice_sz = buffer.GetDepthSliceSize();
guest_size = depth_slice_sz * num_slices;
mips_layout.emplace_back(depth_slice_sz, pitch, 0);

View file

@ -19,7 +19,7 @@ struct ImageInfo {
ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address,
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
const AmdGpu::Liverpool::CbDbExtent& hint = {}, bool write_buffer = false) noexcept;
ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept;
bool IsTiled() const {

View file

@ -79,9 +79,9 @@ public:
DepthTargetDesc(const AmdGpu::Liverpool::DepthBuffer& buffer,
const AmdGpu::Liverpool::DepthView& view,
const AmdGpu::Liverpool::DepthControl& ctl, VAddr htile_address,
const AmdGpu::Liverpool::CbDbExtent& hint = {})
const AmdGpu::Liverpool::CbDbExtent& hint = {}, bool write_buffer = false)
: BaseDesc{BindingType::DepthTarget,
ImageInfo{buffer, view.NumSlices(), htile_address, hint},
ImageInfo{buffer, view.NumSlices(), htile_address, hint, write_buffer},
ImageViewInfo{buffer, view, ctl}} {}
};