mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-17 20:18:28 +00:00
renderer_vulkan: Handle depth-stencil copies through depth render overrides. (#2134)
This commit is contained in:
parent
d94abffd9a
commit
5040be1640
|
@ -105,7 +105,8 @@ void RegPopup::DrawDepthBuffer(const DepthBuffer& depth_data) {
|
||||||
"DEPTH_SLICE.TILE_MAX", depth_buffer.depth_slice.tile_max,
|
"DEPTH_SLICE.TILE_MAX", depth_buffer.depth_slice.tile_max,
|
||||||
"Pitch()", depth_buffer.Pitch(),
|
"Pitch()", depth_buffer.Pitch(),
|
||||||
"Height()", depth_buffer.Height(),
|
"Height()", depth_buffer.Height(),
|
||||||
"Address()", depth_buffer.Address(),
|
"DepthAddress()", depth_buffer.DepthAddress(),
|
||||||
|
"StencilAddress()", depth_buffer.StencilAddress(),
|
||||||
"NumSamples()", depth_buffer.NumSamples(),
|
"NumSamples()", depth_buffer.NumSamples(),
|
||||||
"NumBits()", depth_buffer.NumBits(),
|
"NumBits()", depth_buffer.NumBits(),
|
||||||
"GetDepthSliceSize()", depth_buffer.GetDepthSliceSize()
|
"GetDepthSliceSize()", depth_buffer.GetDepthSliceSize()
|
||||||
|
|
|
@ -155,7 +155,7 @@ void RegView::DrawGraphicsRegs() {
|
||||||
TableNextColumn();
|
TableNextColumn();
|
||||||
TextUnformatted("Depth buffer");
|
TextUnformatted("Depth buffer");
|
||||||
TableNextColumn();
|
TableNextColumn();
|
||||||
if (regs.depth_buffer.Address() == 0 || !regs.depth_control.depth_enable) {
|
if (regs.depth_buffer.DepthAddress() == 0 || !regs.depth_control.depth_enable) {
|
||||||
TextUnformatted("N/A");
|
TextUnformatted("N/A");
|
||||||
} else {
|
} else {
|
||||||
const char* text = last_selected_cb == depth_id && default_reg_popup.open ? "x" : "->";
|
const char* text = last_selected_cb == depth_id && default_reg_popup.open ? "x" : "->";
|
||||||
|
@ -241,7 +241,7 @@ void RegView::SetData(DebugStateType::RegDump _data, const std::string& base_tit
|
||||||
default_reg_popup.open = false;
|
default_reg_popup.open = false;
|
||||||
if (last_selected_cb == depth_id) {
|
if (last_selected_cb == depth_id) {
|
||||||
const auto& has_depth =
|
const auto& has_depth =
|
||||||
regs.depth_buffer.Address() != 0 && regs.depth_control.depth_enable;
|
regs.depth_buffer.DepthAddress() != 0 && regs.depth_control.depth_enable;
|
||||||
if (has_depth) {
|
if (has_depth) {
|
||||||
default_reg_popup.SetData(title, regs.depth_buffer, regs.depth_control);
|
default_reg_popup.SetData(title, regs.depth_buffer, regs.depth_control);
|
||||||
default_reg_popup.open = true;
|
default_reg_popup.open = true;
|
||||||
|
|
|
@ -429,11 +429,19 @@ struct Liverpool {
|
||||||
} depth_slice;
|
} depth_slice;
|
||||||
|
|
||||||
bool DepthValid() const {
|
bool DepthValid() const {
|
||||||
return Address() != 0 && z_info.format != ZFormat::Invalid;
|
return DepthAddress() != 0 && z_info.format != ZFormat::Invalid;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool StencilValid() const {
|
bool StencilValid() const {
|
||||||
return Address() != 0 && stencil_info.format != StencilFormat::Invalid;
|
return StencilAddress() != 0 && stencil_info.format != StencilFormat::Invalid;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DepthWriteValid() const {
|
||||||
|
return DepthWriteAddress() != 0 && z_info.format != ZFormat::Invalid;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool StencilWriteValid() const {
|
||||||
|
return StencilWriteAddress() != 0 && stencil_info.format != StencilFormat::Invalid;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 Pitch() const {
|
u32 Pitch() const {
|
||||||
|
@ -444,7 +452,7 @@ struct Liverpool {
|
||||||
return (depth_size.height_tile_max + 1) << 3;
|
return (depth_size.height_tile_max + 1) << 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 Address() const {
|
u64 DepthAddress() const {
|
||||||
return u64(z_read_base) << 8;
|
return u64(z_read_base) << 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -452,6 +460,14 @@ struct Liverpool {
|
||||||
return u64(stencil_read_base) << 8;
|
return u64(stencil_read_base) << 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Address of the depth write surface: z_write_base widened to 64 bits and shifted
// left by 8 (the stored base is in units of 256 bytes).
u64 DepthWriteAddress() const {
    const auto write_base = static_cast<u64>(z_write_base);
    return write_base << 8;
}
|
||||||
|
|
||||||
|
// Address of the stencil write surface: stencil_write_base widened to 64 bits and
// shifted left by 8 (the stored base is in units of 256 bytes).
u64 StencilWriteAddress() const {
    const auto write_base = static_cast<u64>(stencil_write_base);
    return write_base << 8;
}
|
||||||
|
|
||||||
u32 NumSamples() const {
|
u32 NumSamples() const {
|
||||||
return 1u << z_info.num_samples; // spec doesn't say it is a log2
|
return 1u << z_info.num_samples; // spec doesn't say it is a log2
|
||||||
}
|
}
|
||||||
|
@ -1008,6 +1024,46 @@ struct Liverpool {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Selector held in the 2-bit ForceEnable fields of DepthRenderOverride.
// The numeric values are the raw field encodings, so they are spelled out explicitly.
enum class ForceEnable : u32 {
    Off = 0b00,
    Enable = 0b01,
    Disable = 0b10,
};
|
||||||
|
|
||||||
|
// Selector held in the 2-bit force_z_limit_summ field of DepthRenderOverride.
// The numeric values are the raw field encodings, so they are spelled out explicitly.
enum class ForceSumm : u32 {
    Off = 0b00,
    MinZ = 0b01,
    MaxZ = 0b10,
    Both = 0b11,
};
|
||||||
|
|
||||||
|
union DepthRenderOverride {
|
||||||
|
u32 raw;
|
||||||
|
BitField<0, 2, ForceEnable> force_hiz_enable;
|
||||||
|
BitField<2, 2, ForceEnable> force_his_enable0;
|
||||||
|
BitField<4, 2, ForceEnable> force_his_enable1;
|
||||||
|
BitField<6, 1, u32> force_shader_z_order;
|
||||||
|
BitField<7, 1, u32> fast_z_disable;
|
||||||
|
BitField<8, 1, u32> fast_stencil_disable;
|
||||||
|
BitField<9, 1, u32> noop_cull_disable;
|
||||||
|
BitField<10, 1, u32> force_color_kill;
|
||||||
|
BitField<11, 1, u32> force_z_read;
|
||||||
|
BitField<12, 1, u32> force_stencil_read;
|
||||||
|
BitField<13, 2, ForceEnable> force_full_z_range;
|
||||||
|
BitField<15, 1, u32> force_qc_smask_conflict;
|
||||||
|
BitField<16, 1, u32> disable_viewport_clamp;
|
||||||
|
BitField<17, 1, u32> ignore_sc_zrange;
|
||||||
|
BitField<18, 1, u32> disable_fully_covered;
|
||||||
|
BitField<19, 2, ForceSumm> force_z_limit_summ;
|
||||||
|
BitField<21, 5, u32> max_tiles_in_dtt;
|
||||||
|
BitField<26, 1, u32> disable_tile_rate_tiles;
|
||||||
|
BitField<27, 1, u32> force_z_dirty;
|
||||||
|
BitField<28, 1, u32> force_stencil_dirty;
|
||||||
|
BitField<29, 1, u32> force_z_valid;
|
||||||
|
BitField<30, 1, u32> force_stencil_valid;
|
||||||
|
BitField<31, 1, u32> preserve_compression;
|
||||||
|
};
|
||||||
|
|
||||||
union AaConfig {
|
union AaConfig {
|
||||||
BitField<0, 3, u32> msaa_num_samples;
|
BitField<0, 3, u32> msaa_num_samples;
|
||||||
BitField<4, 1, u32> aa_mask_centroid_dtmn;
|
BitField<4, 1, u32> aa_mask_centroid_dtmn;
|
||||||
|
@ -1209,7 +1265,8 @@ struct Liverpool {
|
||||||
DepthRenderControl depth_render_control;
|
DepthRenderControl depth_render_control;
|
||||||
INSERT_PADDING_WORDS(1);
|
INSERT_PADDING_WORDS(1);
|
||||||
DepthView depth_view;
|
DepthView depth_view;
|
||||||
INSERT_PADDING_WORDS(2);
|
DepthRenderOverride depth_render_override;
|
||||||
|
INSERT_PADDING_WORDS(1);
|
||||||
Address depth_htile_data_base;
|
Address depth_htile_data_base;
|
||||||
INSERT_PADDING_WORDS(2);
|
INSERT_PADDING_WORDS(2);
|
||||||
float depth_bounds_min;
|
float depth_bounds_min;
|
||||||
|
|
|
@ -70,6 +70,26 @@ bool Rasterizer::FilterDraw() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const bool cb_disabled =
|
||||||
|
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
|
||||||
|
const auto depth_copy =
|
||||||
|
regs.depth_render_override.force_z_dirty && regs.depth_render_override.force_z_valid &&
|
||||||
|
regs.depth_buffer.DepthValid() && regs.depth_buffer.DepthWriteValid() &&
|
||||||
|
regs.depth_buffer.DepthAddress() != regs.depth_buffer.DepthWriteAddress();
|
||||||
|
const auto stencil_copy =
|
||||||
|
regs.depth_render_override.force_stencil_dirty &&
|
||||||
|
regs.depth_render_override.force_stencil_valid && regs.depth_buffer.StencilValid() &&
|
||||||
|
regs.depth_buffer.StencilWriteValid() &&
|
||||||
|
regs.depth_buffer.StencilAddress() != regs.depth_buffer.StencilWriteAddress();
|
||||||
|
if (cb_disabled && (depth_copy || stencil_copy)) {
|
||||||
|
// Games may disable color buffer and enable force depth/stencil dirty and valid to
|
||||||
|
// do a copy from one depth-stencil surface to another, without a pixel shader.
|
||||||
|
// We need to detect this case and perform the copy, otherwise it will have no effect.
|
||||||
|
LOG_TRACE(Render_Vulkan, "Performing depth-stencil override copy");
|
||||||
|
DepthStencilCopy(depth_copy, stencil_copy);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -899,6 +919,59 @@ void Rasterizer::Resolve() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) {
|
||||||
|
auto& regs = liverpool->regs;
|
||||||
|
|
||||||
|
auto read_desc = VideoCore::TextureCache::DepthTargetDesc(
|
||||||
|
regs.depth_buffer, regs.depth_view, regs.depth_control,
|
||||||
|
regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, false);
|
||||||
|
auto write_desc = VideoCore::TextureCache::DepthTargetDesc(
|
||||||
|
regs.depth_buffer, regs.depth_view, regs.depth_control,
|
||||||
|
regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, true);
|
||||||
|
|
||||||
|
auto& read_image = texture_cache.GetImage(texture_cache.FindImage(read_desc));
|
||||||
|
auto& write_image = texture_cache.GetImage(texture_cache.FindImage(write_desc));
|
||||||
|
|
||||||
|
VideoCore::SubresourceRange sub_range;
|
||||||
|
sub_range.base.layer = liverpool->regs.depth_view.slice_start;
|
||||||
|
sub_range.extent.layers = liverpool->regs.depth_view.NumSlices() - sub_range.base.layer;
|
||||||
|
|
||||||
|
read_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead,
|
||||||
|
sub_range);
|
||||||
|
write_image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite,
|
||||||
|
sub_range);
|
||||||
|
|
||||||
|
auto aspect_mask = vk::ImageAspectFlags(0);
|
||||||
|
if (is_depth) {
|
||||||
|
aspect_mask |= vk::ImageAspectFlagBits::eDepth;
|
||||||
|
}
|
||||||
|
if (is_stencil) {
|
||||||
|
aspect_mask |= vk::ImageAspectFlagBits::eStencil;
|
||||||
|
}
|
||||||
|
vk::ImageCopy region = {
|
||||||
|
.srcSubresource =
|
||||||
|
{
|
||||||
|
.aspectMask = aspect_mask,
|
||||||
|
.mipLevel = 0,
|
||||||
|
.baseArrayLayer = sub_range.base.layer,
|
||||||
|
.layerCount = sub_range.extent.layers,
|
||||||
|
},
|
||||||
|
.srcOffset = {0, 0, 0},
|
||||||
|
.dstSubresource =
|
||||||
|
{
|
||||||
|
.aspectMask = aspect_mask,
|
||||||
|
.mipLevel = 0,
|
||||||
|
.baseArrayLayer = sub_range.base.layer,
|
||||||
|
.layerCount = sub_range.extent.layers,
|
||||||
|
},
|
||||||
|
.dstOffset = {0, 0, 0},
|
||||||
|
.extent = {write_image.info.size.width, write_image.info.size.height, 1},
|
||||||
|
};
|
||||||
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
|
cmdbuf.copyImage(read_image.image, vk::ImageLayout::eTransferSrcOptimal, write_image.image,
|
||||||
|
vk::ImageLayout::eTransferDstOptimal, region);
|
||||||
|
}
|
||||||
|
|
||||||
void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
|
void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
|
||||||
buffer_cache.InlineData(address, value, num_bytes, is_gds);
|
buffer_cache.InlineData(address, value, num_bytes, is_gds);
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,6 +71,7 @@ private:
|
||||||
RenderState PrepareRenderState(u32 mrt_mask);
|
RenderState PrepareRenderState(u32 mrt_mask);
|
||||||
void BeginRendering(const GraphicsPipeline& pipeline, RenderState& state);
|
void BeginRendering(const GraphicsPipeline& pipeline, RenderState& state);
|
||||||
void Resolve();
|
void Resolve();
|
||||||
|
void DepthStencilCopy(bool is_depth, bool is_stencil);
|
||||||
void EliminateFastClear();
|
void EliminateFastClear();
|
||||||
|
|
||||||
void UpdateDynamicState(const GraphicsPipeline& pipeline);
|
void UpdateDynamicState(const GraphicsPipeline& pipeline);
|
||||||
|
|
|
@ -98,7 +98,8 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||||
}
|
}
|
||||||
|
|
||||||
ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices,
|
ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices,
|
||||||
VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint) noexcept {
|
VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint,
|
||||||
|
bool write_buffer) noexcept {
|
||||||
props.is_tiled = false;
|
props.is_tiled = false;
|
||||||
pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format);
|
pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format);
|
||||||
type = vk::ImageType::e2D;
|
type = vk::ImageType::e2D;
|
||||||
|
@ -111,10 +112,10 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
|
||||||
resources.layers = num_slices;
|
resources.layers = num_slices;
|
||||||
meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
|
meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
|
||||||
|
|
||||||
stencil_addr = buffer.StencilAddress();
|
stencil_addr = write_buffer ? buffer.StencilWriteAddress() : buffer.StencilAddress();
|
||||||
stencil_size = pitch * size.height * sizeof(u8);
|
stencil_size = pitch * size.height * sizeof(u8);
|
||||||
|
|
||||||
guest_address = buffer.Address();
|
guest_address = write_buffer ? buffer.DepthWriteAddress() : buffer.DepthAddress();
|
||||||
const auto depth_slice_sz = buffer.GetDepthSliceSize();
|
const auto depth_slice_sz = buffer.GetDepthSliceSize();
|
||||||
guest_size = depth_slice_sz * num_slices;
|
guest_size = depth_slice_sz * num_slices;
|
||||||
mips_layout.emplace_back(depth_slice_sz, pitch, 0);
|
mips_layout.emplace_back(depth_slice_sz, pitch, 0);
|
||||||
|
|
|
@ -19,7 +19,7 @@ struct ImageInfo {
|
||||||
ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||||
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
|
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
|
||||||
ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address,
|
ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address,
|
||||||
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
|
const AmdGpu::Liverpool::CbDbExtent& hint = {}, bool write_buffer = false) noexcept;
|
||||||
ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept;
|
ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept;
|
||||||
|
|
||||||
bool IsTiled() const {
|
bool IsTiled() const {
|
||||||
|
|
|
@ -79,9 +79,9 @@ public:
|
||||||
DepthTargetDesc(const AmdGpu::Liverpool::DepthBuffer& buffer,
|
DepthTargetDesc(const AmdGpu::Liverpool::DepthBuffer& buffer,
|
||||||
const AmdGpu::Liverpool::DepthView& view,
|
const AmdGpu::Liverpool::DepthView& view,
|
||||||
const AmdGpu::Liverpool::DepthControl& ctl, VAddr htile_address,
|
const AmdGpu::Liverpool::DepthControl& ctl, VAddr htile_address,
|
||||||
const AmdGpu::Liverpool::CbDbExtent& hint = {})
|
const AmdGpu::Liverpool::CbDbExtent& hint = {}, bool write_buffer = false)
|
||||||
: BaseDesc{BindingType::DepthTarget,
|
: BaseDesc{BindingType::DepthTarget,
|
||||||
ImageInfo{buffer, view.NumSlices(), htile_address, hint},
|
ImageInfo{buffer, view.NumSlices(), htile_address, hint, write_buffer},
|
||||||
ImageViewInfo{buffer, view, ctl}} {}
|
ImageViewInfo{buffer, view, ctl}} {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue