renderer_vulkan: Various attachment cleanup and fixes. (#1795)

This commit is contained in:
squidbus 2024-12-22 06:08:48 -08:00 committed by GitHub
parent aba2b29074
commit 14dc136832
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 123 additions and 88 deletions

View file

@ -428,6 +428,14 @@ struct Liverpool {
BitField<0, 22, u32> tile_max;
} depth_slice;
/// True when the buffer has a non-zero address and its Z format is not ZFormat::Invalid.
bool DepthValid() const {
    if (Address() == 0) {
        // No backing address: nothing to treat as a depth target.
        return false;
    }
    return z_info.format != ZFormat::Invalid;
}
/// True when the buffer has a non-zero address and its stencil format is not
/// StencilFormat::Invalid.
bool StencilValid() const {
    if (Address() == 0) {
        // No backing address: nothing to treat as a stencil target.
        return false;
    }
    return stencil_info.format != StencilFormat::Invalid;
}
/// Returns (depth_size.pitch_tile_max + 1) scaled by 8.
u32 Pitch() const {
    // NOTE(review): the register presumably stores the tile count minus one - confirm.
    const u32 tiles = depth_size.pitch_tile_max + 1;
    return tiles << 3;
}
@ -1275,6 +1283,26 @@ struct Liverpool {
return nullptr;
}
/// Returns the largest sample count among the color buffers (when color output is not
/// disabled) and the depth buffer (when it has valid depth or stencil); at least 1.
u32 NumSamples() const {
    // A sample count > 1 in the AA config does not appear to guarantee that MSAA rendering is
    // actually in use, so the ratio is derived from the CB and DB settings instead.
    u32 max_samples = 1u;

    const bool color_enabled = color_control.mode != ColorControl::OperationMode::Disable;
    for (auto idx = 0u; color_enabled && idx < NumColorBuffers; ++idx) {
        const auto& target = color_buffers[idx];
        if (target) {
            max_samples = std::max(max_samples, target.NumSamples());
        }
    }

    const bool has_depth_target = depth_buffer.DepthValid() || depth_buffer.StencilValid();
    if (has_depth_target) {
        max_samples = std::max(max_samples, depth_buffer.NumSamples());
    }
    return max_samples;
}
void SetDefaults();
};

View file

@ -85,10 +85,6 @@ public:
return key.mrt_mask;
}
/// Reports the depth_enable field stored in this pipeline's key.
bool IsDepthEnabled() const {
    const bool depth_test_on = key.depth_stencil.depth_enable.Value();
    return depth_test_on;
}
[[nodiscard]] bool IsPrimitiveListTopology() const {
return key.prim_type == AmdGpu::PrimitiveType::PointList ||
key.prim_type == AmdGpu::PrimitiveType::LineList ||

View file

@ -258,32 +258,28 @@ bool PipelineCache::RefreshGraphicsKey() {
auto& key = graphics_key;
key.depth_stencil = regs.depth_control;
key.stencil = regs.stencil_control;
key.depth_stencil.depth_write_enable.Assign(regs.depth_control.depth_write_enable.Value() &&
!regs.depth_render_control.depth_clear_enable);
key.depth_bias_enable = regs.polygon_control.NeedsBias();
const auto& db = regs.depth_buffer;
const auto ds_format = instance.GetSupportedFormat(
LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format),
const auto depth_format = instance.GetSupportedFormat(
LiverpoolToVK::DepthFormat(regs.depth_buffer.z_info.format,
regs.depth_buffer.stencil_info.format),
vk::FormatFeatureFlagBits2::eDepthStencilAttachment);
if (db.z_info.format != AmdGpu::Liverpool::DepthBuffer::ZFormat::Invalid) {
key.depth_format = ds_format;
if (regs.depth_buffer.DepthValid()) {
key.depth_format = depth_format;
} else {
key.depth_format = vk::Format::eUndefined;
key.depth_stencil.depth_enable.Assign(false);
}
if (regs.depth_control.depth_enable) {
key.depth_stencil.depth_enable.Assign(key.depth_format != vk::Format::eUndefined);
}
key.stencil = regs.stencil_control;
if (db.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid) {
key.stencil_format = key.depth_format;
if (regs.depth_buffer.StencilValid()) {
key.stencil_format = depth_format;
} else {
key.stencil_format = vk::Format::eUndefined;
key.depth_stencil.stencil_enable.Assign(false);
}
if (key.depth_stencil.stencil_enable) {
key.depth_stencil.stencil_enable.Assign(key.stencil_format != vk::Format::eUndefined);
}
key.prim_type = regs.primitive_type;
key.enable_primitive_restart = regs.enable_primitive_restart & 1;
key.primitive_restart_index = regs.primitive_restart_index;
@ -291,7 +287,7 @@ bool PipelineCache::RefreshGraphicsKey() {
key.cull_mode = regs.polygon_control.CullingMode();
key.clip_space = regs.clipper_control.clip_space;
key.front_face = regs.polygon_control.front_face;
key.num_samples = regs.aa_config.NumSamples();
key.num_samples = regs.NumSamples();
const bool skip_cb_binding =
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
@ -437,8 +433,6 @@ bool PipelineCache::RefreshGraphicsKey() {
}
}
u32 num_samples = 1u;
// Second pass to fill remaining CB pipeline key data
for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
auto const& col_buf = regs.color_buffers[cb];
@ -463,15 +457,8 @@ bool PipelineCache::RefreshGraphicsKey() {
key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)};
key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb));
++remapped_cb;
num_samples = std::max(num_samples, 1u << col_buf.attrib.num_samples_log2);
}
// It seems that the number of samples > 1 set in the AA config doesn't mean we're always
// rendering with MSAA, so we need to derive MS ratio from the CB settings.
num_samples = std::max(num_samples, regs.depth_buffer.NumSamples());
key.num_samples = num_samples;
return true;
} // namespace Vulkan

View file

@ -87,9 +87,11 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
LOG_WARNING(Render_Vulkan, "Color buffers require gamma correction");
}
const bool skip_cb_binding =
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) {
const auto& col_buf = regs.color_buffers[col_buf_id];
if (!col_buf) {
if (skip_cb_binding || !col_buf) {
continue;
}
@ -134,12 +136,8 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
};
}
using ZFormat = AmdGpu::Liverpool::DepthBuffer::ZFormat;
using StencilFormat = AmdGpu::Liverpool::DepthBuffer::StencilFormat;
if (regs.depth_buffer.Address() != 0 &&
((regs.depth_control.depth_enable && regs.depth_buffer.z_info.format != ZFormat::Invalid) ||
(regs.depth_control.stencil_enable &&
regs.depth_buffer.stencil_info.format != StencilFormat::Invalid))) {
if ((regs.depth_control.depth_enable && regs.depth_buffer.DepthValid()) ||
(regs.depth_control.stencil_enable && regs.depth_buffer.StencilValid())) {
const auto htile_address = regs.depth_htile_data_base.GetAddress();
const auto& hint = liverpool->last_db_extent;
auto& [image_id, desc] =
@ -159,25 +157,29 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
state.width = std::min<u32>(state.width, image.info.size.width);
state.height = std::min<u32>(state.height, image.info.size.height);
state.depth_attachment = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eUndefined,
.loadOp = is_depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}},
};
state.stencil_attachment = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eUndefined,
.loadOp = is_stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}},
};
state.has_depth = regs.depth_buffer.DepthValid();
state.has_stencil = regs.depth_buffer.StencilValid();
if (state.has_depth) {
state.depth_attachment = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eUndefined,
.loadOp =
is_depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}},
};
}
if (state.has_stencil) {
state.stencil_attachment = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eUndefined,
.loadOp =
is_stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}},
};
}
texture_cache.TouchMeta(htile_address, slice, false);
state.has_depth =
regs.depth_buffer.z_info.format != AmdGpu::Liverpool::DepthBuffer::ZFormat::Invalid;
state.has_stencil = regs.depth_buffer.stencil_info.format !=
AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid;
}
return state;
@ -815,34 +817,60 @@ void Rasterizer::Resolve() {
mrt1_range.base.layer = liverpool->regs.color_buffers[1].view.slice_start;
mrt1_range.extent.layers = liverpool->regs.color_buffers[1].NumSlices() - mrt1_range.base.layer;
vk::ImageResolve region = {
.srcSubresource =
{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = mrt0_range.base.layer,
.layerCount = mrt0_range.extent.layers,
},
.srcOffset = {0, 0, 0},
.dstSubresource =
{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = mrt1_range.base.layer,
.layerCount = mrt1_range.extent.layers,
},
.dstOffset = {0, 0, 0},
.extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1},
};
mrt0_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead,
mrt0_range);
mrt1_image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite,
mrt1_range);
cmdbuf.resolveImage(mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal, mrt1_image.image,
vk::ImageLayout::eTransferDstOptimal, region);
if (mrt0_image.info.num_samples == 1) {
// Vulkan does not allow resolve from a single sample image, so change it to a copy.
// Note that resolving a single-sampled image doesn't really make sense, but a game might do
// it.
vk::ImageCopy region = {
.srcSubresource =
{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = mrt0_range.base.layer,
.layerCount = mrt0_range.extent.layers,
},
.srcOffset = {0, 0, 0},
.dstSubresource =
{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = mrt1_range.base.layer,
.layerCount = mrt1_range.extent.layers,
},
.dstOffset = {0, 0, 0},
.extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1},
};
cmdbuf.copyImage(mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal, mrt1_image.image,
vk::ImageLayout::eTransferDstOptimal, region);
} else {
vk::ImageResolve region = {
.srcSubresource =
{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = mrt0_range.base.layer,
.layerCount = mrt0_range.extent.layers,
},
.srcOffset = {0, 0, 0},
.dstSubresource =
{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.mipLevel = 0,
.baseArrayLayer = mrt1_range.base.layer,
.layerCount = mrt1_range.extent.layers,
},
.dstOffset = {0, 0, 0},
.extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1},
};
cmdbuf.resolveImage(mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal,
mrt1_image.image, vk::ImageLayout::eTransferDstOptimal, region);
}
}
void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
@ -998,6 +1026,10 @@ void Rasterizer::UpdateViewportScissorState() {
enable_offset ? regs.window_offset.window_y_offset : 0);
for (u32 idx = 0; idx < Liverpool::NumViewports; idx++) {
if (regs.viewports[idx].xscale == 0) {
// Scissor and viewport counts should be equal.
continue;
}
auto vp_scsr = scsr;
if (regs.mode_control.vport_scissor_enable) {
vp_scsr.top_left_x =
@ -1020,13 +1052,6 @@ void Rasterizer::UpdateViewportScissorState() {
cmdbuf.setScissor(0, scissors);
}
/// Forwards the depth_bounds_enable field of the depth control register to the current
/// command buffer via setDepthBoundsTestEnable.
void Rasterizer::UpdateDepthStencilState() {
    const auto cmd = scheduler.CommandBuffer();
    auto& depth_control = liverpool->regs.depth_control;
    cmd.setDepthBoundsTestEnable(depth_control.depth_bounds_enable);
}
void Rasterizer::ScopeMarkerBegin(const std::string_view& str) {
if (Config::nullGpu() || !Config::vkMarkersEnabled()) {
return;

View file

@ -74,7 +74,6 @@ private:
void UpdateDynamicState(const GraphicsPipeline& pipeline);
void UpdateViewportScissorState();
void UpdateDepthStencilState();
bool FilterDraw();

View file

@ -30,7 +30,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) {
is_rendering = true;
render_state = new_state;
const auto witdh =
const auto width =
render_state.width != std::numeric_limits<u32>::max() ? render_state.width : 1;
const auto height =
render_state.height != std::numeric_limits<u32>::max() ? render_state.height : 1;
@ -39,7 +39,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) {
.renderArea =
{
.offset = {0, 0},
.extent = {witdh, height},
.extent = {width, height},
},
.layerCount = 1,
.colorAttachmentCount = render_state.num_color_attachments,

View file

@ -266,7 +266,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
props.is_tiled = buffer.IsTiled();
tiling_mode = buffer.GetTilingMode();
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat());
num_samples = 1 << buffer.attrib.num_fragments_log2;
num_samples = buffer.NumSamples();
num_bits = NumBits(buffer.info.format);
type = vk::ImageType::e2D;
size.width = hint.Valid() ? hint.width : buffer.Pitch();
@ -289,7 +289,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
props.is_tiled = false;
pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format);
type = vk::ImageType::e2D;
num_samples = 1 << buffer.z_info.num_samples; // spec doesn't say it is a log2
num_samples = buffer.NumSamples();
num_bits = buffer.NumBits();
size.width = hint.Valid() ? hint.width : buffer.Pitch();
size.height = hint.Valid() ? hint.height : buffer.Height();