mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2024-12-27 01:46:41 +00:00
renderer_vulkan: Various attachment cleanup and fixes. (#1795)
This commit is contained in:
parent
aba2b29074
commit
14dc136832
|
@ -428,6 +428,14 @@ struct Liverpool {
|
|||
BitField<0, 22, u32> tile_max;
|
||||
} depth_slice;
|
||||
|
||||
// Returns true when the depth buffer has a non-zero Address() and its
// z_info.format is anything other than ZFormat::Invalid.
bool DepthValid() const {
|
||||
return Address() != 0 && z_info.format != ZFormat::Invalid;
|
||||
}
|
||||
|
||||
// Returns true when the depth buffer has a non-zero Address() and its
// stencil_info.format is anything other than StencilFormat::Invalid.
bool StencilValid() const {
|
||||
return Address() != 0 && stencil_info.format != StencilFormat::Invalid;
|
||||
}
|
||||
|
||||
// Returns (depth_size.pitch_tile_max + 1) shifted left by 3, i.e. multiplied by 8.
// NOTE(review): presumably this converts a "max tile index" into a pitch in
// pixels with 8-pixel-wide tiles -- confirm against the register documentation.
u32 Pitch() const {
|
||||
return (depth_size.pitch_tile_max + 1) << 3;
|
||||
}
|
||||
|
@ -1275,6 +1283,26 @@ struct Liverpool {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// Computes the sample count to render with as the maximum of:
//   - 1 (the starting value),
//   - NumSamples() of every color buffer that does not evaluate to false,
//     considered only when color_control.mode is not OperationMode::Disable,
//   - NumSamples() of the depth buffer, considered only when DepthValid() or
//     StencilValid() reports true.
u32 NumSamples() const {
|
||||
// It seems that the number of samples > 1 set in the AA config doesn't mean we're
|
||||
// always rendering with MSAA, so we need to derive MS ratio from the CB and DB
|
||||
// settings.
|
||||
u32 num_samples = 1u;
|
||||
if (color_control.mode != ColorControl::OperationMode::Disable) {
|
||||
for (auto cb = 0u; cb < NumColorBuffers; ++cb) {
|
||||
const auto& col_buf = color_buffers[cb];
|
||||
if (!col_buf) {
|
||||
continue;
|
||||
}
|
||||
num_samples = std::max(num_samples, col_buf.NumSamples());
|
||||
}
|
||||
}
|
||||
if (depth_buffer.DepthValid() || depth_buffer.StencilValid()) {
|
||||
num_samples = std::max(num_samples, depth_buffer.NumSamples());
|
||||
}
|
||||
return num_samples;
|
||||
}
|
||||
|
||||
void SetDefaults();
|
||||
};
|
||||
|
||||
|
|
|
@ -85,10 +85,6 @@ public:
|
|||
return key.mrt_mask;
|
||||
}
|
||||
|
||||
// Returns the value of the depth_enable field stored in this pipeline's
// key.depth_stencil state.
bool IsDepthEnabled() const {
|
||||
return key.depth_stencil.depth_enable.Value();
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsPrimitiveListTopology() const {
|
||||
return key.prim_type == AmdGpu::PrimitiveType::PointList ||
|
||||
key.prim_type == AmdGpu::PrimitiveType::LineList ||
|
||||
|
|
|
@ -258,32 +258,28 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
auto& key = graphics_key;
|
||||
|
||||
key.depth_stencil = regs.depth_control;
|
||||
key.stencil = regs.stencil_control;
|
||||
key.depth_stencil.depth_write_enable.Assign(regs.depth_control.depth_write_enable.Value() &&
|
||||
!regs.depth_render_control.depth_clear_enable);
|
||||
key.depth_bias_enable = regs.polygon_control.NeedsBias();
|
||||
|
||||
const auto& db = regs.depth_buffer;
|
||||
const auto ds_format = instance.GetSupportedFormat(
|
||||
LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format),
|
||||
const auto depth_format = instance.GetSupportedFormat(
|
||||
LiverpoolToVK::DepthFormat(regs.depth_buffer.z_info.format,
|
||||
regs.depth_buffer.stencil_info.format),
|
||||
vk::FormatFeatureFlagBits2::eDepthStencilAttachment);
|
||||
if (db.z_info.format != AmdGpu::Liverpool::DepthBuffer::ZFormat::Invalid) {
|
||||
key.depth_format = ds_format;
|
||||
if (regs.depth_buffer.DepthValid()) {
|
||||
key.depth_format = depth_format;
|
||||
} else {
|
||||
key.depth_format = vk::Format::eUndefined;
|
||||
key.depth_stencil.depth_enable.Assign(false);
|
||||
}
|
||||
if (regs.depth_control.depth_enable) {
|
||||
key.depth_stencil.depth_enable.Assign(key.depth_format != vk::Format::eUndefined);
|
||||
}
|
||||
key.stencil = regs.stencil_control;
|
||||
|
||||
if (db.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid) {
|
||||
key.stencil_format = key.depth_format;
|
||||
if (regs.depth_buffer.StencilValid()) {
|
||||
key.stencil_format = depth_format;
|
||||
} else {
|
||||
key.stencil_format = vk::Format::eUndefined;
|
||||
key.depth_stencil.stencil_enable.Assign(false);
|
||||
}
|
||||
if (key.depth_stencil.stencil_enable) {
|
||||
key.depth_stencil.stencil_enable.Assign(key.stencil_format != vk::Format::eUndefined);
|
||||
}
|
||||
|
||||
key.prim_type = regs.primitive_type;
|
||||
key.enable_primitive_restart = regs.enable_primitive_restart & 1;
|
||||
key.primitive_restart_index = regs.primitive_restart_index;
|
||||
|
@ -291,7 +287,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
key.cull_mode = regs.polygon_control.CullingMode();
|
||||
key.clip_space = regs.clipper_control.clip_space;
|
||||
key.front_face = regs.polygon_control.front_face;
|
||||
key.num_samples = regs.aa_config.NumSamples();
|
||||
key.num_samples = regs.NumSamples();
|
||||
|
||||
const bool skip_cb_binding =
|
||||
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
|
||||
|
@ -437,8 +433,6 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
}
|
||||
}
|
||||
|
||||
u32 num_samples = 1u;
|
||||
|
||||
// Second pass to fill remaining CB pipeline key data
|
||||
for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
|
||||
auto const& col_buf = regs.color_buffers[cb];
|
||||
|
@ -463,15 +457,8 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)};
|
||||
key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb));
|
||||
++remapped_cb;
|
||||
|
||||
num_samples = std::max(num_samples, 1u << col_buf.attrib.num_samples_log2);
|
||||
}
|
||||
|
||||
// It seems that the number of samples > 1 set in the AA config doesn't mean we're always
|
||||
// rendering with MSAA, so we need to derive MS ratio from the CB settings.
|
||||
num_samples = std::max(num_samples, regs.depth_buffer.NumSamples());
|
||||
key.num_samples = num_samples;
|
||||
|
||||
return true;
|
||||
} // namespace Vulkan
|
||||
|
||||
|
|
|
@ -87,9 +87,11 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
|
|||
LOG_WARNING(Render_Vulkan, "Color buffers require gamma correction");
|
||||
}
|
||||
|
||||
const bool skip_cb_binding =
|
||||
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
|
||||
for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) {
|
||||
const auto& col_buf = regs.color_buffers[col_buf_id];
|
||||
if (!col_buf) {
|
||||
if (skip_cb_binding || !col_buf) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -134,12 +136,8 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
|
|||
};
|
||||
}
|
||||
|
||||
using ZFormat = AmdGpu::Liverpool::DepthBuffer::ZFormat;
|
||||
using StencilFormat = AmdGpu::Liverpool::DepthBuffer::StencilFormat;
|
||||
if (regs.depth_buffer.Address() != 0 &&
|
||||
((regs.depth_control.depth_enable && regs.depth_buffer.z_info.format != ZFormat::Invalid) ||
|
||||
(regs.depth_control.stencil_enable &&
|
||||
regs.depth_buffer.stencil_info.format != StencilFormat::Invalid))) {
|
||||
if ((regs.depth_control.depth_enable && regs.depth_buffer.DepthValid()) ||
|
||||
(regs.depth_control.stencil_enable && regs.depth_buffer.StencilValid())) {
|
||||
const auto htile_address = regs.depth_htile_data_base.GetAddress();
|
||||
const auto& hint = liverpool->last_db_extent;
|
||||
auto& [image_id, desc] =
|
||||
|
@ -159,25 +157,29 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
|
|||
|
||||
state.width = std::min<u32>(state.width, image.info.size.width);
|
||||
state.height = std::min<u32>(state.height, image.info.size.height);
|
||||
state.depth_attachment = {
|
||||
.imageView = *image_view.image_view,
|
||||
.imageLayout = vk::ImageLayout::eUndefined,
|
||||
.loadOp = is_depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
|
||||
.storeOp = vk::AttachmentStoreOp::eStore,
|
||||
.clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}},
|
||||
};
|
||||
state.stencil_attachment = {
|
||||
.imageView = *image_view.image_view,
|
||||
.imageLayout = vk::ImageLayout::eUndefined,
|
||||
.loadOp = is_stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
|
||||
.storeOp = vk::AttachmentStoreOp::eStore,
|
||||
.clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}},
|
||||
};
|
||||
state.has_depth = regs.depth_buffer.DepthValid();
|
||||
state.has_stencil = regs.depth_buffer.StencilValid();
|
||||
if (state.has_depth) {
|
||||
state.depth_attachment = {
|
||||
.imageView = *image_view.image_view,
|
||||
.imageLayout = vk::ImageLayout::eUndefined,
|
||||
.loadOp =
|
||||
is_depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
|
||||
.storeOp = vk::AttachmentStoreOp::eStore,
|
||||
.clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}},
|
||||
};
|
||||
}
|
||||
if (state.has_stencil) {
|
||||
state.stencil_attachment = {
|
||||
.imageView = *image_view.image_view,
|
||||
.imageLayout = vk::ImageLayout::eUndefined,
|
||||
.loadOp =
|
||||
is_stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
|
||||
.storeOp = vk::AttachmentStoreOp::eStore,
|
||||
.clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}},
|
||||
};
|
||||
}
|
||||
texture_cache.TouchMeta(htile_address, slice, false);
|
||||
state.has_depth =
|
||||
regs.depth_buffer.z_info.format != AmdGpu::Liverpool::DepthBuffer::ZFormat::Invalid;
|
||||
state.has_stencil = regs.depth_buffer.stencil_info.format !=
|
||||
AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid;
|
||||
}
|
||||
|
||||
return state;
|
||||
|
@ -815,34 +817,60 @@ void Rasterizer::Resolve() {
|
|||
mrt1_range.base.layer = liverpool->regs.color_buffers[1].view.slice_start;
|
||||
mrt1_range.extent.layers = liverpool->regs.color_buffers[1].NumSlices() - mrt1_range.base.layer;
|
||||
|
||||
vk::ImageResolve region = {
|
||||
.srcSubresource =
|
||||
{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = mrt0_range.base.layer,
|
||||
.layerCount = mrt0_range.extent.layers,
|
||||
},
|
||||
.srcOffset = {0, 0, 0},
|
||||
.dstSubresource =
|
||||
{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = mrt1_range.base.layer,
|
||||
.layerCount = mrt1_range.extent.layers,
|
||||
},
|
||||
.dstOffset = {0, 0, 0},
|
||||
.extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1},
|
||||
};
|
||||
|
||||
mrt0_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead,
|
||||
mrt0_range);
|
||||
|
||||
mrt1_image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite,
|
||||
mrt1_range);
|
||||
|
||||
cmdbuf.resolveImage(mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal, mrt1_image.image,
|
||||
vk::ImageLayout::eTransferDstOptimal, region);
|
||||
if (mrt0_image.info.num_samples == 1) {
|
||||
// Vulkan does not allow resolve from a single sample image, so change it to a copy.
|
||||
// Note that resolving a single-sampled image doesn't really make sense, but a game might do
|
||||
// it.
|
||||
vk::ImageCopy region = {
|
||||
.srcSubresource =
|
||||
{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = mrt0_range.base.layer,
|
||||
.layerCount = mrt0_range.extent.layers,
|
||||
},
|
||||
.srcOffset = {0, 0, 0},
|
||||
.dstSubresource =
|
||||
{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = mrt1_range.base.layer,
|
||||
.layerCount = mrt1_range.extent.layers,
|
||||
},
|
||||
.dstOffset = {0, 0, 0},
|
||||
.extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1},
|
||||
};
|
||||
cmdbuf.copyImage(mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal, mrt1_image.image,
|
||||
vk::ImageLayout::eTransferDstOptimal, region);
|
||||
} else {
|
||||
vk::ImageResolve region = {
|
||||
.srcSubresource =
|
||||
{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = mrt0_range.base.layer,
|
||||
.layerCount = mrt0_range.extent.layers,
|
||||
},
|
||||
.srcOffset = {0, 0, 0},
|
||||
.dstSubresource =
|
||||
{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = mrt1_range.base.layer,
|
||||
.layerCount = mrt1_range.extent.layers,
|
||||
},
|
||||
.dstOffset = {0, 0, 0},
|
||||
.extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1},
|
||||
};
|
||||
cmdbuf.resolveImage(mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal,
|
||||
mrt1_image.image, vk::ImageLayout::eTransferDstOptimal, region);
|
||||
}
|
||||
}
|
||||
|
||||
void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
|
||||
|
@ -998,6 +1026,10 @@ void Rasterizer::UpdateViewportScissorState() {
|
|||
enable_offset ? regs.window_offset.window_y_offset : 0);
|
||||
|
||||
for (u32 idx = 0; idx < Liverpool::NumViewports; idx++) {
|
||||
if (regs.viewports[idx].xscale == 0) {
|
||||
// Scissor and viewport counts should be equal.
|
||||
continue;
|
||||
}
|
||||
auto vp_scsr = scsr;
|
||||
if (regs.mode_control.vport_scissor_enable) {
|
||||
vp_scsr.top_left_x =
|
||||
|
@ -1020,13 +1052,6 @@ void Rasterizer::UpdateViewportScissorState() {
|
|||
cmdbuf.setScissor(0, scissors);
|
||||
}
|
||||
|
||||
// Reads depth_control from liverpool->regs and forwards its depth_bounds_enable
// field to setDepthBoundsTestEnable on the command buffer obtained from
// scheduler.CommandBuffer(). No other depth/stencil state is touched here.
void Rasterizer::UpdateDepthStencilState() {
|
||||
auto& depth = liverpool->regs.depth_control;
|
||||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.setDepthBoundsTestEnable(depth.depth_bounds_enable);
|
||||
}
|
||||
|
||||
void Rasterizer::ScopeMarkerBegin(const std::string_view& str) {
|
||||
if (Config::nullGpu() || !Config::vkMarkersEnabled()) {
|
||||
return;
|
||||
|
|
|
@ -74,7 +74,6 @@ private:
|
|||
|
||||
void UpdateDynamicState(const GraphicsPipeline& pipeline);
|
||||
void UpdateViewportScissorState();
|
||||
void UpdateDepthStencilState();
|
||||
|
||||
bool FilterDraw();
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) {
|
|||
is_rendering = true;
|
||||
render_state = new_state;
|
||||
|
||||
const auto witdh =
|
||||
const auto width =
|
||||
render_state.width != std::numeric_limits<u32>::max() ? render_state.width : 1;
|
||||
const auto height =
|
||||
render_state.height != std::numeric_limits<u32>::max() ? render_state.height : 1;
|
||||
|
@ -39,7 +39,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) {
|
|||
.renderArea =
|
||||
{
|
||||
.offset = {0, 0},
|
||||
.extent = {witdh, height},
|
||||
.extent = {width, height},
|
||||
},
|
||||
.layerCount = 1,
|
||||
.colorAttachmentCount = render_state.num_color_attachments,
|
||||
|
|
|
@ -266,7 +266,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
|||
props.is_tiled = buffer.IsTiled();
|
||||
tiling_mode = buffer.GetTilingMode();
|
||||
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat());
|
||||
num_samples = 1 << buffer.attrib.num_fragments_log2;
|
||||
num_samples = buffer.NumSamples();
|
||||
num_bits = NumBits(buffer.info.format);
|
||||
type = vk::ImageType::e2D;
|
||||
size.width = hint.Valid() ? hint.width : buffer.Pitch();
|
||||
|
@ -289,7 +289,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
|
|||
props.is_tiled = false;
|
||||
pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format);
|
||||
type = vk::ImageType::e2D;
|
||||
num_samples = 1 << buffer.z_info.num_samples; // spec doesn't say it is a log2
|
||||
num_samples = buffer.NumSamples();
|
||||
num_bits = buffer.NumBits();
|
||||
size.width = hint.Valid() ? hint.width : buffer.Pitch();
|
||||
size.height = hint.Valid() ? hint.height : buffer.Height();
|
||||
|
|
Loading…
Reference in a new issue