diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 4c74d37d..83271a82 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -428,6 +428,14 @@ struct Liverpool { BitField<0, 22, u32> tile_max; } depth_slice; + bool DepthValid() const { + return Address() != 0 && z_info.format != ZFormat::Invalid; + } + + bool StencilValid() const { + return Address() != 0 && stencil_info.format != StencilFormat::Invalid; + } + u32 Pitch() const { return (depth_size.pitch_tile_max + 1) << 3; } @@ -1275,6 +1283,26 @@ struct Liverpool { return nullptr; } + u32 NumSamples() const { + // It seems that the number of samples > 1 set in the AA config doesn't mean we're + // always rendering with MSAA, so we need to derive MS ratio from the CB and DB + // settings. + u32 num_samples = 1u; + if (color_control.mode != ColorControl::OperationMode::Disable) { + for (auto cb = 0u; cb < NumColorBuffers; ++cb) { + const auto& col_buf = color_buffers[cb]; + if (!col_buf) { + continue; + } + num_samples = std::max(num_samples, col_buf.NumSamples()); + } + } + if (depth_buffer.DepthValid() || depth_buffer.StencilValid()) { + num_samples = std::max(num_samples, depth_buffer.NumSamples()); + } + return num_samples; + } + void SetDefaults(); }; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 444c8517..f25341bb 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -85,10 +85,6 @@ public: return key.mrt_mask; } - bool IsDepthEnabled() const { - return key.depth_stencil.depth_enable.Value(); - } - [[nodiscard]] bool IsPrimitiveListTopology() const { return key.prim_type == AmdGpu::PrimitiveType::PointList || key.prim_type == AmdGpu::PrimitiveType::LineList || diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4b88bd37..43e02dd9 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -258,32 +258,28 @@ bool PipelineCache::RefreshGraphicsKey() { auto& key = graphics_key; key.depth_stencil = regs.depth_control; + key.stencil = regs.stencil_control; key.depth_stencil.depth_write_enable.Assign(regs.depth_control.depth_write_enable.Value() && !regs.depth_render_control.depth_clear_enable); key.depth_bias_enable = regs.polygon_control.NeedsBias(); - const auto& db = regs.depth_buffer; - const auto ds_format = instance.GetSupportedFormat( - LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format), + const auto depth_format = instance.GetSupportedFormat( + LiverpoolToVK::DepthFormat(regs.depth_buffer.z_info.format, + regs.depth_buffer.stencil_info.format), vk::FormatFeatureFlagBits2::eDepthStencilAttachment); - if (db.z_info.format != AmdGpu::Liverpool::DepthBuffer::ZFormat::Invalid) { - key.depth_format = ds_format; + if (regs.depth_buffer.DepthValid()) { + key.depth_format = depth_format; } else { key.depth_format = vk::Format::eUndefined; + key.depth_stencil.depth_enable.Assign(false); } - if (regs.depth_control.depth_enable) { - key.depth_stencil.depth_enable.Assign(key.depth_format != vk::Format::eUndefined); - } - key.stencil = regs.stencil_control; - - if (db.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid) { - key.stencil_format = key.depth_format; + if (regs.depth_buffer.StencilValid()) { + key.stencil_format = depth_format; } else { key.stencil_format = vk::Format::eUndefined; + key.depth_stencil.stencil_enable.Assign(false); } - if (key.depth_stencil.stencil_enable) { - key.depth_stencil.stencil_enable.Assign(key.stencil_format != vk::Format::eUndefined); - } + key.prim_type = regs.primitive_type; key.enable_primitive_restart = regs.enable_primitive_restart & 1; key.primitive_restart_index = regs.primitive_restart_index; @@ -291,7 +287,7 @@ bool PipelineCache::RefreshGraphicsKey() { key.cull_mode = regs.polygon_control.CullingMode(); key.clip_space = regs.clipper_control.clip_space; key.front_face = regs.polygon_control.front_face; - key.num_samples = regs.aa_config.NumSamples(); + key.num_samples = regs.NumSamples(); const bool skip_cb_binding = regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; @@ -437,8 +433,6 @@ bool PipelineCache::RefreshGraphicsKey() { } } - u32 num_samples = 1u; - // Second pass to fill remain CB pipeline key data for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { auto const& col_buf = regs.color_buffers[cb]; @@ -463,15 +457,8 @@ bool PipelineCache::RefreshGraphicsKey() { key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb)); ++remapped_cb; - - num_samples = std::max(num_samples, 1u << col_buf.attrib.num_samples_log2); } - // It seems that the number of samples > 1 set in the AA config doesn't mean we're always - // rendering with MSAA, so we need to derive MS ratio from the CB settings. - num_samples = std::max(num_samples, regs.depth_buffer.NumSamples()); - key.num_samples = num_samples; - return true; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index b7cfb8cf..a0899f7c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -87,9 +87,11 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { LOG_WARNING(Render_Vulkan, "Color buffers require gamma correction"); } + const bool skip_cb_binding = + regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) { const auto& col_buf = regs.color_buffers[col_buf_id]; - if (!col_buf) { + if (skip_cb_binding || !col_buf) { continue; } @@ -134,12 +136,8 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { }; } - using ZFormat = AmdGpu::Liverpool::DepthBuffer::ZFormat; - using StencilFormat = AmdGpu::Liverpool::DepthBuffer::StencilFormat; - if (regs.depth_buffer.Address() != 0 && - ((regs.depth_control.depth_enable && regs.depth_buffer.z_info.format != ZFormat::Invalid) || - (regs.depth_control.stencil_enable && - regs.depth_buffer.stencil_info.format != StencilFormat::Invalid))) { + if ((regs.depth_control.depth_enable && regs.depth_buffer.DepthValid()) || + (regs.depth_control.stencil_enable && regs.depth_buffer.StencilValid())) { const auto htile_address = regs.depth_htile_data_base.GetAddress(); const auto& hint = liverpool->last_db_extent; auto& [image_id, desc] = @@ -159,25 +157,29 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { state.width = std::min(state.width, image.info.size.width); state.height = std::min(state.height, image.info.size.height); - state.depth_attachment = { - .imageView = *image_view.image_view, - .imageLayout = vk::ImageLayout::eUndefined, - .loadOp = is_depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}}, - }; - state.stencil_attachment = { - .imageView = *image_view.image_view, - .imageLayout = vk::ImageLayout::eUndefined, - .loadOp = is_stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}}, - }; + state.has_depth = regs.depth_buffer.DepthValid(); + state.has_stencil = regs.depth_buffer.StencilValid(); + if (state.has_depth) { + state.depth_attachment = { + .imageView = *image_view.image_view, + .imageLayout = vk::ImageLayout::eUndefined, + .loadOp = + is_depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}}, + }; + } + if (state.has_stencil) { + state.stencil_attachment = { + .imageView = *image_view.image_view, + .imageLayout = vk::ImageLayout::eUndefined, + .loadOp = + is_stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}}, + }; + } texture_cache.TouchMeta(htile_address, slice, false); - state.has_depth = - regs.depth_buffer.z_info.format != AmdGpu::Liverpool::DepthBuffer::ZFormat::Invalid; - state.has_stencil = regs.depth_buffer.stencil_info.format != - AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid; } return state; @@ -815,34 +817,60 @@ void Rasterizer::Resolve() { mrt1_range.base.layer = liverpool->regs.color_buffers[1].view.slice_start; mrt1_range.extent.layers = liverpool->regs.color_buffers[1].NumSlices() - mrt1_range.base.layer; - vk::ImageResolve region = { - .srcSubresource = - { - .aspectMask = vk::ImageAspectFlagBits::eColor, - .mipLevel = 0, - .baseArrayLayer = mrt0_range.base.layer, - .layerCount = mrt0_range.extent.layers, - }, - .srcOffset = {0, 0, 0}, - .dstSubresource = - { - .aspectMask = vk::ImageAspectFlagBits::eColor, - .mipLevel = 0, - .baseArrayLayer = mrt1_range.base.layer, - .layerCount = mrt1_range.extent.layers, - }, - .dstOffset = {0, 0, 0}, - .extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1}, - }; - mrt0_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, mrt0_range); mrt1_image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, mrt1_range); - cmdbuf.resolveImage(mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal, mrt1_image.image, - vk::ImageLayout::eTransferDstOptimal, region); + if (mrt0_image.info.num_samples == 1) { + // Vulkan does not allow resolve from a single sample image, so change it to a copy. + // Note that resolving a single-sampled image doesn't really make sense, but a game might do + // it. + vk::ImageCopy region = { + .srcSubresource = + { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = mrt0_range.base.layer, + .layerCount = mrt0_range.extent.layers, + }, + .srcOffset = {0, 0, 0}, + .dstSubresource = + { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = mrt1_range.base.layer, + .layerCount = mrt1_range.extent.layers, + }, + .dstOffset = {0, 0, 0}, + .extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1}, + }; + cmdbuf.copyImage(mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal, mrt1_image.image, + vk::ImageLayout::eTransferDstOptimal, region); + } else { + vk::ImageResolve region = { + .srcSubresource = + { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = mrt0_range.base.layer, + .layerCount = mrt0_range.extent.layers, + }, + .srcOffset = {0, 0, 0}, + .dstSubresource = + { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = mrt1_range.base.layer, + .layerCount = mrt1_range.extent.layers, + }, + .dstOffset = {0, 0, 0}, + .extent = {mrt1_image.info.size.width, mrt1_image.info.size.height, 1}, + }; + cmdbuf.resolveImage(mrt0_image.image, vk::ImageLayout::eTransferSrcOptimal, + mrt1_image.image, vk::ImageLayout::eTransferDstOptimal, region); + } } void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) { @@ -998,6 +1026,10 @@ void Rasterizer::UpdateViewportScissorState() { enable_offset ? regs.window_offset.window_y_offset : 0); for (u32 idx = 0; idx < Liverpool::NumViewports; idx++) { + if (regs.viewports[idx].xscale == 0) { + // Scissor and viewport counts should be equal. + continue; + } auto vp_scsr = scsr; if (regs.mode_control.vport_scissor_enable) { vp_scsr.top_left_x = @@ -1020,13 +1052,6 @@ void Rasterizer::UpdateViewportScissorState() { cmdbuf.setScissor(0, scissors); } -void Rasterizer::UpdateDepthStencilState() { - auto& depth = liverpool->regs.depth_control; - - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.setDepthBoundsTestEnable(depth.depth_bounds_enable); -} - void Rasterizer::ScopeMarkerBegin(const std::string_view& str) { if (Config::nullGpu() || !Config::vkMarkersEnabled()) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ec1b5e13..80b22c7d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -74,7 +74,6 @@ private: void UpdateDynamicState(const GraphicsPipeline& pipeline); void UpdateViewportScissorState(); - void UpdateDepthStencilState(); bool FilterDraw(); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 81415f8b..f6b0edda 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -30,7 +30,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) { is_rendering = true; render_state = new_state; - const auto witdh = + const auto width = render_state.width != std::numeric_limits::max() ? render_state.width : 1; const auto height = render_state.height != std::numeric_limits::max() ? render_state.height : 1; @@ -39,7 +39,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) { .renderArea = { .offset = {0, 0}, - .extent = {witdh, height}, + .extent = {width, height}, }, .layerCount = 1, .colorAttachmentCount = render_state.num_color_attachments, diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 1445d41c..606ede55 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -266,7 +266,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, props.is_tiled = buffer.IsTiled(); tiling_mode = buffer.GetTilingMode(); pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat()); - num_samples = 1 << buffer.attrib.num_fragments_log2; + num_samples = buffer.NumSamples(); num_bits = NumBits(buffer.info.format); type = vk::ImageType::e2D; size.width = hint.Valid() ? hint.width : buffer.Pitch(); @@ -289,7 +289,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice props.is_tiled = false; pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format); type = vk::ImageType::e2D; - num_samples = 1 << buffer.z_info.num_samples; // spec doesn't say it is a log2 + num_samples = buffer.NumSamples(); num_bits = buffer.NumBits(); size.width = hint.Valid() ? hint.width : buffer.Pitch(); size.height = hint.Valid() ? hint.height : buffer.Height();