renderer_vulkan: Simplify vertex binding logic and properly handle null buffers. (#2104)

* renderer_vulkan: Simplify vertex binding logic and properly handle null buffers.

* renderer_vulkan: Remove need for empty bindVertexBuffers2EXT.
Author: squidbus, 2025-01-10 00:52:12 -08:00 (committed by GitHub)
parent 4563b6379d
commit 562ed2a025
6 changed files with 137 additions and 138 deletions

src/video_core/buffer_cache/buffer_cache.cpp

@@ -10,13 +10,13 @@
 #include "video_core/amdgpu/liverpool.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/renderer_vulkan/liverpool_to_vk.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/texture_cache/texture_cache.h"
 
 namespace VideoCore {
 
-static constexpr size_t NumVertexBuffers = 32;
 static constexpr size_t GdsBufferSize = 64_KB;
 static constexpr size_t StagingBufferSize = 1_GB;
 static constexpr size_t UboStreamBufferSize = 64_MB;
@@ -89,35 +89,22 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
     }
 }
 
-bool BufferCache::BindVertexBuffers(
-    const Shader::Info& vs_info, const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader) {
-    boost::container::small_vector<vk::VertexInputAttributeDescription2EXT, 16> attributes;
-    boost::container::small_vector<vk::VertexInputBindingDescription2EXT, 16> bindings;
-    SCOPE_EXIT {
-        if (instance.IsVertexInputDynamicState()) {
-            const auto cmdbuf = scheduler.CommandBuffer();
-            cmdbuf.setVertexInputEXT(bindings, attributes);
-        } else if (bindings.empty()) {
-            // Required to call bindVertexBuffers2EXT at least once in the current command buffer
-            // with non-null strides without a non-dynamic stride pipeline in between. Thus even
-            // when nothing is bound we still need to make a dummy call. Non-null strides in turn
-            // requires a count greater than 0.
-            const auto cmdbuf = scheduler.CommandBuffer();
-            const std::array null_buffers = {GetBuffer(NULL_BUFFER_ID).buffer.buffer};
-            constexpr std::array null_offsets = {static_cast<vk::DeviceSize>(0)};
-            cmdbuf.bindVertexBuffers2EXT(0, null_buffers, null_offsets, null_offsets, null_offsets);
-        }
-    };
-
-    if (!fetch_shader || fetch_shader->attributes.empty()) {
-        return false;
+void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) {
+    Vulkan::VertexInputs<vk::VertexInputAttributeDescription2EXT> attributes;
+    Vulkan::VertexInputs<vk::VertexInputBindingDescription2EXT> bindings;
+    Vulkan::VertexInputs<AmdGpu::Buffer> guest_buffers;
+    pipeline.GetVertexInputs(attributes, bindings, guest_buffers);
+
+    if (instance.IsVertexInputDynamicState()) {
+        // Update current vertex inputs.
+        const auto cmdbuf = scheduler.CommandBuffer();
+        cmdbuf.setVertexInputEXT(bindings, attributes);
     }
 
-    std::array<vk::Buffer, NumVertexBuffers> host_buffers;
-    std::array<vk::DeviceSize, NumVertexBuffers> host_offsets;
-    std::array<vk::DeviceSize, NumVertexBuffers> host_sizes;
-    std::array<vk::DeviceSize, NumVertexBuffers> host_strides;
-    boost::container::static_vector<AmdGpu::Buffer, NumVertexBuffers> guest_buffers;
+    if (bindings.empty()) {
+        // If there are no bindings, there is nothing further to do.
+        return;
+    }
 
     struct BufferRange {
         VAddr base_address;
@@ -125,61 +112,37 @@ bool BufferCache::BindVertexBuffers(
         vk::Buffer vk_buffer;
         u64 offset;
 
-        size_t GetSize() const {
+        [[nodiscard]] size_t GetSize() const {
             return end_address - base_address;
         }
     };
 
-    // Calculate buffers memory overlaps
-    bool has_step_rate = false;
-    boost::container::static_vector<BufferRange, NumVertexBuffers> ranges{};
-    for (const auto& attrib : fetch_shader->attributes) {
-        if (attrib.UsesStepRates()) {
-            has_step_rate = true;
-            continue;
-        }
-
-        const auto& buffer = attrib.GetSharp(vs_info);
-        if (buffer.GetSize() == 0) {
-            continue;
-        }
-        guest_buffers.emplace_back(buffer);
-        ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
-        attributes.push_back({
-            .location = attrib.semantic,
-            .binding = attrib.semantic,
-            .format =
-                Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
-            .offset = 0,
-        });
-        bindings.push_back({
-            .binding = attrib.semantic,
-            .stride = buffer.GetStride(),
-            .inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
-                             ? vk::VertexInputRate::eVertex
-                             : vk::VertexInputRate::eInstance,
-            .divisor = 1,
-        });
-    }
-    if (ranges.empty()) {
-        return false;
-    }
-
-    std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
-        return lhv.base_address < rhv.base_address;
-    });
-
-    boost::container::static_vector<BufferRange, NumVertexBuffers> ranges_merged{ranges[0]};
-    for (auto range : ranges) {
-        auto& prev_range = ranges_merged.back();
-        if (prev_range.end_address < range.base_address) {
-            ranges_merged.emplace_back(range);
-        } else {
-            prev_range.end_address = std::max(prev_range.end_address, range.end_address);
+    // Build list of ranges covering the requested buffers
+    Vulkan::VertexInputs<BufferRange> ranges{};
+    for (const auto& buffer : guest_buffers) {
+        if (buffer.GetSize() > 0) {
+            ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
         }
     }
 
-    // Map buffers
+    // Merge connecting ranges together
+    Vulkan::VertexInputs<BufferRange> ranges_merged{};
+    if (!ranges.empty()) {
+        std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
+            return lhv.base_address < rhv.base_address;
+        });
+        ranges_merged.emplace_back(ranges[0]);
+        for (auto range : ranges) {
+            auto& prev_range = ranges_merged.back();
+            if (prev_range.end_address < range.base_address) {
+                ranges_merged.emplace_back(range);
+            } else {
+                prev_range.end_address = std::max(prev_range.end_address, range.end_address);
+            }
+        }
+    }
+
+    // Map buffers for merged ranges
     for (auto& range : ranges_merged) {
         const auto [buffer, offset] = ObtainBuffer(range.base_address, range.GetSize(), false);
         range.vk_buffer = buffer->buffer;
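
The pass above is a textbook interval merge: sort the ranges by base address, then either extend the previous merged range or start a new one. A minimal standalone sketch of the same idea (hypothetical Range type, not the emulator's BufferRange):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Range {
        std::uint64_t base_address;
        std::uint64_t end_address;
    };

    // Merge overlapping or touching ranges into the smallest covering set.
    std::vector<Range> MergeRanges(std::vector<Range> ranges) {
        std::vector<Range> merged;
        if (ranges.empty()) {
            return merged;
        }
        std::ranges::sort(ranges, [](const Range& lhs, const Range& rhs) {
            return lhs.base_address < rhs.base_address;
        });
        merged.push_back(ranges.front());
        for (const Range& range : ranges) {
            Range& prev = merged.back();
            if (prev.end_address < range.base_address) {
                merged.push_back(range); // Disjoint: start a new merged range.
            } else {
                prev.end_address = std::max(prev.end_address, range.end_address);
            }
        }
        return merged;
    }

As in the diff, the loop revisits the first element after seeding the output with it; merging a range with itself is a no-op, which avoids a special case for the first iteration.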
@@ -187,32 +150,39 @@ bool BufferCache::BindVertexBuffers(
         range.offset = offset;
     }
 
     // Bind vertex buffers
-    const size_t num_buffers = guest_buffers.size();
-    for (u32 i = 0; i < num_buffers; ++i) {
-        const auto& buffer = guest_buffers[i];
-        const auto host_buffer = std::ranges::find_if(ranges_merged, [&](const BufferRange& range) {
-            return (buffer.base_address >= range.base_address &&
-                    buffer.base_address < range.end_address);
-        });
-        ASSERT(host_buffer != ranges_merged.cend());
-
-        host_buffers[i] = host_buffer->vk_buffer;
-        host_offsets[i] = host_buffer->offset + buffer.base_address - host_buffer->base_address;
-        host_sizes[i] = buffer.GetSize();
-        host_strides[i] = buffer.GetStride();
-    }
-
-    if (num_buffers > 0) {
-        const auto cmdbuf = scheduler.CommandBuffer();
-        if (instance.IsVertexInputDynamicState()) {
-            cmdbuf.bindVertexBuffers(0, num_buffers, host_buffers.data(), host_offsets.data());
-        } else {
-            cmdbuf.bindVertexBuffers2EXT(0, num_buffers, host_buffers.data(), host_offsets.data(),
-                                         host_sizes.data(), host_strides.data());
-        }
+    Vulkan::VertexInputs<vk::Buffer> host_buffers;
+    Vulkan::VertexInputs<vk::DeviceSize> host_offsets;
+    Vulkan::VertexInputs<vk::DeviceSize> host_sizes;
+    Vulkan::VertexInputs<vk::DeviceSize> host_strides;
+    const auto null_buffer =
+        instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : GetBuffer(NULL_BUFFER_ID).Handle();
+    for (const auto& buffer : guest_buffers) {
+        if (buffer.GetSize() > 0) {
+            const auto host_buffer_info =
+                std::ranges::find_if(ranges_merged, [&](const BufferRange& range) {
+                    return buffer.base_address >= range.base_address &&
+                           buffer.base_address < range.end_address;
+                });
+            ASSERT(host_buffer_info != ranges_merged.cend());
+            host_buffers.emplace_back(host_buffer_info->vk_buffer);
+            host_offsets.push_back(host_buffer_info->offset + buffer.base_address -
+                                   host_buffer_info->base_address);
+        } else {
+            host_buffers.emplace_back(null_buffer);
+            host_offsets.push_back(0);
+        }
+        host_sizes.push_back(buffer.GetSize());
+        host_strides.push_back(buffer.GetStride());
     }
 
-    return has_step_rate;
+    const auto cmdbuf = scheduler.CommandBuffer();
+    const auto num_buffers = guest_buffers.size();
+    if (instance.IsVertexInputDynamicState()) {
+        cmdbuf.bindVertexBuffers(0, num_buffers, host_buffers.data(), host_offsets.data());
+    } else {
+        cmdbuf.bindVertexBuffers2EXT(0, num_buffers, host_buffers.data(), host_offsets.data(),
+                                     host_sizes.data(), host_strides.data());
+    }
 }
 
 void BufferCache::BindIndexBuffer(u32 index_offset) {
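
The null_buffer selection above is the "properly handle null buffers" half of the change: a zero-sized guest buffer keeps its binding slot but is bound as either VK_NULL_HANDLE, when VK_EXT_robustness2's nullDescriptor feature is available (null vertex buffers are then legal and reads from them return zero), or a small dummy buffer as a fallback. Because every slot now binds something, the old SCOPE_EXIT dummy call is unnecessary: bindVertexBuffers2EXT is always issued with a positive count and real strides. A minimal sketch of the substitution, assuming vulkan.hpp and a caller-provided dummy buffer:

    #include <vector>
    #include <vulkan/vulkan.hpp>

    // Bind the given vertex buffers, backing empty slots (null handles here)
    // with either a legal VK_NULL_HANDLE binding or a dummy buffer.
    void BindWithNullFallback(vk::CommandBuffer cmdbuf, std::vector<vk::Buffer> buffers,
                              const std::vector<vk::DeviceSize>& offsets,
                              bool null_descriptor_supported, vk::Buffer dummy_buffer) {
        for (auto& buffer : buffers) {
            if (!buffer) {
                // With nullDescriptor the null handle may stay; otherwise a
                // real buffer must back the slot.
                buffer = null_descriptor_supported ? vk::Buffer{} : dummy_buffer;
            }
        }
        cmdbuf.bindVertexBuffers(0, buffers, offsets);
    }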

src/video_core/buffer_cache/buffer_cache.h

@@ -5,8 +5,6 @@
 #include <shared_mutex>
 
 #include <boost/container/small_vector.hpp>
-#include <boost/icl/interval_map.hpp>
-#include <tsl/robin_map.h>
 #include "common/div_ceil.h"
 #include "common/slot_vector.h"
 #include "common/types.h"

@@ -26,6 +24,10 @@ struct FetchShaderData;
 struct Info;
 } // namespace Shader
 
+namespace Vulkan {
+class GraphicsPipeline;
+}
+
 namespace VideoCore {
 
 using BufferId = Common::SlotId;

@@ -75,8 +77,7 @@
     void InvalidateMemory(VAddr device_addr, u64 size);
 
     /// Binds host vertex buffers for the current draw.
-    bool BindVertexBuffers(const Shader::Info& vs_info,
-                           const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader);
+    void BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline);
 
     /// Bind host index buffer for the current draw.
     void BindIndexBuffer(u32 index_offset);
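
Note that the header gains only a forward declaration of Vulkan::GraphicsPipeline rather than an include: declaring a reference parameter needs just the class name, so the buffer cache header avoids a dependency on the renderer's headers. The pattern in isolation (hypothetical consumer class):

    // consumer.h: a forward declaration is enough to declare a reference
    // parameter; the full class definition is not needed here.
    namespace Vulkan {
    class GraphicsPipeline;
    }

    class ConsumerSketch {
    public:
        void Use(const Vulkan::GraphicsPipeline& pipeline);
    };

    // consumer.cpp would then include vk_graphics_pipeline.h before
    // accessing any GraphicsPipeline members.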

src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp

@@ -57,35 +57,11 @@ GraphicsPipeline::GraphicsPipeline(
     pipeline_layout = std::move(layout);
     SetObjectName(device, *pipeline_layout, "Graphics PipelineLayout {}", debug_str);
 
-    boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
-    boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
-    if (fetch_shader && !instance.IsVertexInputDynamicState()) {
-        const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
-        for (const auto& attrib : fetch_shader->attributes) {
-            if (attrib.UsesStepRates()) {
-                // Skip attribute binding as the data will be pulled by shader
-                continue;
-            }
-
-            const auto buffer = attrib.GetSharp(vs_info);
-            if (buffer.GetSize() == 0) {
-                continue;
-            }
-            vertex_attributes.push_back({
-                .location = attrib.semantic,
-                .binding = attrib.semantic,
-                .format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
-                .offset = 0,
-            });
-            vertex_bindings.push_back({
-                .binding = attrib.semantic,
-                .stride = buffer.GetStride(),
-                .inputRate =
-                    attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
-                        ? vk::VertexInputRate::eVertex
-                        : vk::VertexInputRate::eInstance,
-            });
-        }
+    VertexInputs<vk::VertexInputAttributeDescription> vertex_attributes;
+    VertexInputs<vk::VertexInputBindingDescription> vertex_bindings;
+    VertexInputs<AmdGpu::Buffer> guest_buffers;
+    if (!instance.IsVertexInputDynamicState()) {
+        GetVertexInputs(vertex_attributes, vertex_bindings, guest_buffers);
     }
 
     const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
@@ -161,7 +137,7 @@ GraphicsPipeline::GraphicsPipeline(
     }
     if (instance.IsVertexInputDynamicState()) {
         dynamic_states.push_back(vk::DynamicState::eVertexInputEXT);
-    } else {
+    } else if (!vertex_bindings.empty()) {
         dynamic_states.push_back(vk::DynamicState::eVertexInputBindingStrideEXT);
     }
@@ -329,6 +305,51 @@ GraphicsPipeline::GraphicsPipeline(
 
 GraphicsPipeline::~GraphicsPipeline() = default;
 
+template <typename Attribute, typename Binding>
+void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
+                                       VertexInputs<Binding>& bindings,
+                                       VertexInputs<AmdGpu::Buffer>& guest_buffers) const {
+    if (!fetch_shader || fetch_shader->attributes.empty()) {
+        return;
+    }
+
+    const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
+    for (const auto& attrib : fetch_shader->attributes) {
+        if (attrib.UsesStepRates()) {
+            // Skip attribute binding as the data will be pulled by shader.
+            continue;
+        }
+
+        const auto& buffer = attrib.GetSharp(vs_info);
+        attributes.push_back(Attribute{
+            .location = attrib.semantic,
+            .binding = attrib.semantic,
+            .format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
+            .offset = 0,
+        });
+        bindings.push_back(Binding{
+            .binding = attrib.semantic,
+            .stride = buffer.GetStride(),
+            .inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
+                             ? vk::VertexInputRate::eVertex
+                             : vk::VertexInputRate::eInstance,
+        });
+        if constexpr (std::is_same_v<Binding, vk::VertexInputBindingDescription2EXT>) {
+            bindings.back().divisor = 1;
+        }
+        guest_buffers.emplace_back(buffer);
+    }
+}
+
+// Declare templated GetVertexInputs for necessary types.
+template void GraphicsPipeline::GetVertexInputs(
+    VertexInputs<vk::VertexInputAttributeDescription>& attributes,
+    VertexInputs<vk::VertexInputBindingDescription>& bindings,
+    VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
+template void GraphicsPipeline::GetVertexInputs(
+    VertexInputs<vk::VertexInputAttributeDescription2EXT>& attributes,
+    VertexInputs<vk::VertexInputBindingDescription2EXT>& bindings,
+    VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
+
 void GraphicsPipeline::BuildDescSetLayout() {
     boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
     u32 binding{};
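
GetVertexInputs is a member template defined in the .cpp file, so the explicit instantiations at the end are what make the two required specializations linkable from other translation units: the buffer cache uses the 2EXT flavor for dynamic vertex input, and pipeline construction uses the static flavor. The if constexpr branch sets the divisor member only for the 2EXT binding type, which plain vk::VertexInputBindingDescription lacks. The same pattern in miniature (hypothetical names):

    // sketch.h -- declaration only; the template body stays out of the header.
    template <typename T>
    void Process(const T& value);

    // sketch.cpp -- definition plus explicit instantiation definitions, which
    // emit code for exactly the types other translation units may use.
    template <typename T>
    void Process(const T& value) {
        // ... implementation ...
    }

    template void Process<int>(const int&);
    template void Process<float>(const float&);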

src/video_core/renderer_vulkan/vk_graphics_pipeline.h

@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include <boost/container/static_vector.hpp>
 #include <xxhash.h>
 
 #include "common/types.h"

@@ -27,6 +28,9 @@ class DescriptorHeap;
 
 using Liverpool = AmdGpu::Liverpool;
 
+template <typename T>
+using VertexInputs = boost::container::static_vector<T, MaxVertexBufferCount>;
+
 struct GraphicsPipelineKey {
     std::array<size_t, MaxShaderStages> stage_hashes;
     u32 num_color_attachments;

@@ -100,6 +104,11 @@
                key.prim_type == AmdGpu::PrimitiveType::QuadList;
     }
 
+    /// Gets the attributes and bindings for vertex inputs.
+    template <typename Attribute, typename Binding>
+    void GetVertexInputs(VertexInputs<Attribute>& attributes, VertexInputs<Binding>& bindings,
+                         VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
+
 private:
     void BuildDescSetLayout();

src/video_core/renderer_vulkan/vk_pipeline_cache.cpp

@@ -420,17 +420,17 @@ bool PipelineCache::RefreshGraphicsKey() {
         }
     }
 
-    const auto vs_info = infos[static_cast<u32>(Shader::LogicalStage::Vertex)];
+    const auto* vs_info = infos[static_cast<u32>(Shader::LogicalStage::Vertex)];
     if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) {
+        // Without vertex input dynamic state, the pipeline needs to specialize on format.
+        // Stride will still be handled outside the pipeline using dynamic state.
         u32 vertex_binding = 0;
         for (const auto& attrib : fetch_shader->attributes) {
             if (attrib.UsesStepRates()) {
+                // Skip attribute binding as the data will be pulled by shader.
                 continue;
             }
             const auto& buffer = attrib.GetSharp(*vs_info);
-            if (buffer.GetSize() == 0) {
-                continue;
-            }
             ASSERT(vertex_binding < MaxVertexBufferCount);
             key.vertex_buffer_formats[vertex_binding++] =
                 Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt());
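
Dropping the GetSize() == 0 skip here is the subtle part of the fix: binding indices must line up between the formats baked into the pipeline key and the buffers bound at draw time, and BindVertexBuffers now emits a null binding for an empty sharp instead of skipping it. A sketch of the invariant both walks must preserve (hypothetical attribute type, mirroring the loops in the key refresh and in GetVertexInputs):

    #include <cstdint>

    // Step-rate attributes take no binding slot; every other attribute takes
    // exactly one, even when its buffer sharp is empty (it is bound as a
    // null buffer). Key refresh and draw-time bind must both follow this.
    template <typename Attributes>
    std::uint32_t CountBindingSlots(const Attributes& attributes) {
        std::uint32_t slots = 0;
        for (const auto& attrib : attributes) {
            if (attrib.UsesStepRates()) {
                continue; // Data is pulled by the shader instead.
            }
            ++slots;
        }
        return slots;
    }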

src/video_core/renderer_vulkan/vk_rasterizer.cpp

@@ -248,9 +248,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
         return;
     }
 
-    const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
-    const auto& fetch_shader = pipeline->GetFetchShader();
-    buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
+    buffer_cache.BindVertexBuffers(*pipeline);
     if (is_indexed) {
         buffer_cache.BindIndexBuffer(index_offset);
     }

@@ -258,6 +256,8 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
     BeginRendering(*pipeline, state);
     UpdateDynamicState(*pipeline);
 
+    const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
+    const auto& fetch_shader = pipeline->GetFetchShader();
     const auto [vertex_offset, instance_offset] = GetDrawOffsets(regs, vs_info, fetch_shader);
 
     const auto cmdbuf = scheduler.CommandBuffer();

@@ -292,9 +292,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
         return;
     }
 
-    const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
-    const auto& fetch_shader = pipeline->GetFetchShader();
-    buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
+    buffer_cache.BindVertexBuffers(*pipeline);
     if (is_indexed) {
         buffer_cache.BindIndexBuffer(0);
     }
} }