diff --git a/CMakeLists.txt b/CMakeLists.txt index de8753db..2da9465c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -553,6 +553,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/texture_cache/tile_manager.cpp src/video_core/texture_cache/tile_manager.h src/video_core/texture_cache/types.h + src/video_core/texture_cache/host_compatibility.h src/video_core/page_manager.cpp src/video_core/page_manager.h src/video_core/multi_level_page_table.h diff --git a/src/video_core/buffer_cache/buffer.h b/src/video_core/buffer_cache/buffer.h index 26d48eae..33497578 100644 --- a/src/video_core/buffer_cache/buffer.h +++ b/src/video_core/buffer_cache/buffer.h @@ -118,6 +118,25 @@ public: return buffer; } + std::optional<vk::BufferMemoryBarrier2> GetBarrier(vk::AccessFlagBits2 dst_access_mask, + vk::PipelineStageFlagBits2 dst_stage) { + if (dst_access_mask == access_mask && stage == dst_stage) { + return {}; + } + + auto barrier = vk::BufferMemoryBarrier2{ + .srcStageMask = stage, + .srcAccessMask = access_mask, + .dstStageMask = dst_stage, + .dstAccessMask = dst_access_mask, + .buffer = buffer.buffer, + .size = size_bytes, + }; + access_mask = dst_access_mask; + stage = dst_stage; + return barrier; + } + public: VAddr cpu_addr = 0; bool is_picked{}; @@ -128,6 +147,8 @@ public: const Vulkan::Instance* instance{}; MemoryUsage usage; UniqueBuffer buffer; + vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone}; + vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone}; struct BufferView { u32 offset; u32 size;
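A note on the `GetBarrier` helper added above: each buffer now remembers the last pipeline stage and access mask that touched it, and a `vk::BufferMemoryBarrier2` is produced only when the next use actually differs, so redundant barriers are skipped entirely. Below is a minimal sketch of the same pattern outside the emulator's classes; it assumes Vulkan 1.3 (or VK_KHR_synchronization2) and vulkan-hpp, and `TrackedBuffer`/`FlushBarriers` are illustrative names, not code from this tree:

```cpp
#include <optional>
#include <vector>
#include <vulkan/vulkan.hpp>

// Illustrative stand-in for the access_mask/stage members this diff adds to Buffer.
struct TrackedBuffer {
    vk::Buffer buffer;
    vk::DeviceSize size_bytes = VK_WHOLE_SIZE;
    vk::AccessFlags2 access_mask = vk::AccessFlagBits2::eNone;
    vk::PipelineStageFlags2 stage = vk::PipelineStageFlagBits2::eNone;

    // Produce a barrier only when the new usage differs from the recorded one.
    std::optional<vk::BufferMemoryBarrier2> GetBarrier(vk::AccessFlags2 dst_access,
                                                       vk::PipelineStageFlags2 dst_stage) {
        if (dst_access == access_mask && dst_stage == stage) {
            return std::nullopt; // same usage as last time, nothing to guard
        }
        const auto barrier = vk::BufferMemoryBarrier2{}
                                 .setSrcStageMask(stage)
                                 .setSrcAccessMask(access_mask)
                                 .setDstStageMask(dst_stage)
                                 .setDstAccessMask(dst_access)
                                 .setBuffer(buffer)
                                 .setSize(size_bytes);
        access_mask = dst_access; // record the new usage for the next caller
        stage = dst_stage;
        return barrier;
    }
};

// Callers collect the returned barriers and record a single dependency.
void FlushBarriers(vk::CommandBuffer cmdbuf,
                   const std::vector<vk::BufferMemoryBarrier2>& barriers) {
    if (barriers.empty()) {
        return;
    }
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{}.setBufferMemoryBarriers(barriers));
}
```

Both pipelines below use exactly this batching shape: collect the `std::optional` results into a small vector, then issue one `pipelineBarrier2` before binding.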
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 93e05085..d67e953e 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -248,11 +248,11 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b return {&buffer, buffer.Offset(device_addr)}; } -std::pair<const Buffer*, u32> BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) { +std::pair<Buffer*, u32> BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) { const u64 page = gpu_addr >> CACHING_PAGEBITS; const BufferId buffer_id = page_table[page]; if (buffer_id) { - const Buffer& buffer = slot_buffers[buffer_id]; + Buffer& buffer = slot_buffers[buffer_id]; if (buffer.IsInBounds(gpu_addr, size)) { return {&buffer, buffer.Offset(gpu_addr)}; } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b9002cea..9be258ab 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -70,7 +70,7 @@ public: bool is_texel_buffer = false); /// Obtains a temporary buffer for usage in texture cache. - [[nodiscard]] std::pair<const Buffer*, u32> ObtainTempBuffer(VAddr gpu_addr, u32 size); + [[nodiscard]] std::pair<Buffer*, u32> ObtainTempBuffer(VAddr gpu_addr, u32 size); /// Return true when a region is registered on the cache [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 1d900123..b1a23532 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -104,6 +104,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos; boost::container::static_vector<vk::DescriptorImageInfo, 16> image_infos; boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes; + boost::container::small_vector<vk::BufferMemoryBarrier2, 16> buffer_barriers; Shader::PushData push_data{}; u32 binding{}; @@ -153,9 +154,9 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, for (const auto& desc : info->texture_buffers) { const auto vsharp = desc.GetSharp(*info); vk::BufferView& buffer_view = buffer_views.emplace_back(VK_NULL_HANDLE); - if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { + const u32 size = vsharp.GetSize(); + if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid && size != 0) { const VAddr address = vsharp.base_address; - const u32 size = vsharp.GetSize(); if (desc.is_written) { if (texture_cache.TouchMeta(address, true)) { LOG_TRACE(Render_Vulkan, "Metadata update skipped"); } @@ -183,6 +184,13 @@ } buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written, vsharp.GetDataFmt(), vsharp.GetNumberFmt()); + + if (auto barrier = + vk_buffer->GetBarrier(desc.is_written ? 
vk::AccessFlagBits2::eShaderWrite + : vk::AccessFlagBits2::eShaderRead, + vk::PipelineStageFlagBits2::eComputeShader)) { + buffer_barriers.emplace_back(*barrier); + } } set_writes.push_back({ .dstSet = VK_NULL_HANDLE, @@ -222,6 +230,9 @@ } for (const auto& sampler : info->samplers) { const auto ssharp = sampler.GetSharp(*info); + if (ssharp.force_degamma) { + LOG_WARNING(Render_Vulkan, "Texture requires gamma correction"); + } const auto vk_sampler = texture_cache.GetSampler(ssharp); image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); set_writes.push_back({ @@ -239,6 +250,15 @@ } const auto cmdbuf = scheduler.CommandBuffer(); + + if (!buffer_barriers.empty()) { + auto dependencies = vk::DependencyInfo{ + .bufferMemoryBarrierCount = u32(buffer_barriers.size()), + .pBufferMemoryBarriers = buffer_barriers.data(), + }; + cmdbuf.pipelineBarrier2(dependencies); + } + cmdbuf.pushConstants(*pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(push_data), &push_data); cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, set_writes); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f03e5d5d..5aec456f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -359,6 +359,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos; boost::container::static_vector<vk::DescriptorImageInfo, 16> image_infos; boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes; + boost::container::small_vector<vk::BufferMemoryBarrier2, 16> buffer_barriers; Shader::PushData push_data{}; u32 binding{}; @@ -407,9 +408,9 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, for (const auto& tex_buffer : stage->texture_buffers) { const auto vsharp = tex_buffer.GetSharp(*stage); vk::BufferView& buffer_view = buffer_views.emplace_back(VK_NULL_HANDLE); - if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { + const u32 size = vsharp.GetSize(); + if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid && size != 0) { const VAddr address = vsharp.base_address; - const u32 size = vsharp.GetSize(); const u32 alignment = instance.TexelBufferMinAlignment(); const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(address, size, tex_buffer.is_written, true); @@ -424,6 +425,12 @@ } buffer_view = vk_buffer->View(offset_aligned, size + adjust, tex_buffer.is_written, vsharp.GetDataFmt(), vsharp.GetNumberFmt()); + const auto dst_access = tex_buffer.is_written ? 
vk::AccessFlagBits2::eShaderWrite + : vk::AccessFlagBits2::eShaderRead; + if (auto barrier = vk_buffer->GetBarrier( + dst_access, vk::PipelineStageFlagBits2::eVertexShader)) { + buffer_barriers.emplace_back(*barrier); + } } set_writes.push_back({ .dstSet = VK_NULL_HANDLE, @@ -441,7 +448,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, const auto tsharp = image_desc.GetSharp(*stage); if (tsharp) { tsharps.emplace_back(tsharp); - VideoCore::ImageInfo image_info{tsharp}; + VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth}; VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; const auto& image_view = texture_cache.FindTexture(image_info, view_info); const auto& image = texture_cache.GetImage(image_view.image_id); @@ -465,6 +472,9 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, } for (const auto& sampler : stage->samplers) { auto ssharp = sampler.GetSharp(*stage); + if (ssharp.force_degamma) { + LOG_WARNING(Render_Vulkan, "Texture requires gamma correction"); + } if (sampler.disable_aniso) { const auto& tsharp = tsharps[sampler.associated_image]; if (tsharp.base_level == 0 && tsharp.last_level == 0) { @@ -485,6 +495,15 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, } const auto cmdbuf = scheduler.CommandBuffer(); + + if (!buffer_barriers.empty()) { + auto dependencies = vk::DependencyInfo{ + .bufferMemoryBarrierCount = u32(buffer_barriers.size()), + .pBufferMemoryBarriers = buffer_barriers.data(), + }; + cmdbuf.pipelineBarrier2(dependencies); + } + if (!set_writes.empty()) { cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, set_writes); diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 395f7198..c0105d8f 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -228,6 +228,7 @@ bool Instance::CreateDevice() { const bool maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME); add_extension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME); add_extension(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME); + add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME); #ifdef __APPLE__ // Required by Vulkan spec if supported. 
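One caveat about the `VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME` line added above: requesting the extension string is not enough on its own; the `synchronization2` feature also has to be enabled at device creation, which happens outside this excerpt. A hedged sketch of what that looks like with vulkan-hpp (the surrounding device-creation code is assumed, not shown in the diff):

```cpp
// The feature struct must be chained into VkDeviceCreateInfo::pNext;
// without it, pipelineBarrier2/vkCmdPipelineBarrier2KHR are invalid to call.
vk::PhysicalDeviceSynchronization2FeaturesKHR sync2_features{};
sync2_features.synchronization2 = VK_TRUE;

vk::DeviceCreateInfo device_info{};
device_info.pNext = &sync2_features; // append to the existing feature chain
```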
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 6a8e0f13..33971cc5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -145,6 +145,14 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() { LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped"); return nullptr; } + if (regs.depth_render_control.depth_compress_disable) { + LOG_TRACE(Render_Vulkan, "HTile decompress skipped (depth)"); + return nullptr; + } + if (regs.depth_render_control.stencil_compress_disable) { + LOG_TRACE(Render_Vulkan, "HTile decompress skipped (stencil)"); + return nullptr; + } if (!RefreshGraphicsKey()) { return nullptr; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 5a20899d..4207c18d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -179,6 +179,10 @@ void Rasterizer::BeginRendering() { const auto& regs = liverpool->regs; RenderState state; + if (regs.color_control.degamma_enable) { + LOG_WARNING(Render_Vulkan, "Color buffers require gamma correction"); + } + for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) { const auto& col_buf = regs.color_buffers[col_buf_id]; if (!col_buf) { diff --git a/src/video_core/texture_cache/host_compatibility.h b/src/video_core/texture_cache/host_compatibility.h new file mode 100644 index 00000000..0b4b6764 --- /dev/null +++ b/src/video_core/texture_cache/host_compatibility.h @@ -0,0 +1,391 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright © 2023 Skyline Team and Contributors (https://github.com/skyline-emu/) +// Copyright © 2015-2023 The Khronos Group Inc. +// Copyright © 2015-2023 Valve Corporation +// Copyright © 2015-2023 LunarG, Inc. 
+ +#pragma once + +#include <unordered_map> +#include <vulkan/vulkan_core.h> + +namespace VideoCore { +/** + * @brief All classes of format compatibility according to the Vulkan specification + * @url + * https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f75f545a3e5a98d7dfb89864391a1db1e/layers/generated/vk_format_utils.h#L47-L131 + * @note This is copied directly from Vulkan Validation Layers and doesn't follow the Skyline naming + * conventions + */ +enum class FORMAT_COMPATIBILITY_CLASS { + NONE = 0, + _10BIT_2PLANE_420, + _10BIT_2PLANE_422, + _10BIT_2PLANE_444, + _10BIT_3PLANE_420, + _10BIT_3PLANE_422, + _10BIT_3PLANE_444, + _12BIT_2PLANE_420, + _12BIT_2PLANE_422, + _12BIT_2PLANE_444, + _12BIT_3PLANE_420, + _12BIT_3PLANE_422, + _12BIT_3PLANE_444, + _128BIT, + _16BIT, + _16BIT_2PLANE_420, + _16BIT_2PLANE_422, + _16BIT_2PLANE_444, + _16BIT_3PLANE_420, + _16BIT_3PLANE_422, + _16BIT_3PLANE_444, + _192BIT, + _24BIT, + _256BIT, + _32BIT, + _32BIT_B8G8R8G8, + _32BIT_G8B8G8R8, + _48BIT, + _64BIT, + _64BIT_B10G10R10G10, + _64BIT_B12G12R12G12, + _64BIT_B16G16R16G16, + _64BIT_G10B10G10R10, + _64BIT_G12B12G12R12, + _64BIT_G16B16G16R16, + _64BIT_R10G10B10A10, + _64BIT_R12G12B12A12, + _8BIT, + _8BIT_2PLANE_420, + _8BIT_2PLANE_422, + _8BIT_2PLANE_444, + _8BIT_3PLANE_420, + _8BIT_3PLANE_422, + _8BIT_3PLANE_444, + _96BIT, + ASTC_10X10, + ASTC_10X5, + ASTC_10X6, + ASTC_10X8, + ASTC_12X10, + ASTC_12X12, + ASTC_4X4, + ASTC_5X4, + ASTC_5X5, + ASTC_6X5, + ASTC_6X6, + ASTC_8X5, + ASTC_8X6, + ASTC_8X8, + BC1_RGB, + BC1_RGBA, + BC2, + BC3, + BC4, + BC5, + BC6H, + BC7, + D16, + D16S8, + D24, + D24S8, + D32, + D32S8, + EAC_R, + EAC_RG, + ETC2_EAC_RGBA, + ETC2_RGB, + ETC2_RGBA, + PVRTC1_2BPP, + PVRTC1_4BPP, + PVRTC2_2BPP, + PVRTC2_4BPP, + S8 +}; + +/** + * @brief The format compatibility class according to the Vulkan specification + * @url + * https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility-classes + * @url + * https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f75f545a3e5a98d7dfb89864391a1db1e/layers/generated/vk_format_utils.cpp#L70-L812 + * @note This is copied directly from Vulkan Validation Layers and doesn't follow the Skyline naming + * conventions + */ +static const std::unordered_map<VkFormat, FORMAT_COMPATIBILITY_CLASS> vkFormatClassTable{ + {VK_FORMAT_A1R5G5B5_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_A2B10G10R10_SINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A2B10G10R10_SNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A2B10G10R10_SSCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A2B10G10R10_UINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A2B10G10R10_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A2B10G10R10_USCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A2R10G10B10_SINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A2R10G10B10_SNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A2R10G10B10_SSCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A2R10G10B10_UINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A2R10G10B10_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A2R10G10B10_USCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_A8B8G8R8_SINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A8B8G8R8_SNORM_PACK32, 
FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A8B8G8R8_SRGB_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A8B8G8R8_SSCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A8B8G8R8_UINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A8B8G8R8_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_A8B8G8R8_USCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X10}, + {VK_FORMAT_ASTC_10x10_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X10}, + {VK_FORMAT_ASTC_10x10_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X10}, + {VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X5}, + {VK_FORMAT_ASTC_10x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X5}, + {VK_FORMAT_ASTC_10x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X5}, + {VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X6}, + {VK_FORMAT_ASTC_10x6_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X6}, + {VK_FORMAT_ASTC_10x6_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X6}, + {VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X8}, + {VK_FORMAT_ASTC_10x8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X8}, + {VK_FORMAT_ASTC_10x8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X8}, + {VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_12X10}, + {VK_FORMAT_ASTC_12x10_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X10}, + {VK_FORMAT_ASTC_12x10_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X10}, + {VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_12X12}, + {VK_FORMAT_ASTC_12x12_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X12}, + {VK_FORMAT_ASTC_12x12_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X12}, + {VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_4X4}, + {VK_FORMAT_ASTC_4x4_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_4X4}, + {VK_FORMAT_ASTC_4x4_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_4X4}, + {VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_5X4}, + {VK_FORMAT_ASTC_5x4_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X4}, + {VK_FORMAT_ASTC_5x4_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X4}, + {VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_5X5}, + {VK_FORMAT_ASTC_5x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X5}, + {VK_FORMAT_ASTC_5x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X5}, + {VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_6X5}, + {VK_FORMAT_ASTC_6x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X5}, + {VK_FORMAT_ASTC_6x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X5}, + {VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_6X6}, + {VK_FORMAT_ASTC_6x6_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X6}, + {VK_FORMAT_ASTC_6x6_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X6}, + {VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_8X5}, + {VK_FORMAT_ASTC_8x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X5}, + {VK_FORMAT_ASTC_8x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X5}, + {VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_8X6}, + {VK_FORMAT_ASTC_8x6_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X6}, + {VK_FORMAT_ASTC_8x6_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X6}, + {VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_8X8}, + {VK_FORMAT_ASTC_8x8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X8}, + 
{VK_FORMAT_ASTC_8x8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X8}, + {VK_FORMAT_B10G11R11_UFLOAT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16, + FORMAT_COMPATIBILITY_CLASS::_64BIT_B10G10R10G10}, + {VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16, + FORMAT_COMPATIBILITY_CLASS::_64BIT_B12G12R12G12}, + {VK_FORMAT_B16G16R16G16_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT_B16G16R16G16}, + {VK_FORMAT_B4G4R4A4_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_B5G5R5A1_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_B5G6R5_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_B8G8R8A8_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_B8G8R8A8_SNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_B8G8R8A8_SRGB, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_B8G8R8A8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_B8G8R8A8_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_B8G8R8A8_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_B8G8R8A8_USCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_B8G8R8G8_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT_B8G8R8G8}, + {VK_FORMAT_B8G8R8_SINT, FORMAT_COMPATIBILITY_CLASS::_24BIT}, + {VK_FORMAT_B8G8R8_SNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT}, + {VK_FORMAT_B8G8R8_SRGB, FORMAT_COMPATIBILITY_CLASS::_24BIT}, + {VK_FORMAT_B8G8R8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT}, + {VK_FORMAT_B8G8R8_UINT, FORMAT_COMPATIBILITY_CLASS::_24BIT}, + {VK_FORMAT_B8G8R8_UNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT}, + {VK_FORMAT_B8G8R8_USCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT}, + {VK_FORMAT_BC1_RGBA_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGBA}, + {VK_FORMAT_BC1_RGBA_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGBA}, + {VK_FORMAT_BC1_RGB_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGB}, + {VK_FORMAT_BC1_RGB_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGB}, + {VK_FORMAT_BC2_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC2}, + {VK_FORMAT_BC2_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC2}, + {VK_FORMAT_BC3_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC3}, + {VK_FORMAT_BC3_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC3}, + {VK_FORMAT_BC4_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC4}, + {VK_FORMAT_BC4_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC4}, + {VK_FORMAT_BC5_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC5}, + {VK_FORMAT_BC5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC5}, + {VK_FORMAT_BC6H_SFLOAT_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC6H}, + {VK_FORMAT_BC6H_UFLOAT_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC6H}, + {VK_FORMAT_BC7_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC7}, + {VK_FORMAT_BC7_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC7}, + {VK_FORMAT_D16_UNORM, FORMAT_COMPATIBILITY_CLASS::D16}, + {VK_FORMAT_D16_UNORM_S8_UINT, FORMAT_COMPATIBILITY_CLASS::D16S8}, + {VK_FORMAT_D24_UNORM_S8_UINT, FORMAT_COMPATIBILITY_CLASS::D24S8}, + {VK_FORMAT_D32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::D32}, + {VK_FORMAT_D32_SFLOAT_S8_UINT, FORMAT_COMPATIBILITY_CLASS::D32S8}, + {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_EAC_R11G11_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_RG}, + {VK_FORMAT_EAC_R11G11_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_RG}, + {VK_FORMAT_EAC_R11_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_R}, + {VK_FORMAT_EAC_R11_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_R}, + {VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGBA}, + {VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, 
FORMAT_COMPATIBILITY_CLASS::ETC2_RGBA}, + {VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_EAC_RGBA}, + {VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_EAC_RGBA}, + {VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGB}, + {VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGB}, + {VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, + FORMAT_COMPATIBILITY_CLASS::_64BIT_G10B10G10R10}, + {VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, + FORMAT_COMPATIBILITY_CLASS::_10BIT_2PLANE_420}, + {VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, + FORMAT_COMPATIBILITY_CLASS::_10BIT_2PLANE_422}, + {VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT, + FORMAT_COMPATIBILITY_CLASS::_10BIT_2PLANE_444}, + {VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16, + FORMAT_COMPATIBILITY_CLASS::_10BIT_3PLANE_420}, + {VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16, + FORMAT_COMPATIBILITY_CLASS::_10BIT_3PLANE_422}, + {VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16, + FORMAT_COMPATIBILITY_CLASS::_10BIT_3PLANE_444}, + {VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, + FORMAT_COMPATIBILITY_CLASS::_64BIT_G12B12G12R12}, + {VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, + FORMAT_COMPATIBILITY_CLASS::_12BIT_2PLANE_420}, + {VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, + FORMAT_COMPATIBILITY_CLASS::_12BIT_2PLANE_422}, + {VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT, + FORMAT_COMPATIBILITY_CLASS::_12BIT_2PLANE_444}, + {VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16, + FORMAT_COMPATIBILITY_CLASS::_12BIT_3PLANE_420}, + {VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16, + FORMAT_COMPATIBILITY_CLASS::_12BIT_3PLANE_422}, + {VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16, + FORMAT_COMPATIBILITY_CLASS::_12BIT_3PLANE_444}, + {VK_FORMAT_G16B16G16R16_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT_G16B16G16R16}, + {VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_2PLANE_420}, + {VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_2PLANE_422}, + {VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT, FORMAT_COMPATIBILITY_CLASS::_16BIT_2PLANE_444}, + {VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_3PLANE_420}, + {VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_3PLANE_422}, + {VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_3PLANE_444}, + {VK_FORMAT_G8B8G8R8_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT_G8B8G8R8}, + {VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_2PLANE_420}, + {VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_2PLANE_422}, + {VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT, FORMAT_COMPATIBILITY_CLASS::_8BIT_2PLANE_444}, + {VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_3PLANE_420}, + {VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_3PLANE_422}, + {VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_3PLANE_444}, + {VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_2BPP}, + {VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_2BPP}, + {VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_4BPP}, + {VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_4BPP}, + {VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_2BPP}, + {VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG, 
FORMAT_COMPATIBILITY_CLASS::PVRTC2_2BPP}, + {VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_4BPP}, + {VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_4BPP}, + {VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16, FORMAT_COMPATIBILITY_CLASS::_64BIT_R10G10B10A10}, + {VK_FORMAT_R10X6G10X6_UNORM_2PACK16, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R10X6_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16, FORMAT_COMPATIBILITY_CLASS::_64BIT_R12G12B12A12}, + {VK_FORMAT_R12X4G12X4_UNORM_2PACK16, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R12X4_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R16G16B16A16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_64BIT}, + {VK_FORMAT_R16G16B16A16_SINT, FORMAT_COMPATIBILITY_CLASS::_64BIT}, + {VK_FORMAT_R16G16B16A16_SNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT}, + {VK_FORMAT_R16G16B16A16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_64BIT}, + {VK_FORMAT_R16G16B16A16_UINT, FORMAT_COMPATIBILITY_CLASS::_64BIT}, + {VK_FORMAT_R16G16B16A16_UNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT}, + {VK_FORMAT_R16G16B16A16_USCALED, FORMAT_COMPATIBILITY_CLASS::_64BIT}, + {VK_FORMAT_R16G16B16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_48BIT}, + {VK_FORMAT_R16G16B16_SINT, FORMAT_COMPATIBILITY_CLASS::_48BIT}, + {VK_FORMAT_R16G16B16_SNORM, FORMAT_COMPATIBILITY_CLASS::_48BIT}, + {VK_FORMAT_R16G16B16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_48BIT}, + {VK_FORMAT_R16G16B16_UINT, FORMAT_COMPATIBILITY_CLASS::_48BIT}, + {VK_FORMAT_R16G16B16_UNORM, FORMAT_COMPATIBILITY_CLASS::_48BIT}, + {VK_FORMAT_R16G16B16_USCALED, FORMAT_COMPATIBILITY_CLASS::_48BIT}, + {VK_FORMAT_R16G16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R16G16_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R16G16_SNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R16G16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R16G16_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R16G16_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R16G16_USCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R16_SINT, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R16_SNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R16_UINT, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R16_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R16_USCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R32G32B32A32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_128BIT}, + {VK_FORMAT_R32G32B32A32_SINT, FORMAT_COMPATIBILITY_CLASS::_128BIT}, + {VK_FORMAT_R32G32B32A32_UINT, FORMAT_COMPATIBILITY_CLASS::_128BIT}, + {VK_FORMAT_R32G32B32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_96BIT}, + {VK_FORMAT_R32G32B32_SINT, FORMAT_COMPATIBILITY_CLASS::_96BIT}, + {VK_FORMAT_R32G32B32_UINT, FORMAT_COMPATIBILITY_CLASS::_96BIT}, + {VK_FORMAT_R32G32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_64BIT}, + {VK_FORMAT_R32G32_SINT, FORMAT_COMPATIBILITY_CLASS::_64BIT}, + {VK_FORMAT_R32G32_UINT, FORMAT_COMPATIBILITY_CLASS::_64BIT}, + {VK_FORMAT_R32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R32_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R32_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R4G4B4A4_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R4G4_UNORM_PACK8, FORMAT_COMPATIBILITY_CLASS::_8BIT}, + {VK_FORMAT_R5G5B5A1_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + 
{VK_FORMAT_R5G6B5_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R64G64B64A64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_256BIT}, + {VK_FORMAT_R64G64B64A64_SINT, FORMAT_COMPATIBILITY_CLASS::_256BIT}, + {VK_FORMAT_R64G64B64A64_UINT, FORMAT_COMPATIBILITY_CLASS::_256BIT}, + {VK_FORMAT_R64G64B64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_192BIT}, + {VK_FORMAT_R64G64B64_SINT, FORMAT_COMPATIBILITY_CLASS::_192BIT}, + {VK_FORMAT_R64G64B64_UINT, FORMAT_COMPATIBILITY_CLASS::_192BIT}, + {VK_FORMAT_R64G64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_128BIT}, + {VK_FORMAT_R64G64_SINT, FORMAT_COMPATIBILITY_CLASS::_128BIT}, + {VK_FORMAT_R64G64_UINT, FORMAT_COMPATIBILITY_CLASS::_128BIT}, + {VK_FORMAT_R64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_64BIT}, + {VK_FORMAT_R64_SINT, FORMAT_COMPATIBILITY_CLASS::_64BIT}, + {VK_FORMAT_R64_UINT, FORMAT_COMPATIBILITY_CLASS::_64BIT}, + {VK_FORMAT_R8G8B8A8_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R8G8B8A8_SNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R8G8B8A8_SRGB, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R8G8B8A8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R8G8B8A8_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R8G8B8A8_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R8G8B8A8_USCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT}, + {VK_FORMAT_R8G8B8_SINT, FORMAT_COMPATIBILITY_CLASS::_24BIT}, + {VK_FORMAT_R8G8B8_SNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT}, + {VK_FORMAT_R8G8B8_SRGB, FORMAT_COMPATIBILITY_CLASS::_24BIT}, + {VK_FORMAT_R8G8B8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT}, + {VK_FORMAT_R8G8B8_UINT, FORMAT_COMPATIBILITY_CLASS::_24BIT}, + {VK_FORMAT_R8G8B8_UNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT}, + {VK_FORMAT_R8G8B8_USCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT}, + {VK_FORMAT_R8G8_SINT, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R8G8_SNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R8G8_SRGB, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R8G8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R8G8_UINT, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R8G8_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R8G8_USCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT}, + {VK_FORMAT_R8_SINT, FORMAT_COMPATIBILITY_CLASS::_8BIT}, + {VK_FORMAT_R8_SNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT}, + {VK_FORMAT_R8_SRGB, FORMAT_COMPATIBILITY_CLASS::_8BIT}, + {VK_FORMAT_R8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_8BIT}, + {VK_FORMAT_R8_UINT, FORMAT_COMPATIBILITY_CLASS::_8BIT}, + {VK_FORMAT_R8_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT}, + {VK_FORMAT_R8_USCALED, FORMAT_COMPATIBILITY_CLASS::_8BIT}, + {VK_FORMAT_S8_UINT, FORMAT_COMPATIBILITY_CLASS::S8}, + {VK_FORMAT_X8_D24_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::D24}, + {VK_FORMAT_UNDEFINED, FORMAT_COMPATIBILITY_CLASS::NONE}, +}; + +/** + * @return If the two formats are compatible according to Vulkan's format compatibility rules + * @url + * https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility + */ +static bool IsVulkanFormatCompatible(VkFormat lhs, VkFormat rhs) { + if (lhs == rhs) + return true; + return vkFormatClassTable.at(lhs) == vkFormatClassTable.at(rhs); +} +} // namespace VideoCore diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 0d20eaea..13ea7ce9 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -242,6 +242,74 @@ void Image::Upload(vk::Buffer buffer, u64 offset) { vk::AccessFlagBits::eShaderRead | 
vk::AccessFlagBits::eTransferRead); } +void Image::CopyImage(const Image& image) { + scheduler->EndRendering(); + Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); + + auto cmdbuf = scheduler->CommandBuffer(); + + boost::container::small_vector<vk::ImageCopy, 14> image_copy{}; + for (u32 m = 0; m < image.info.resources.levels; ++m) { + const auto mip_w = std::max(info.size.width >> m, 1u); + const auto mip_h = std::max(info.size.height >> m, 1u); + const auto mip_d = std::max(info.size.depth >> m, 1u); + + image_copy.emplace_back(vk::ImageCopy{ + .srcSubresource{ + .aspectMask = image.aspect_mask, + .mipLevel = m, + .baseArrayLayer = 0, + .layerCount = image.info.resources.layers, + }, + .dstSubresource{ + .aspectMask = image.aspect_mask, + .mipLevel = m, + .baseArrayLayer = 0, + .layerCount = image.info.resources.layers, + }, + .extent = {mip_w, mip_h, mip_d}, + }); + } + cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy); + + Transit(vk::ImageLayout::eGeneral, + vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); +} + +void Image::CopyMip(const Image& image, u32 mip) { + scheduler->EndRendering(); + Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); + + auto cmdbuf = scheduler->CommandBuffer(); + + const auto mip_w = std::max(info.size.width >> mip, 1u); + const auto mip_h = std::max(info.size.height >> mip, 1u); + const auto mip_d = std::max(info.size.depth >> mip, 1u); + + ASSERT(mip_w == image.info.size.width); + ASSERT(mip_h == image.info.size.height); + + const vk::ImageCopy image_copy{ + .srcSubresource{ + .aspectMask = image.aspect_mask, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = image.info.resources.layers, + }, + .dstSubresource{ + .aspectMask = image.aspect_mask, + .mipLevel = mip, + .baseArrayLayer = 0, + .layerCount = info.resources.layers, + }, + .extent = {mip_w, mip_h, mip_d}, + }; + cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy); + + Transit(vk::ImageLayout::eGeneral, + vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); +} + Image::~Image() = default; } // namespace VideoCore diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 3df8ddb7..f932b25a 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -32,6 +32,7 @@ enum ImageFlagBits : u32 { Registered = 1 << 6, ///< True when the image is registered Picked = 1 << 7, ///< Temporary flag to mark the image as picked MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered + Deleted = 1 << 9, ///< Indicates that the image was marked for deletion once the frame is done }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) @@ -95,6 +96,9 @@ struct Image { vk::CommandBuffer cmdbuf = {}); void Upload(vk::Buffer buffer, u64 offset); + void CopyImage(const Image& image); + void CopyMip(const Image& image, u32 mip); + const Vulkan::Instance* instance; Vulkan::Scheduler* scheduler; ImageInfo info; @@ -112,6 +116,7 @@ struct Image { vk::Flags<vk::AccessFlagBits> access_mask = vk::AccessFlagBits::eNone; vk::ImageLayout layout = vk::ImageLayout::eUndefined; boost::container::small_vector<u64, 14> mip_hashes; + u64 tick_accessed_last{0}; }; } // namespace VideoCore
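`CopyImage` and `CopyMip` above (and `IsMipOf` below) all derive per-level extents with the same shift-and-clamp expression. As a standalone reference (the helper name is mine; the diff inlines the math):

```cpp
#include <algorithm>
#include <cstdint>

// Level n of a mip chain is the base extent shifted right n times and
// clamped at 1, matching Vulkan's mip sizing rules.
constexpr std::uint32_t MipExtent(std::uint32_t base, std::uint32_t level) {
    return std::max<std::uint32_t>(base >> level, 1);
}

static_assert(MipExtent(512, 3) == 64);
static_assert(MipExtent(4, 6) == 1); // tail levels clamp at 1x1
```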
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 4ac4aee8..bd467168 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -174,6 +174,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, const auto color_slice_sz = buffer.GetColorSliceSize(); guest_size_bytes = color_slice_sz * buffer.NumSlices(); mips_layout.emplace_back(color_slice_sz, pitch, 0); + tiling_idx = static_cast<u32>(buffer.attrib.tile_mode_index.Value()); } ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, @@ -199,9 +200,19 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice mips_layout.emplace_back(depth_slice_sz, pitch, 0); } -ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { +ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) noexcept { tiling_mode = image.GetTilingMode(); pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); + // Override format if image is forced to be a depth target + if (force_depth || tiling_mode == AmdGpu::TilingMode::Depth_MacroTiled) { + if (pixel_format == vk::Format::eR32Sfloat) { + pixel_format = vk::Format::eD32SfloatS8Uint; + } else if (pixel_format == vk::Format::eR16Sfloat) { + pixel_format = vk::Format::eD16UnormS8Uint; + } else { + UNREACHABLE(); + } + } type = ConvertImageType(image.GetType()); props.is_tiled = image.IsTiled(); props.is_cube = image.GetType() == AmdGpu::ImageType::Cube; @@ -287,4 +298,74 @@ void ImageInfo::UpdateSize() { guest_size_bytes *= resources.layers; } +bool ImageInfo::IsMipOf(const ImageInfo& info) const { + if (!IsCompatible(info)) { + return false; + } + + // Currently we expect only one level to be copied. + if (resources.levels != 1) { + return false; + } + + const int mip = info.resources.levels - resources.levels; + if (mip < 1) { + return false; + } + + const auto mip_w = std::max(info.size.width >> mip, 1u); + const auto mip_h = std::max(info.size.height >> mip, 1u); + if ((size.width != mip_w) || (size.height != mip_h)) { + return false; + } + + const auto mip_d = std::max(info.size.depth >> mip, 1u); + if (info.type == vk::ImageType::e3D && type == vk::ImageType::e2D) { + // In case of 2D array to 3D copy, make sure we have proper number of layers. + if (resources.layers != mip_d) { + return false; + } + } else { + if (type != info.type) { + return false; + } + } + + // Check if the mip has correct size. + if (info.mips_layout.size() <= mip || info.mips_layout[mip].size != guest_size_bytes) { + return false; + } + + return true; +} + +bool ImageInfo::IsSliceOf(const ImageInfo& info) const { + if (!IsCompatible(info)) { + return false; + } + + // Array slices should be of the same type. + if (type != info.type) { + return false; + } + + // 2D dimensions of both images should be the same. + if ((size.width != info.size.width) || (size.height != info.size.height)) { + return false; + } + + // Check for size alignment. + const auto slice_size = info.guest_size_bytes / info.resources.layers; + if (guest_size_bytes % slice_size != 0) { + return false; + } + + // Ensure that address is aligned too. + if (((info.guest_address - guest_address) % guest_size_bytes) != 0) { + return false; + } + + return true; +} + } // namespace VideoCore diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index ddad318d..ba8985b8 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -3,7 +3,6 @@ #pragma once -#include "common/enum.h" #include "common/types.h" #include "core/libraries/videoout/buffer.h" #include "video_core/amdgpu/liverpool.h" @@ -20,7 +19,7 @@ struct ImageInfo { const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; - ImageInfo(const AmdGpu::Image& image) noexcept; + ImageInfo(const AmdGpu::Image& image, bool force_depth = false) noexcept; bool IsTiled() const { return tiling_mode != AmdGpu::TilingMode::Display_Linear; @@ -29,6 +28,15 @@ struct ImageInfo { bool IsPacked() const; bool IsDepthStencil() const; + bool IsMipOf(const ImageInfo& info) const; + bool IsSliceOf(const ImageInfo& info) const; + + /// Verifies if images are compatible for subresource merging. + bool IsCompatible(const ImageInfo& info) const { + return (pixel_format == info.pixel_format && tiling_idx == info.tiling_idx && + num_samples == info.num_samples && num_bits == info.num_bits); + } + void UpdateSize(); struct {
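To make the `IsMipOf` heuristic above concrete, here is a worked example with made-up numbers; it is only the arithmetic, not code from the tree:

```cpp
#include <algorithm>

// Base image: 512x512 with resources.levels = 4 (mips 512, 256, 128, 64).
// Candidate: a single-level 64x64 image that already passed IsCompatible
// (same pixel format, tiling index, sample count and bit depth).
constexpr int base_levels = 4;
constexpr int candidate_levels = 1;
constexpr int mip = base_levels - candidate_levels; // 3, must be >= 1
static_assert(mip >= 1);
static_assert(std::max(512u >> mip, 1u) == 64u); // extent matches on both axes
// The remaining check needs real layout data: info.mips_layout[3].size must
// equal the candidate's guest_size_bytes. When everything matches,
// ResolveOverlap (below) copies the candidate into level 3 of the larger
// image via CopyMip and frees the standalone allocation.
```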
diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 3354a8ec..90dc7140 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -1,18 +1,21 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include "common/assert.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/page_manager.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/texture_cache/host_compatibility.h" #include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/tile_manager.h" namespace VideoCore { static constexpr u64 PageShift = 12; +static constexpr u64 NumFramesBeforeRemoval = 32; TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, BufferCache& buffer_cache_, PageManager& tracker_) @@ -43,7 +46,7 @@ void TextureCache::InvalidateMemory(VAddr address, size_t size) { // Ensure image is reuploaded when accessed again. image.flags |= ImageFlagBits::CpuModified; // Untrack image, so the range is unprotected and the guest can write freely. - UntrackImage(image, image_id); + UntrackImage(image_id); }); } @@ -53,46 +56,183 @@ void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) { boost::container::small_vector deleted_images; ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { - Image& image = slot_images[id]; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, id); - } // TODO: Download image data back to host. 
- UnregisterImage(id); - DeleteImage(id); + FreeImage(id); } } +ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, ImageId cache_image_id) { + const auto& cache_info = slot_images[cache_image_id].info; + + const bool was_bound_as_texture = + !cache_info.usage.depth_target && (cache_info.usage.texture || cache_info.usage.storage); + if (requested_info.usage.depth_target && was_bound_as_texture) { + auto new_image_id = slot_images.insert(instance, scheduler, requested_info); + RegisterImage(new_image_id); + + // auto& new_image = slot_images[new_image_id]; + // TODO: need to run a helper for depth copy here + + FreeImage(cache_image_id); + return new_image_id; + } + + const bool should_bind_as_texture = + !requested_info.usage.depth_target && + (requested_info.usage.texture || requested_info.usage.storage); + if (cache_info.usage.depth_target && should_bind_as_texture) { + return cache_image_id; + } + + return {}; +} + +ImageId TextureCache::ResolveOverlap(const ImageInfo& image_info, ImageId cache_image_id, + ImageId merged_image_id) { + auto& tex_cache_image = slot_images[cache_image_id]; + + if (image_info.guest_address == tex_cache_image.info.guest_address) { // Equal address + if (image_info.size != tex_cache_image.info.size) { + // Very likely this kind of overlap is caused by allocation from a pool. We can assume + // it is safe to delete the image if it has not been accessed for a certain number of frames. + if (scheduler.CurrentTick() - tex_cache_image.tick_accessed_last > + NumFramesBeforeRemoval) { + + FreeImage(cache_image_id); + } + return merged_image_id; + } + + if (auto depth_image_id = ResolveDepthOverlap(image_info, cache_image_id)) { + return depth_image_id; + } + + if (image_info.pixel_format != tex_cache_image.info.pixel_format || + image_info.size != tex_cache_image.info.size || + image_info.guest_size_bytes <= tex_cache_image.info.guest_size_bytes) { + return merged_image_id ? merged_image_id : cache_image_id; + } + + ImageId new_image_id{}; + if (image_info.type == tex_cache_image.info.type) { + new_image_id = ExpandImage(image_info, cache_image_id); + } else { + UNREACHABLE(); + } + return new_image_id; + } + + // Right overlap, the image requested is a possible subresource of the image from cache. + if (image_info.guest_address > tex_cache_image.info.guest_address) { + // Should be handled by view. No additional actions needed. 
+ } else { + // Left overlap, the image from cache is a possible subresource of the image requested + if (!merged_image_id) { + // We need to have a larger, already allocated image to copy this one into + return {}; + } + + if (tex_cache_image.info.IsMipOf(image_info)) { + tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal, + vk::AccessFlagBits::eTransferRead); + + const auto num_mips_to_copy = tex_cache_image.info.resources.levels; + ASSERT(num_mips_to_copy == 1); + + auto& merged_image = slot_images[merged_image_id]; + merged_image.CopyMip(tex_cache_image, image_info.resources.levels - 1); + + FreeImage(cache_image_id); + } + + if (tex_cache_image.info.IsSliceOf(image_info)) { + UNREACHABLE(); + } + } + + return merged_image_id; +} + +ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) { + + const auto new_image_id = slot_images.insert(instance, scheduler, info); + RegisterImage(new_image_id); + + auto& src_image = slot_images[image_id]; + auto& new_image = slot_images[new_image_id]; + + src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); + new_image.CopyImage(src_image); + + FreeImage(image_id); + + TrackImage(new_image_id); + new_image.flags &= ~ImageFlagBits::CpuModified; + return new_image_id; +} + ImageId TextureCache::FindImage(const ImageInfo& info) { if (info.guest_address == 0) [[unlikely]] { return NULL_IMAGE_VIEW_ID; } std::unique_lock lock{mutex}; - boost::container::small_vector image_ids; + boost::container::small_vector image_ids; ForEachImageInRegion( info.guest_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) { - // Address and width must match. - if (image.cpu_addr != info.guest_address || image.info.size.width != info.size.width) { + // Ignore images scheduled for deletion + if (True(image.flags & ImageFlagBits::Deleted)) { return; } - if (info.IsDepthStencil() != image.info.IsDepthStencil() && - info.pixel_format != vk::Format::eR32Sfloat) { + + // Check if image is fully outside of the region + const auto in_image_cpu_addr = info.guest_address; + const auto in_image_cpu_addr_end = info.guest_address + info.guest_size_bytes; + if (in_image_cpu_addr_end <= image.cpu_addr) { return; } + if (in_image_cpu_addr >= image.cpu_addr_end) { + return; + } + image_ids.push_back(image_id); }); - // ASSERT_MSG(image_ids.size() <= 1, "Overlapping images not allowed!"); - ImageId image_id{}; - if (image_ids.empty()) { + + // Check for a perfect match first + for (const auto& cache_id : image_ids) { + auto& cache_image = slot_images[cache_id]; + + if (cache_image.info.guest_address == info.guest_address && + cache_image.info.guest_size_bytes == info.guest_size_bytes && + cache_image.info.size == info.size) { + + ASSERT(cache_image.info.type == info.type); + if (IsVulkanFormatCompatible((VkFormat)info.pixel_format, + (VkFormat)cache_image.info.pixel_format)) { + image_id = cache_id; + } + break; + } + } + + // Try to resolve overlaps (if any) + if (!image_id) { + for (const auto& cache_id : image_ids) { + const auto& merged_info = image_id ? slot_images[image_id].info : info; + image_id = ResolveOverlap(merged_info, cache_id, image_id); + } + } + + // Create and register a new image + if (!image_id) { image_id = slot_images.insert(instance, scheduler, info); RegisterImage(image_id); - } else { - image_id = image_ids[image_ids.size() > 1 ? 
1 : 0]; } + slot_images[image_id].tick_accessed_last = scheduler.CurrentTick(); + return image_id; } @@ -135,31 +275,7 @@ ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& usage.texture = true; } - // These changes are temporary and should be removed once texture cache will handle subresources - // merging - auto view_info_tmp = view_info; - if (view_info_tmp.range.base.level > image.info.resources.levels - 1 || - view_info_tmp.range.base.layer > image.info.resources.layers - 1 || - view_info_tmp.range.extent.levels > image.info.resources.levels || - view_info_tmp.range.extent.layers > image.info.resources.layers) { - - LOG_DEBUG(Render_Vulkan, - "Subresource range ({}~{},{}~{}) exceeds base image extents ({},{})", - view_info_tmp.range.base.level, view_info_tmp.range.extent.levels, - view_info_tmp.range.base.layer, view_info_tmp.range.extent.layers, - image.info.resources.levels, image.info.resources.layers); - - view_info_tmp.range.base.level = - std::min(view_info_tmp.range.base.level, image.info.resources.levels - 1); - view_info_tmp.range.base.layer = - std::min(view_info_tmp.range.base.layer, image.info.resources.layers - 1); - view_info_tmp.range.extent.levels = - std::min(view_info_tmp.range.extent.levels, image.info.resources.levels); - view_info_tmp.range.extent.layers = - std::min(view_info_tmp.range.extent.layers, image.info.resources.layers); - } - - return RegisterImageView(image_id, view_info_tmp); + return RegisterImageView(image_id, view_info); } ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info, @@ -204,10 +320,18 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info, Image& image = slot_images[image_id]; image.flags |= ImageFlagBits::GpuModified; image.flags &= ~ImageFlagBits::CpuModified; - image.aspect_mask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; + image.aspect_mask = vk::ImageAspectFlagBits::eDepth; - const auto new_layout = view_info.is_storage ? vk::ImageLayout::eDepthStencilAttachmentOptimal - : vk::ImageLayout::eDepthStencilReadOnlyOptimal; + const bool has_stencil = image_info.usage.stencil; + if (has_stencil) { + image.aspect_mask |= vk::ImageAspectFlagBits::eStencil; + } + + const auto new_layout = view_info.is_storage + ? has_stencil ? vk::ImageLayout::eDepthStencilAttachmentOptimal + : vk::ImageLayout::eDepthAttachmentOptimal + : has_stencil ? 
vk::ImageLayout::eDepthStencilReadOnlyOptimal + : vk::ImageLayout::eDepthReadOnlyOptimal; image.Transit(new_layout, vk::AccessFlagBits::eDepthStencilAttachmentWrite | vk::AccessFlagBits::eDepthStencilAttachmentRead); @@ -224,6 +348,7 @@ // Update tracked image usage image.info.usage.depth_target = true; + image.info.usage.stencil = has_stencil; return RegisterImageView(image_id, view_info); } @@ -260,7 +385,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule .bufferRowLength = static_cast<u32>(mip_pitch), .bufferImageHeight = static_cast<u32>(mip_height), .imageSubresource{ - .aspectMask = vk::ImageAspectFlagBits::eColor, + .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil, .mipLevel = m, .baseArrayLayer = 0, .layerCount = num_layers, @@ -290,6 +415,17 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule const auto [vk_buffer, buf_offset] = buffer_cache.ObtainTempBuffer(image_addr, image_size); buffer = vk_buffer->Handle(); offset = buf_offset; + + // The obtained buffer may be written by a shader so we need to emit a barrier to prevent + // a RAW hazard + if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead, + vk::PipelineStageFlagBits2::eTransfer)) { + auto dependencies = vk::DependencyInfo{ + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &barrier.value(), + }; + cmdbuf.pipelineBarrier2(dependencies); + } } for (auto& copy : image_copy) { @@ -335,7 +471,8 @@ void TextureCache::UnregisterImage(ImageId image_id) { }); } -void TextureCache::TrackImage(Image& image, ImageId image_id) { +void TextureCache::TrackImage(ImageId image_id) { + auto& image = slot_images[image_id]; if (True(image.flags & ImageFlagBits::Tracked)) { return; } @@ -343,7 +480,8 @@ void TextureCache::TrackImage(ImageId image_id) { tracker.UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, 1); } -void TextureCache::UntrackImage(Image& image, ImageId image_id) { +void TextureCache::UntrackImage(ImageId image_id) { + auto& image = slot_images[image_id]; if (False(image.flags & ImageFlagBits::Tracked)) { return; } @@ -356,6 +494,8 @@ void TextureCache::DeleteImage(ImageId image_id) { ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); + image.flags |= ImageFlagBits::Deleted; + // Remove any registered meta areas. const auto& meta_info = image.info.meta_info; if (meta_info.cmask_addr) {
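`RefreshImage` above masks the stencil aspect out of every upload region. That lines up with the Vulkan rule that each `VkBufferImageCopy` may reference exactly one aspect of a combined depth-stencil image; uploading stencil as well would take a second region. A hedged sketch (the buffer layout and all names here are assumptions, not part of the diff):

```cpp
#include <array>
#include <vulkan/vulkan.hpp>

// Hypothetical two-aspect upload for a D32S8 image: one region per aspect,
// since a single region naming both aspects is invalid per the spec.
void UploadDepthStencil(vk::CommandBuffer cmdbuf, vk::Buffer staging, vk::Image image,
                        uint32_t width, uint32_t height, vk::DeviceSize depth_plane_bytes) {
    const std::array<vk::BufferImageCopy, 2> regions = {
        vk::BufferImageCopy{}
            .setImageSubresource({vk::ImageAspectFlagBits::eDepth, 0, 0, 1})
            .setImageExtent({width, height, 1}),
        vk::BufferImageCopy{}
            .setBufferOffset(depth_plane_bytes) // assumes stencil data follows depth
            .setImageSubresource({vk::ImageAspectFlagBits::eStencil, 0, 0, 1})
            .setImageExtent({width, height, 1}),
    };
    cmdbuf.copyBufferToImage(staging, image, vk::ImageLayout::eTransferDstOptimal, regions);
}
```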
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 31b1e393..14209396 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -65,9 +65,18 @@ public: return; } RefreshImage(image, custom_scheduler); - TrackImage(image, image_id); + TrackImage(image_id); } + [[nodiscard]] ImageId ResolveOverlap(const ImageInfo& info, ImageId cache_img_id, + ImageId merged_image_id); + + /// Resolves depth overlap and either re-creates the image or returns existing one + [[nodiscard]] ImageId ResolveDepthOverlap(const ImageInfo& requested_info, + ImageId cache_img_id); + + [[nodiscard]] ImageId ExpandImage(const ImageInfo& info, ImageId image_id); + /// Reuploads image contents. void RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler = nullptr); @@ -167,14 +176,20 @@ private: void UnregisterImage(ImageId image); /// Track CPU reads and writes for image - void TrackImage(Image& image, ImageId image_id); + void TrackImage(ImageId image_id); /// Stop tracking CPU reads and writes for image - void UntrackImage(Image& image, ImageId image_id); + void UntrackImage(ImageId image_id); /// Removes the image and any views/surface metas that reference it. void DeleteImage(ImageId image_id); + void FreeImage(ImageId image_id) { + UntrackImage(image_id); + UnregisterImage(image_id); + DeleteImage(image_id); + } + private: const Vulkan::Instance& instance; Vulkan::Scheduler& scheduler; diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 5f3ed0f8..7fe5598d 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -254,11 +254,8 @@ struct DetilerParams { u32 sizes[14]; }; -static constexpr size_t StreamBufferSize = 1_GB; - TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler) - : instance{instance}, scheduler{scheduler}, - stream_buffer{instance, scheduler, MemoryUsage::Upload, StreamBufferSize} { + : instance{instance}, scheduler{scheduler} { static const std::array detiler_shaders{ HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP, HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X2_COMP, @@ -397,11 +394,6 @@ std::optional<vk::Buffer> TileManager::TryDetile(Image& image) { // Prepare input buffer const u32 image_size = image.info.guest_size_bytes; const auto [in_buffer, in_offset] = [&] -> std::pair<vk::Buffer, u32> { - // Use stream buffer for smaller textures. - if (image_size <= stream_buffer.GetFreeSize()) { - u32 offset = stream_buffer.Copy(image.info.guest_address, image_size); - return {stream_buffer.Handle(), offset}; - } // Request temporary host buffer for larger sizes. auto in_buffer = AllocBuffer(image_size); const auto addr = reinterpret_cast<const void*>(image.info.guest_address); diff --git a/src/video_core/texture_cache/tile_manager.h b/src/video_core/texture_cache/tile_manager.h index 00765b1f..0baabf98 100644 --- a/src/video_core/texture_cache/tile_manager.h +++ b/src/video_core/texture_cache/tile_manager.h @@ -51,7 +51,6 @@ private: private: const Vulkan::Instance& instance; Vulkan::Scheduler& scheduler; - StreamBuffer stream_buffer; std::array detilers; }; diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index 45ffe251..bcef1935 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -36,6 +36,8 @@ struct Extent3D { u32 width; u32 height; u32 depth; + + auto operator<=>(const Extent3D&) const = default; }; struct SubresourceLayers {
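Finally, the defaulted three-way comparison added to `Extent3D` is what lets the new cache code compare sizes directly (`image_info.size != tex_cache_image.info.size` in ResolveOverlap, and the perfect-match test in FindImage). A self-contained illustration of why defaulting `operator<=>` is enough (it mirrors the struct above, not the emulator's full header):

```cpp
#include <compare>
#include <cstdint>

struct Extent3D {
    std::uint32_t width;
    std::uint32_t height;
    std::uint32_t depth;
    // Defaulting operator<=> also implicitly declares a defaulted operator==,
    // so ==, !=, <, <=, > and >= all become available.
    auto operator<=>(const Extent3D&) const = default;
};

static_assert(Extent3D{512, 512, 1} == Extent3D{512, 512, 1});
static_assert(Extent3D{256, 256, 1} != Extent3D{512, 512, 1});
```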