From 2c87171b95ee68d82f5efdf7563af5b233caad5f Mon Sep 17 00:00:00 2001 From: psucien Date: Wed, 5 Jun 2024 13:10:48 +0200 Subject: [PATCH] texture_cache: a support for m8x1 and m8x4 layouts added to the detiler --- src/video_core/texture_cache/image.cpp | 14 +- src/video_core/texture_cache/image.h | 2 + src/video_core/texture_cache/image_view.cpp | 10 +- src/video_core/texture_cache/image_view.h | 7 +- .../texture_cache/texture_cache.cpp | 121 ++++++------ src/video_core/texture_cache/texture_cache.h | 17 +- src/video_core/texture_cache/tile_manager.cpp | 184 +++++++++++++++++- src/video_core/texture_cache/tile_manager.h | 38 ++++ 8 files changed, 326 insertions(+), 67 deletions(-) diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index bc8cb198..14f824d0 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -151,6 +151,18 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, if (info.type == vk::ImageType::e3D) { flags |= vk::ImageCreateFlagBits::e2DArrayCompatible; } + if (info.is_tiled) { + flags |= vk::ImageCreateFlagBits::eExtendedUsage; + if (false) { // IsBlockCodedFormat() + flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible; + } + } + + info.usage = ImageUsageFlags(info.pixel_format); + if (info.is_tiled || info.is_storage) { + info.usage |= vk::ImageUsageFlagBits::eStorage; + } + const vk::ImageCreateInfo image_ci = { .flags = flags, .imageType = info.type, @@ -163,7 +175,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, .mipLevels = static_cast(info.resources.levels), .arrayLayers = static_cast(info.resources.layers), .tiling = vk::ImageTiling::eOptimal, - .usage = ImageUsageFlags(info.pixel_format), + .usage = info.usage, .initialLayout = vk::ImageLayout::eUndefined, }; diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 5d9a1547..f2de6abb 100644 --- 
a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -39,8 +39,10 @@ struct ImageInfo { explicit ImageInfo(const AmdGpu::Image& image) noexcept; bool is_tiled = false; + bool is_storage = false; vk::Format pixel_format = vk::Format::eUndefined; vk::ImageType type = vk::ImageType::e1D; + vk::ImageUsageFlags usage; SubresourceExtent resources; Extent3D size{1, 1, 1}; u32 pitch = 0; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 353e4e7f..919415e8 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -58,10 +58,16 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image) noexcept { mapping.a = ConvertComponentSwizzle(image.dst_sel_w); } -ImageView::ImageView(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, - const ImageViewInfo& info_, vk::Image image) +ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, vk::Image image, + std::optional usage_override /*= {}*/) : info{info_} { + vk::ImageViewUsageCreateInfo usage_ci{}; + if (usage_override) { + usage_ci.usage = usage_override.value(); + } + const vk::ImageViewCreateInfo image_view_ci = { + .pNext = usage_override.has_value() ? 
&usage_ci : nullptr, .image = image, .viewType = info.type, .format = info.format, diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index 7f98e8ec..aa4ec8ee 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -7,6 +7,8 @@ #include "video_core/renderer_vulkan/vk_common.h" #include "video_core/texture_cache/types.h" +#include + namespace Vulkan { class Instance; class Scheduler; @@ -22,13 +24,14 @@ struct ImageViewInfo { vk::Format format = vk::Format::eR8G8B8A8Unorm; SubresourceRange range; vk::ComponentMapping mapping{}; + bool used_for_detiling = false; auto operator<=>(const ImageViewInfo&) const = default; }; struct ImageView { - explicit ImageView(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, - const ImageViewInfo& info, vk::Image image); + explicit ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, vk::Image image, + std::optional usage_override = {}); ~ImageView(); ImageView(const ImageView&) = delete; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 3e2a7dea..93791d46 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/config.h" #include "core/virtual_memory.h" +#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/tile_manager.h" @@ -63,8 +64,10 @@ static constexpr u64 PageShift = 12; TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_) : instance{instance_}, scheduler{scheduler_}, - staging{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, StreamBufferSize, - Vulkan::BufferType::Upload} { + staging{instance, scheduler, + 
vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eStorageBuffer, + StreamBufferSize, Vulkan::BufferType::Upload}, + tile_manager{instance, scheduler, *this, staging} { #ifndef _WIN64 sigset_t signal_mask; @@ -91,7 +94,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& ASSERT(null_id.index == 0); ImageViewInfo view_info; - void(slot_image_views.insert(instance, scheduler, view_info, slot_images[null_id].image)); + void(slot_image_views.insert(instance, view_info, slot_images[null_id].image)); } TextureCache::~TextureCache() { @@ -138,21 +141,41 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) { return image; } -ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc) { - Image& image = FindImage(ImageInfo{desc}, desc.Address()); - - const ImageViewInfo view_info{desc}; +ImageView& TextureCache::RegisterImageView(Image& image, const ImageViewInfo& view_info) { if (const ImageViewId view_id = image.FindView(view_info); view_id) { return slot_image_views[view_id]; } + // All tiled images are created with the storage usage flag. This makes a set of formats (e.g. sRGB) + // impossible to use. However, during view creation, if an image isn't used as storage and not a + // target for the detiler, we can temporarily remove its storage bit. 
+ std::optional usage_override; + if (!image.info.is_storage && !view_info.used_for_detiling) { + usage_override = image.info.usage & ~vk::ImageUsageFlagBits::eStorage; + } + const ImageViewId view_id = - slot_image_views.insert(instance, scheduler, view_info, image.image); + slot_image_views.insert(instance, view_info, image.image, usage_override); image.image_view_infos.emplace_back(view_info); image.image_view_ids.emplace_back(view_id); return slot_image_views[view_id]; } +ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc) { + Image& image = FindImage(ImageInfo{desc}, desc.Address()); + + const ImageViewInfo view_info{desc}; + return RegisterImageView(image, view_info); +} + +ImageView& TextureCache::GetImageViewForDetiler(Image& image) { + ImageViewInfo view_info; + view_info.format = DemoteImageFormatForDetiling(image.info.pixel_format); + view_info.used_for_detiling = true; + + return RegisterImageView(image, view_info); +} + ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint) { const ImageInfo info{buffer, hint}; @@ -160,15 +183,7 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff ImageViewInfo view_info; view_info.format = info.pixel_format; - if (const ImageViewId view_id = image.FindView(view_info); view_id) { - return slot_image_views[view_id]; - } - - const ImageViewId view_id = - slot_image_views.insert(instance, scheduler, view_info, image.image); - image.image_view_infos.emplace_back(view_info); - image.image_view_ids.emplace_back(view_id); - return slot_image_views[view_id]; + return RegisterImageView(image, view_info); } void TextureCache::RefreshImage(Image& image) { @@ -176,52 +191,48 @@ void TextureCache::RefreshImage(Image& image) { image.flags &= ~ImageFlagBits::CpuModified; { - - // Upload data to the staging buffer. 
- const auto [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4); - const u8* image_data = reinterpret_cast(image.cpu_addr); - if (image.info.is_tiled) { - ConvertTileToLinear(data, image_data, image.info.size.width, image.info.size.height, - Config::isNeoMode()); - } else { + if (!tile_manager.TryDetile(image)) { + // Upload data to the staging buffer. + const auto& [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4); + const u8* image_data = reinterpret_cast(image.cpu_addr); std::memcpy(data, image_data, image.info.guest_size_bytes); + staging.Commit(image.info.guest_size_bytes); + + const auto cmdbuf = scheduler.CommandBuffer(); + image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); + + // Copy to the image. + const vk::BufferImageCopy image_copy = { + .bufferOffset = offset, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .imageOffset = {0, 0, 0}, + .imageExtent = {image.info.size.width, image.info.size.height, 1}, + }; + + cmdbuf.copyBufferToImage(staging.Handle(), image.image, + vk::ImageLayout::eTransferDstOptimal, image_copy); } - staging.Commit(image.info.guest_size_bytes); - - // Copy to the image. 
- const vk::BufferImageCopy image_copy = { - .bufferOffset = offset, - .bufferRowLength = 0, - .bufferImageHeight = 0, - .imageSubresource{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1, - }, - .imageOffset = {0, 0, 0}, - .imageExtent = {image.info.size.width, image.info.size.height, 1}, - }; - - const auto cmdbuf = scheduler.CommandBuffer(); - const vk::ImageSubresourceRange range = { - .aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }; - - image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); - - cmdbuf.copyBufferToImage(staging.Handle(), image.image, - vk::ImageLayout::eTransferDstOptimal, image_copy); image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); return; } + const vk::ImageSubresourceRange range = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }; + const u8* image_data = reinterpret_cast(image.cpu_addr); for (u32 l = 0; l < image.info.resources.layers; l++) { // Upload data to the staging buffer. diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 94c49929..f9384211 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -13,6 +13,7 @@ #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/sampler.h" #include "video_core/texture_cache/slot_vector.h" +#include "video_core/texture_cache/tile_manager.h" namespace Core::Libraries::VideoOut { struct BufferAttributeGroup; @@ -36,22 +37,27 @@ public: void OnCpuWrite(VAddr address); /// Retrieves the image handle of the image with the provided attributes and address. 
- Image& FindImage(const ImageInfo& info, VAddr cpu_address); + [[nodiscard]] Image& FindImage(const ImageInfo& info, VAddr cpu_address); /// Retrieves an image view with the properties of the specified image descriptor. - ImageView& FindImageView(const AmdGpu::Image& image); + [[nodiscard]] ImageView& FindImageView(const AmdGpu::Image& image); + + /// Retrieves an image view with "demoted" pixel format used in detiling + [[nodiscard]] ImageView& GetImageViewForDetiler(Image& image); /// Retrieves the render target with specified properties - ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, - const AmdGpu::Liverpool::CbDbExtent& hint); + [[nodiscard]] ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint); /// Reuploads image contents. void RefreshImage(Image& image); /// Retrieves the sampler that matches the provided S# descriptor. - vk::Sampler GetSampler(const AmdGpu::Sampler& sampler); + [[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler); private: + ImageView& RegisterImageView(Image& image, const ImageViewInfo& view_info); + /// Iterate over all page indices in a range template static void ForEachPage(PAddr addr, size_t size, Func&& func) { @@ -128,6 +134,7 @@ private: const Vulkan::Instance& instance; Vulkan::Scheduler& scheduler; Vulkan::StreamBuffer staging; + TileManager tile_manager; SlotVector slot_images; SlotVector slot_image_views; tsl::robin_map samplers; diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 7d961921..d33427db 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -1,10 +1,15 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include -#include "common/assert.h" +#include "boost/container/static_vector.hpp" +#include 
"video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/texture_cache/image_view.h" +#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/tile_manager.h" +#include + namespace VideoCore { static u32 IntLog2(u32 i) { @@ -162,4 +167,179 @@ void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool is_ } } +vk::Format DemoteImageFormatForDetiling(vk::Format format) { + switch (format) { + case vk::Format::eB8G8R8A8Srgb: + case vk::Format::eR8G8B8A8Unorm: + return vk::Format::eR8G8B8A8Uint; + case vk::Format::eR8Unorm: + return vk::Format::eR8Uint; + default: + LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format)); + break; + } + return format; +} + +const DetilerContext* TileManager::GetDetiler(const Image& image) const { + const auto format = DemoteImageFormatForDetiling(image.info.pixel_format); + + if (image.info.tiling_mode == AmdGpu::TilingMode::Texture_MicroTiled) { + switch (format) { + case vk::Format::eR8Uint: + return &detilers[DetilerType::Micro8x1]; + case vk::Format::eR8G8B8A8Uint: + return &detilers[DetilerType::Micro8x4]; + default: + return nullptr; + } + } + return nullptr; +} + +TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, + TextureCache& texture_cache, Vulkan::StreamBuffer& staging) + : instance{instance}, scheduler{scheduler}, texture_cache{texture_cache}, staging{staging} { + + for (int pl_id = 0; pl_id < DetilerType::Max; ++pl_id) { + auto& ctx = detilers[pl_id]; + + const std::vector shader_code{}; + + const vk::ShaderModuleCreateInfo shader_info = { + .codeSize = shader_code.size(), + .pCode = shader_code.data(), + }; + + vk::UniqueShaderModule module; + try { + module = instance.GetDevice().createShaderModuleUnique(shader_info); + } catch (vk::SystemError& err) { + UNREACHABLE_MSG("{}", err.what()); + } + + const vk::PipelineShaderStageCreateInfo shader_ci 
= { + .stage = vk::ShaderStageFlagBits::eCompute, + .module = *module, + .pName = "main", + }; + + boost::container::static_vector bindings{ + { + .binding = 0, + .descriptorType = vk::DescriptorType::eStorageBuffer, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eCompute, + }, + { + .binding = 1, + .descriptorType = vk::DescriptorType::eStorageImage, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eCompute, + }, + }; + + const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = { + .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }; + static auto desc_layout = + instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci); + + const vk::PushConstantRange push_constants = { + .stageFlags = vk::ShaderStageFlagBits::eCompute, + .offset = 0, + .size = sizeof(u32), + }; + + const vk::DescriptorSetLayout set_layout = *desc_layout; + const vk::PipelineLayoutCreateInfo layout_info = { + .setLayoutCount = 1U, + .pSetLayouts = &set_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_constants, + }; + ctx.pl_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); + + const vk::ComputePipelineCreateInfo compute_pipeline_ci = { + .stage = shader_ci, + .layout = *ctx.pl_layout, + }; + auto result = instance.GetDevice().createComputePipelineUnique( + /*pipeline_cache*/ {}, compute_pipeline_ci); + if (result.result == vk::Result::eSuccess) { + ctx.pl = std::move(result.value); + } else { + UNREACHABLE_MSG("Detiler pipeline creation failed!"); + } + } +} + +TileManager::~TileManager() = default; + +bool TileManager::TryDetile(Image& image) { + if (!image.info.is_tiled) { + return false; + } + + const auto* detiler = GetDetiler(image); + if (!detiler) { + LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} {}", + vk::to_string(image.info.pixel_format), static_cast(image.info.tiling_mode)); + return false; + 
} + + const auto& [data, offset, _] = staging.Map(image.info.guest_size_bytes, 4); + const u8* image_data = reinterpret_cast(image.cpu_addr); + std::memcpy(data, image_data, image.info.guest_size_bytes); + staging.Commit(image.info.guest_size_bytes); + + auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *detiler->pl); + + image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eShaderWrite); + + const vk::DescriptorBufferInfo input_buffer_info{ + .buffer = staging.Handle(), + .offset = offset, + .range = image.info.guest_size_bytes, + }; + + const auto& demoted_view = texture_cache.GetImageViewForDetiler(image); + const vk::DescriptorImageInfo output_image_info{ + .imageView = *demoted_view.image_view, + .imageLayout = image.layout, + }; + + std::vector set_writes{ + { + .dstSet = VK_NULL_HANDLE, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eStorageBuffer, + .pBufferInfo = &input_buffer_info, + }, + { + .dstSet = VK_NULL_HANDLE, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eStorageImage, + .pImageInfo = &output_image_info, + }, + }; + cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *detiler->pl_layout, 0, + set_writes); + + cmdbuf.pushConstants(*detiler->pl_layout, vk::ShaderStageFlagBits::eCompute, 0u, + sizeof(image.info.pitch), &image.info.pitch); + + cmdbuf.dispatch((image.info.size.width * image.info.size.height) / 64, 1, + 1); // round to 64 + + return true; +} + } // namespace VideoCore diff --git a/src/video_core/texture_cache/tile_manager.h b/src/video_core/texture_cache/tile_manager.h index 7903114e..b8c10a5c 100644 --- a/src/video_core/texture_cache/tile_manager.h +++ b/src/video_core/texture_cache/tile_manager.h @@ -4,10 +4,48 @@ #pragma once #include "common/types.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/texture_cache/image.h" 
namespace VideoCore { +class TextureCache; + /// Converts tiled texture data to linear format. void ConvertTileToLinear(u8* dst, const u8* src, u32 width, u32 height, bool neo); +/// Converts image format to the one used internally by the detiler. +vk::Format DemoteImageFormatForDetiling(vk::Format format); + +enum DetilerType : u32 { + Micro8x1, + Micro8x4, + + Max +}; + +struct DetilerContext { + vk::UniquePipeline pl; + vk::UniquePipelineLayout pl_layout; +}; + +class TileManager { +public: + TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, + TextureCache& texture_cache, Vulkan::StreamBuffer& staging); + ~TileManager(); + + bool TryDetile(Image& image); + +private: + const DetilerContext* GetDetiler(const Image& image) const; + +private: + const Vulkan::Instance& instance; + Vulkan::Scheduler& scheduler; + TextureCache& texture_cache; + Vulkan::StreamBuffer& staging; + std::array detilers; +}; + } // namespace VideoCore