texture_cache: slight detilers refactoring (#2036)
Some checks are pending
Build and Release / reuse (push) Waiting to run
Build and Release / clang-format (push) Waiting to run
Build and Release / get-info (push) Waiting to run
Build and Release / windows-sdl (push) Blocked by required conditions
Build and Release / windows-qt (push) Blocked by required conditions
Build and Release / macos-sdl (push) Blocked by required conditions
Build and Release / macos-qt (push) Blocked by required conditions
Build and Release / linux-sdl (push) Blocked by required conditions
Build and Release / linux-qt (push) Blocked by required conditions
Build and Release / pre-release (push) Blocked by required conditions

This commit is contained in:
psucien 2025-01-03 21:42:23 +01:00 committed by GitHub
parent c2be12f009
commit 8e8671323a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 87 additions and 164 deletions

View file

@ -660,7 +660,7 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize;
TextureCache::BaseDesc desc{};
desc.info.guest_address = device_addr;
desc.info.guest_size_bytes = size;
desc.info.guest_size = size;
const ImageId image_id = texture_cache.FindImage(desc, find_flags);
if (!image_id) {
return false;

View file

@ -2,14 +2,14 @@
# SPDX-License-Identifier: GPL-2.0-or-later
# Shader source files gathered into the SHADER_FILES list variable.
# Entries are given by file name, optionally prefixed with a subdirectory (e.g. detilers/).
set(SHADER_FILES
detile_m8x1.comp
detile_m8x2.comp
detile_m32x1.comp
detile_m32x2.comp
detile_m32x4.comp
detile_macro8x1.comp
detile_macro32x1.comp
detile_macro32x2.comp
detilers/macro_32bpp.comp
detilers/macro_64bpp.comp
detilers/macro_8bpp.comp
detilers/micro_128bpp.comp
detilers/micro_16bpp.comp
detilers/micro_32bpp.comp
detilers/micro_64bpp.comp
detilers/micro_8bpp.comp
fs_tri.vert
post_process.frag
)

View file

@ -427,7 +427,7 @@ bool Presenter::ShowSplash(Frame* frame /*= nullptr*/) {
VideoCore::Extent3D{splash->GetImageInfo().width, splash->GetImageInfo().height, 1};
info.pitch = splash->GetImageInfo().width;
info.guest_address = VAddr(splash->GetImageData().data());
info.guest_size_bytes = splash->GetImageData().size();
info.guest_size = splash->GetImageData().size();
info.mips_layout.emplace_back(splash->GetImageData().size(),
splash->GetImageInfo().width,
splash->GetImageInfo().height, 0);

View file

@ -210,7 +210,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {}x{}x{} {:#x}:{:#x}",
info.size.width, info.size.height, info.size.depth, info.guest_address,
info.guest_size_bytes);
info.guest_size);
}
boost::container::small_vector<vk::ImageMemoryBarrier2, 32> Image::GetBarriers(

View file

@ -80,7 +80,7 @@ struct Image {
/// Tests whether a guest address range intersects the range occupied by this image.
///
/// @param overlap_cpu_addr Start address of the range to test.
/// @param overlap_size     Length of the range to test, in the same units as the image size.
/// @return true when the two ranges share at least one address, false otherwise.
[[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
const VAddr overlap_end = overlap_cpu_addr + overlap_size;
const auto image_addr = info.guest_address;
const auto image_end = info.guest_address + info.guest_size_bytes;
const auto image_end = info.guest_address + info.guest_size;
// Both ranges are treated as half-open [begin, end): they intersect exactly when each one
// begins before the other one ends.
return image_addr < overlap_end && overlap_cpu_addr < image_end;
}

View file

@ -250,15 +250,15 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group,
guest_address = cpu_address;
if (!props.is_tiled) {
guest_size_bytes = pitch * size.height * 4;
guest_size = pitch * size.height * 4;
} else {
if (Config::isNeoMode()) {
guest_size_bytes = pitch * ((size.height + 127) & (~127)) * 4;
guest_size = pitch * ((size.height + 127) & (~127)) * 4;
} else {
guest_size_bytes = pitch * ((size.height + 63) & (~63)) * 4;
guest_size = pitch * ((size.height + 63) & (~63)) * 4;
}
}
mips_layout.emplace_back(guest_size_bytes, pitch, 0);
mips_layout.emplace_back(guest_size, pitch, 0);
}
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
@ -279,7 +279,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
guest_address = buffer.Address();
const auto color_slice_sz = buffer.GetColorSliceSize();
guest_size_bytes = color_slice_sz * buffer.NumSlices();
guest_size = color_slice_sz * buffer.NumSlices();
mips_layout.emplace_back(color_slice_sz, pitch, 0);
tiling_idx = static_cast<u32>(buffer.attrib.tile_mode_index.Value());
}
@ -303,7 +303,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
guest_address = buffer.Address();
const auto depth_slice_sz = buffer.GetDepthSliceSize();
guest_size_bytes = depth_slice_sz * num_slices;
guest_size = depth_slice_sz * num_slices;
mips_layout.emplace_back(depth_slice_sz, pitch, 0);
}
@ -339,7 +339,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de
void ImageInfo::UpdateSize() {
mips_layout.clear();
MipInfo mip_info{};
guest_size_bytes = 0;
guest_size = 0;
for (auto mip = 0u; mip < resources.levels; ++mip) {
auto bpp = num_bits;
auto mip_w = pitch >> mip;
@ -392,11 +392,11 @@ void ImageInfo::UpdateSize() {
}
}
mip_info.size *= mip_d;
mip_info.offset = guest_size_bytes;
mip_info.offset = guest_size;
mips_layout.emplace_back(mip_info);
guest_size_bytes += mip_info.size;
guest_size += mip_info.size;
}
guest_size_bytes *= resources.layers;
guest_size *= resources.layers;
}
int ImageInfo::IsMipOf(const ImageInfo& info) const {
@ -468,18 +468,18 @@ int ImageInfo::IsSliceOf(const ImageInfo& info) const {
}
// Check for size alignment.
const bool slice_size = info.guest_size_bytes / info.resources.layers;
if (guest_size_bytes % slice_size != 0) {
const bool slice_size = info.guest_size / info.resources.layers;
if (guest_size % slice_size != 0) {
return -1;
}
// Ensure that address is aligned too.
const auto addr_diff = guest_address - info.guest_address;
if ((addr_diff % guest_size_bytes) != 0) {
if ((addr_diff % guest_size) != 0) {
return -1;
}
return addr_diff / guest_size_bytes;
return addr_diff / guest_size;
}
} // namespace VideoCore

View file

@ -84,7 +84,7 @@ struct ImageInfo {
};
boost::container::small_vector<MipInfo, 14> mips_layout;
VAddr guest_address{0};
u32 guest_size_bytes{0};
u32 guest_size{0};
u32 tiling_idx{0}; // TODO: merge with existing!
VAddr stencil_addr{0};

View file

@ -3,7 +3,9 @@
#include <optional>
#include <xxhash.h>
#include "common/assert.h"
#include "common/debug.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/page_manager.h"
#include "video_core/renderer_vulkan/vk_instance.h"
@ -34,7 +36,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
Vulkan::SetObjectName(instance.GetDevice(), null_image, "Null Image");
img.flags = ImageFlagBits::Empty;
img.track_addr = img.info.guest_address;
img.track_addr_end = img.info.guest_address + img.info.guest_size_bytes;
img.track_addr_end = img.info.guest_address + img.info.guest_size;
ImageViewInfo view_info;
const auto null_view_id =
@ -50,7 +52,7 @@ void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) {
if (image.hash == 0) {
// Initialize hash
const u8* addr = std::bit_cast<u8*>(image.info.guest_address);
image.hash = XXH3_64bits(addr, image.info.guest_size_bytes);
image.hash = XXH3_64bits(addr, image.info.guest_size);
}
image.flags |= ImageFlagBits::MaybeCpuDirty;
UntrackImage(image_id);
@ -63,7 +65,7 @@ void TextureCache::InvalidateMemory(VAddr addr, size_t size) {
const auto pages_end = PageManager::GetNextPageAddr(addr + size - 1);
ForEachImageInRegion(pages_start, pages_end - pages_start, [&](ImageId image_id, Image& image) {
const auto image_begin = image.info.guest_address;
const auto image_end = image.info.guest_address + image.info.guest_size_bytes;
const auto image_end = image.info.guest_address + image.info.guest_size;
if (image_begin < end && addr < image_end) {
// Start or end of the modified region is in the image, or the image is entirely within
// the modified region, so the image was definitely accessed by this page fault.
@ -201,7 +203,7 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
}
if (image_info.pixel_format != tex_cache_image.info.pixel_format ||
image_info.guest_size_bytes <= tex_cache_image.info.guest_size_bytes) {
image_info.guest_size <= tex_cache_image.info.guest_size) {
auto result_id = merged_image_id ? merged_image_id : cache_image_id;
const auto& result_image = slot_images[result_id];
return {
@ -302,7 +304,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) {
std::scoped_lock lock{mutex};
boost::container::small_vector<ImageId, 8> image_ids;
ForEachImageInRegion(info.guest_address, info.guest_size_bytes,
ForEachImageInRegion(info.guest_address, info.guest_size,
[&](ImageId image_id, Image& image) { image_ids.push_back(image_id); });
ImageId image_id{};
@ -313,8 +315,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) {
if (cache_image.info.guest_address != info.guest_address) {
continue;
}
if (False(flags & FindFlags::RelaxSize) &&
cache_image.info.guest_size_bytes != info.guest_size_bytes) {
if (False(flags & FindFlags::RelaxSize) && cache_image.info.guest_size != info.guest_size) {
continue;
}
if (False(flags & FindFlags::RelaxDim) && cache_image.info.size != info.size) {
@ -455,7 +456,7 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
if (!stencil_id) {
ImageInfo info{};
info.guest_address = desc.info.stencil_addr;
info.guest_size_bytes = desc.info.stencil_size;
info.guest_size = desc.info.stencil_size;
info.size = desc.info.size;
stencil_id = slot_images.insert(instance, scheduler, info);
RegisterImage(stencil_id);
@ -468,6 +469,9 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
}
void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) {
RENDERER_TRACE;
TRACE_HINT(fmt::format("{:x}:{:x}", image.info.guest_address, image.info.guest_size));
if (False(image.flags & ImageFlagBits::Dirty)) {
return;
}
@ -543,7 +547,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
const auto cmdbuf = sched_ptr->CommandBuffer();
const VAddr image_addr = image.info.guest_address;
const size_t image_size = image.info.guest_size_bytes;
const size_t image_size = image.info.guest_size;
const auto [vk_buffer, buf_offset] =
buffer_cache.ObtainViewBuffer(image_addr, image_size, is_gpu_dirty);
@ -612,7 +616,7 @@ void TextureCache::RegisterImage(ImageId image_id) {
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
"Trying to register an already registered image");
image.flags |= ImageFlagBits::Registered;
ForEachPage(image.info.guest_address, image.info.guest_size_bytes,
ForEachPage(image.info.guest_address, image.info.guest_size,
[this, image_id](u64 page) { page_table[page].push_back(image_id); });
}
@ -621,7 +625,7 @@ void TextureCache::UnregisterImage(ImageId image_id) {
ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
"Trying to unregister an already unregistered image");
image.flags &= ~ImageFlagBits::Registered;
ForEachPage(image.info.guest_address, image.info.guest_size_bytes, [this, image_id](u64 page) {
ForEachPage(image.info.guest_address, image.info.guest_size, [this, image_id](u64 page) {
const auto page_it = page_table.find(page);
if (page_it == nullptr) {
UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PageShift);
@ -640,7 +644,7 @@ void TextureCache::UnregisterImage(ImageId image_id) {
void TextureCache::TrackImage(ImageId image_id) {
auto& image = slot_images[image_id];
const auto image_begin = image.info.guest_address;
const auto image_end = image.info.guest_address + image.info.guest_size_bytes;
const auto image_end = image.info.guest_address + image.info.guest_size;
if (image_begin == image.track_addr && image_end == image.track_addr_end) {
return;
}
@ -649,7 +653,7 @@ void TextureCache::TrackImage(ImageId image_id) {
// Re-track the whole image
image.track_addr = image_begin;
image.track_addr_end = image_end;
tracker.UpdatePagesCachedCount(image_begin, image.info.guest_size_bytes, 1);
tracker.UpdatePagesCachedCount(image_begin, image.info.guest_size, 1);
} else {
if (image_begin < image.track_addr) {
TrackImageHead(image_id);
@ -674,7 +678,7 @@ void TextureCache::TrackImageHead(ImageId image_id) {
void TextureCache::TrackImageTail(ImageId image_id) {
auto& image = slot_images[image_id];
const auto image_end = image.info.guest_address + image.info.guest_size_bytes;
const auto image_end = image.info.guest_address + image.info.guest_size;
if (image_end == image.track_addr_end) {
return;
}
@ -719,7 +723,7 @@ void TextureCache::UntrackImageHead(ImageId image_id) {
void TextureCache::UntrackImageTail(ImageId image_id) {
auto& image = slot_images[image_id];
const auto image_end = image.info.guest_address + image.info.guest_size_bytes;
const auto image_end = image.info.guest_address + image.info.guest_size;
if (!image.IsTracked() || image.track_addr_end < image_end) {
return;
}

View file

@ -8,128 +8,47 @@
#include "video_core/texture_cache/image_view.h"
#include "video_core/texture_cache/tile_manager.h"
#include "video_core/host_shaders/detile_m32x1_comp.h"
#include "video_core/host_shaders/detile_m32x2_comp.h"
#include "video_core/host_shaders/detile_m32x4_comp.h"
#include "video_core/host_shaders/detile_m8x1_comp.h"
#include "video_core/host_shaders/detile_m8x2_comp.h"
#include "video_core/host_shaders/detile_macro32x1_comp.h"
#include "video_core/host_shaders/detile_macro32x2_comp.h"
#include "video_core/host_shaders/detile_macro8x1_comp.h"
#include "video_core/host_shaders/detilers/macro_32bpp_comp.h"
#include "video_core/host_shaders/detilers/macro_64bpp_comp.h"
#include "video_core/host_shaders/detilers/macro_8bpp_comp.h"
#include "video_core/host_shaders/detilers/micro_128bpp_comp.h"
#include "video_core/host_shaders/detilers/micro_16bpp_comp.h"
#include "video_core/host_shaders/detilers/micro_32bpp_comp.h"
#include "video_core/host_shaders/detilers/micro_64bpp_comp.h"
#include "video_core/host_shaders/detilers/micro_8bpp_comp.h"
#include <boost/container/static_vector.hpp>
// #include <boost/container/static_vector.hpp>
#include <magic_enum/magic_enum.hpp>
#include <vk_mem_alloc.h>
namespace VideoCore {
/// Maps an image format onto a plain unsigned-integer format for the detiling path.
///
/// Formats are grouped so that every member of a group demotes to the same target:
/// eR8Uint, eR8G8Uint, eR32Uint, eR32G32Uint or eR32G32B32A32Uint.
///
/// @param format Format to demote.
/// @return The demoted format for every format listed in the switch below. Any other format is
///         returned unchanged after an error has been logged.
static vk::Format DemoteImageFormatForDetiling(vk::Format format) {
    switch (format) {
    case vk::Format::eR8Unorm:
    case vk::Format::eR8Snorm:
    case vk::Format::eR8Uint:
    case vk::Format::eR8Srgb:
        return vk::Format::eR8Uint;
    case vk::Format::eR8G8Unorm:
    case vk::Format::eR8G8Snorm:
    case vk::Format::eR8G8Uint:
    case vk::Format::eR8G8Srgb:
    case vk::Format::eR16Unorm:
    case vk::Format::eR16Snorm:
    case vk::Format::eR16Uint:
    case vk::Format::eR16Sfloat:
    case vk::Format::eD16Unorm:
    case vk::Format::eR4G4B4A4UnormPack16:
    case vk::Format::eR5G5B5A1UnormPack16:
    case vk::Format::eB5G5R5A1UnormPack16:
    case vk::Format::eB5G6R5UnormPack16:
        return vk::Format::eR8G8Uint;
    case vk::Format::eR8G8B8A8Unorm:
    case vk::Format::eR8G8B8A8Snorm:
    case vk::Format::eR8G8B8A8Uint:
    case vk::Format::eR8G8B8A8Srgb:
    case vk::Format::eB8G8R8A8Unorm:
    case vk::Format::eB8G8R8A8Snorm:
    case vk::Format::eB8G8R8A8Uint:
    case vk::Format::eB8G8R8A8Srgb:
    case vk::Format::eR16G16Unorm:
    case vk::Format::eR16G16Snorm:
    case vk::Format::eR16G16Uint:
    case vk::Format::eR16G16Sfloat:
    case vk::Format::eR32Uint:
    case vk::Format::eR32Sfloat:
    case vk::Format::eD32Sfloat:
    case vk::Format::eA2B10G10R10UnormPack32:
    case vk::Format::eA2B10G10R10SnormPack32:
    case vk::Format::eA2B10G10R10UintPack32:
    case vk::Format::eB10G11R11UfloatPack32:
    case vk::Format::eE5B9G9R9UfloatPack32:
        return vk::Format::eR32Uint;
    case vk::Format::eR16G16B16A16Unorm:
    case vk::Format::eR16G16B16A16Snorm:
    case vk::Format::eR16G16B16A16Uint:
    case vk::Format::eR16G16B16A16Sfloat:
    case vk::Format::eR32G32Uint:
    case vk::Format::eR32G32Sfloat:
    case vk::Format::eBc1RgbaUnormBlock:
    case vk::Format::eBc1RgbaSrgbBlock:
    case vk::Format::eBc4UnormBlock:
    case vk::Format::eBc4SnormBlock:
        return vk::Format::eR32G32Uint;
    case vk::Format::eR32G32B32A32Uint:
    case vk::Format::eR32G32B32A32Sfloat:
    case vk::Format::eBc2UnormBlock:
    case vk::Format::eBc2SrgbBlock:
    case vk::Format::eBc3UnormBlock:
    case vk::Format::eBc3SrgbBlock:
    case vk::Format::eBc5UnormBlock:
    case vk::Format::eBc5SnormBlock:
    case vk::Format::eBc6HUfloatBlock:
    case vk::Format::eBc6HSfloatBlock:
    case vk::Format::eBc7UnormBlock:
    case vk::Format::eBc7SrgbBlock:
        return vk::Format::eR32G32B32A32Uint;
    default:
        break;
    }

    // Log missing formats only once to avoid spamming the log.
    // The numeric value of a vk::Format is not guaranteed to be below MaxFormatIndex (formats
    // added by extensions use much larger enumerants), so the "already logged" table is only
    // consulted and updated when the value fits. Formats outside the table are logged on every
    // call rather than indexing the array out of bounds.
    static constexpr size_t MaxFormatIndex = 256;
    static std::array<bool, MaxFormatIndex> logged_formats{};
    const u32 index = u32(format);
    const bool trackable = index < MaxFormatIndex;
    if (!trackable || !logged_formats[index]) {
        LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format));
        if (trackable) {
            logged_formats[index] = true;
        }
    }
    return format;
}
const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
const auto format = DemoteImageFormatForDetiling(info.pixel_format);
const auto bpp = info.num_bits * (info.props.is_block ? 16 : 1);
switch (info.tiling_mode) {
case AmdGpu::TilingMode::Texture_MicroTiled:
switch (format) {
case vk::Format::eR8Uint:
return &detilers[DetilerType::Micro8x1];
case vk::Format::eR8G8Uint:
return &detilers[DetilerType::Micro8x2];
case vk::Format::eR32Uint:
return &detilers[DetilerType::Micro32x1];
case vk::Format::eR32G32Uint:
return &detilers[DetilerType::Micro32x2];
case vk::Format::eR32G32B32A32Uint:
return &detilers[DetilerType::Micro32x4];
switch (bpp) {
case 8:
return &detilers[DetilerType::Micro8];
case 16:
return &detilers[DetilerType::Micro16];
case 32:
return &detilers[DetilerType::Micro32];
case 64:
return &detilers[DetilerType::Micro64];
case 128:
return &detilers[DetilerType::Micro128];
default:
return nullptr;
}
case AmdGpu::TilingMode::Texture_Volume:
switch (format) {
case vk::Format::eR8Uint:
return &detilers[DetilerType::Macro8x1];
case vk::Format::eR32Uint:
return &detilers[DetilerType::Macro32x1];
case vk::Format::eR32G32Uint:
return &detilers[DetilerType::Macro32x2];
switch (bpp) {
case 8:
return &detilers[DetilerType::Macro8];
case 32:
return &detilers[DetilerType::Macro32];
case 64:
return &detilers[DetilerType::Macro64];
default:
return nullptr;
}
@ -149,10 +68,10 @@ struct DetilerParams {
TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
: instance{instance}, scheduler{scheduler} {
static const std::array detiler_shaders{
HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP,
HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X2_COMP,
HostShaders::DETILE_M32X4_COMP, HostShaders::DETILE_MACRO8X1_COMP,
HostShaders::DETILE_MACRO32X1_COMP, HostShaders::DETILE_MACRO32X2_COMP,
HostShaders::MICRO_8BPP_COMP, HostShaders::MICRO_16BPP_COMP,
HostShaders::MICRO_32BPP_COMP, HostShaders::MICRO_64BPP_COMP,
HostShaders::MICRO_128BPP_COMP, HostShaders::MACRO_8BPP_COMP,
HostShaders::MACRO_32BPP_COMP, HostShaders::MACRO_64BPP_COMP,
};
boost::container::static_vector<vk::DescriptorSetLayoutBinding, 2> bindings{
@ -293,7 +212,7 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
return {in_buffer, in_offset};
}
const u32 image_size = info.guest_size_bytes;
const u32 image_size = info.guest_size;
// Prepare output buffer
auto out_buffer = AllocBuffer(image_size, true);

View file

@ -12,15 +12,15 @@ class TextureCache;
struct ImageInfo;
// Identifies one detiler variant. Values are used as indices into the tile manager's
// `detilers` array (e.g. `detilers[DetilerType::Micro8]`).
// NOTE(review): `Max` looks like a count/sentinel entry rather than a real detiler — confirm.
enum DetilerType : u32 {
Micro8x1,
Micro8x2,
Micro32x1,
Micro32x2,
Micro32x4,
Micro8,
Micro16,
Micro32,
Micro64,
Micro128,
Macro8x1,
Macro32x1,
Macro32x2,
Macro8,
Macro32,
Macro64,
Max
};