From 394331f2066f25114d057e8bfc240dd63ebac2cb Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Sun, 12 Jan 2025 19:25:25 +0100 Subject: [PATCH] video_core: detiler: display micro 64bpp (#2137) --- src/video_core/amdgpu/resource.h | 3 + src/video_core/host_shaders/CMakeLists.txt | 1 + .../detilers/display_micro_64bpp.comp | 60 +++++++++++++++++++ src/video_core/texture_cache/image_info.cpp | 1 + .../texture_cache/texture_cache.cpp | 6 +- src/video_core/texture_cache/tile_manager.cpp | 21 +++++-- src/video_core/texture_cache/tile_manager.h | 2 + 7 files changed, 86 insertions(+), 8 deletions(-) create mode 100644 src/video_core/host_shaders/detilers/display_micro_64bpp.comp diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 744aacdc..75b8b2ac 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -119,6 +119,7 @@ constexpr std::string_view NameOf(ImageType type) { enum class TilingMode : u32 { Depth_MacroTiled = 0u, Display_Linear = 0x8u, + Display_MicroTiled = 0x9u, Display_MacroTiled = 0xAu, Texture_MicroTiled = 0xDu, Texture_MacroTiled = 0xEu, @@ -131,6 +132,8 @@ constexpr std::string_view NameOf(TilingMode type) { return "Depth_MacroTiled"; case TilingMode::Display_Linear: return "Display_Linear"; + case TilingMode::Display_MicroTiled: + return "Display_MicroTiled"; case TilingMode::Display_MacroTiled: return "Display_MacroTiled"; case TilingMode::Texture_MicroTiled: diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index a9c2964a..e60cca12 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -2,6 +2,7 @@ # SPDX-License-Identifier: GPL-2.0-or-later set(SHADER_FILES + detilers/display_micro_64bpp.comp detilers/macro_32bpp.comp detilers/macro_64bpp.comp detilers/macro_8bpp.comp diff --git a/src/video_core/host_shaders/detilers/display_micro_64bpp.comp b/src/video_core/host_shaders/detilers/display_micro_64bpp.comp new file mode 100644 index 00000000..3e048568 --- /dev/null +++ b/src/video_core/host_shaders/detilers/display_micro_64bpp.comp @@ -0,0 +1,60 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 + +layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(std430, binding = 0) buffer input_buf { + uint in_data[]; +}; +layout(std430, binding = 1) buffer output_buf { + uint out_data[]; +}; + +layout(push_constant) uniform image_info { + uint num_levels; + uint pitch; + uint height; + uint c0; + uint c1; +} info; + +const uint lut_64bpp[16] = { + 0x05040100, 0x0d0c0908, + 0x07060302, 0x0f0e0b0a, + 0x15141110, 0x1d1c1918, + 0x17161312, 0x1f1e1b1a, + 0x25242120, 0x2d2c2928, + 0x27262322, 0x2f2e2b2a, + 0x35343130, 0x3d3c3938, + 0x37363332, 0x3f3e3b3a, +}; + +#define MICRO_TILE_DIM (8) +#define MICRO_TILE_SZ (512) +#define TEXELS_PER_ELEMENT (1) +#define BPP (64) + +void main() { + uint x = gl_GlobalInvocationID.x % info.pitch; + uint y = (gl_GlobalInvocationID.x / info.pitch) % info.height; + uint z = gl_GlobalInvocationID.x / (info.pitch * info.height); + + uint col = bitfieldExtract(x, 0, 3); + uint row = bitfieldExtract(y, 0, 3); + uint idx_dw = lut_64bpp[(col + row * MICRO_TILE_DIM) >> 2u]; + uint byte_ofs = gl_LocalInvocationID.x & 3u; + uint idx = bitfieldExtract(idx_dw >> (8 * byte_ofs), 0, 8); + + uint slice_offs = z * info.c1 * MICRO_TILE_SZ; + uint tile_row = y / MICRO_TILE_DIM; + uint tile_column = x / MICRO_TILE_DIM; + uint tile_offs = ((tile_row * info.c0) + tile_column) * MICRO_TILE_SZ; + uint offs = slice_offs + tile_offs + ((idx * BPP) / 8u); + + uint p0 = in_data[(offs >> 2) + 0]; + uint p1 = in_data[(offs >> 2) + 1]; + out_data[2 * gl_GlobalInvocationID.x + 0] = p0; + out_data[2 * gl_GlobalInvocationID.x + 1] = p1; +} diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 58c2a8e2..1992f1fb 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -182,6 +182,7 @@ void ImageInfo::UpdateSize() { case AmdGpu::TilingMode::Texture_Volume: mip_d += (-mip_d) & 3u; [[fallthrough]]; + case AmdGpu::TilingMode::Display_MicroTiled: case AmdGpu::TilingMode::Texture_MicroTiled: { std::tie(mip_info.pitch, mip_info.size) = ImageSizeMicroTiled(mip_w, mip_h, bpp, num_samples); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index bef083d1..a281b89c 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -469,9 +469,6 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) { } void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) { - RENDERER_TRACE; - TRACE_HINT(fmt::format("{:x}:{:x}", image.info.guest_address, image.info.guest_size)); - if (False(image.flags & ImageFlagBits::Dirty)) { return; } @@ -480,6 +477,9 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule return; } + RENDERER_TRACE; + TRACE_HINT(fmt::format("{:x}:{:x}", image.info.guest_address, image.info.guest_size)); + if (True(image.flags & ImageFlagBits::MaybeCpuDirty) && False(image.flags & ImageFlagBits::CpuDirty)) { // The image size should be less than page size to be considered MaybeCpuDirty diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index aba255ce..ede91d12 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -8,6 +8,7 @@ #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/tile_manager.h" +#include "video_core/host_shaders/detilers/display_micro_64bpp_comp.h" #include "video_core/host_shaders/detilers/macro_32bpp_comp.h" #include "video_core/host_shaders/detilers/macro_64bpp_comp.h" #include "video_core/host_shaders/detilers/macro_8bpp_comp.h" @@ -53,6 +54,14 @@ const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const { return nullptr; } break; + case AmdGpu::TilingMode::Display_MicroTiled: + switch (bpp) { + case 64: + return &detilers[DetilerType::Display_Micro64]; + default: + return nullptr; + } + break; default: return nullptr; } @@ -68,10 +77,11 @@ struct DetilerParams { TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler) : instance{instance}, scheduler{scheduler} { static const std::array detiler_shaders{ - HostShaders::MICRO_8BPP_COMP, HostShaders::MICRO_16BPP_COMP, - HostShaders::MICRO_32BPP_COMP, HostShaders::MICRO_64BPP_COMP, - HostShaders::MICRO_128BPP_COMP, HostShaders::MACRO_8BPP_COMP, - HostShaders::MACRO_32BPP_COMP, HostShaders::MACRO_64BPP_COMP, + HostShaders::MICRO_8BPP_COMP, HostShaders::MICRO_16BPP_COMP, + HostShaders::MICRO_32BPP_COMP, HostShaders::MICRO_64BPP_COMP, + HostShaders::MICRO_128BPP_COMP, HostShaders::MACRO_8BPP_COMP, + HostShaders::MACRO_32BPP_COMP, HostShaders::MACRO_64BPP_COMP, + HostShaders::DISPLAY_MICRO_64BPP_COMP, }; boost::container::static_vector bindings{ @@ -258,7 +268,8 @@ std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o params.num_levels = info.resources.levels; params.pitch0 = info.pitch >> (info.props.is_block ? 2u : 0u); params.height = info.size.height; - if (info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) { + if (info.tiling_mode == AmdGpu::TilingMode::Texture_Volume || + info.tiling_mode == AmdGpu::TilingMode::Display_MicroTiled) { ASSERT(info.resources.levels == 1); const auto tiles_per_row = info.pitch / 8u; const auto tiles_per_slice = tiles_per_row * ((info.size.height + 7u) / 8u); diff --git a/src/video_core/texture_cache/tile_manager.h b/src/video_core/texture_cache/tile_manager.h index 4eae7be9..adda16b3 100644 --- a/src/video_core/texture_cache/tile_manager.h +++ b/src/video_core/texture_cache/tile_manager.h @@ -22,6 +22,8 @@ enum DetilerType : u32 { Macro32, Macro64, + Display_Micro64, + Max };