diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index c2a3b53fd..44761545d 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -7,6 +7,7 @@ set(SHADER_FILES detile_m32x1.comp detile_m32x2.comp detile_m32x4.comp + detile_macro8x1.comp detile_macro32x1.comp detile_macro32x2.comp fs_tri.vert diff --git a/src/video_core/host_shaders/detile_macro32x2.comp b/src/video_core/host_shaders/detile_macro32x2.comp index d161484c1..986acc963 100644 --- a/src/video_core/host_shaders/detile_macro32x2.comp +++ b/src/video_core/host_shaders/detile_macro32x2.comp @@ -87,7 +87,7 @@ void main() { uint offs = slice_offs + tile_offs + (idx * BPP / 8); uint p0 = in_data[(offs >> 2) + 0]; - uint p1 = in_data[(offs >> 2) + 1]; + uint p1 = in_data[(offs >> 2) + 1]; out_data[2 * gl_GlobalInvocationID.x + 0] = p0; - out_data[2 * gl_GlobalInvocationID.x + 1] = p1; + out_data[2 * gl_GlobalInvocationID.x + 1] = p1; } diff --git a/src/video_core/host_shaders/detile_macro8x1.comp b/src/video_core/host_shaders/detile_macro8x1.comp new file mode 100644 index 000000000..cddc8af5b --- /dev/null +++ b/src/video_core/host_shaders/detile_macro8x1.comp @@ -0,0 +1,101 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 + +layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(std430, binding = 0) buffer input_buf { + uint in_data[]; +}; +layout(std430, binding = 1) buffer output_buf { + uint out_data[]; +}; + +layout(push_constant) uniform image_info { + uint num_levels; + uint pitch; + uint height; + uint c0; + uint c1; +} info; + +const uint lut_8bpp[][16] = { + { + 0x05040100, 0x45444140, + 0x07060302, 0x47464342, + 0x0d0c0908, 0x4d4c4948, + 0x0f0e0b0a, 0x4f4e4b4a, + 0x85848180, 0xc5c4c1c0, + 0x87868382, 0xc7c6c3c2, + 0x8d8c8988, 0xcdccc9c8, + 0x8f8e8b8a, 0xcfcecbca, + }, + { + 0x15141110, 0x55545150, + 0x17161312, 0x57565352, + 0x1d1c1918, 0x5d5c5958, + 0x1f1e1b1a, 0x5f5e5b5a, + 0x95949190, 0xd5d4d1d0, + 0x97969392, 0xd7d6d3d2, + 0x9d9c9998, 0xdddcd9d8, + 0x9f9e9b9a, 0xdfdedbda, + }, + { + 0x25242120, 0x65646160, + 0x27262322, 0x67666362, + 0x2d2c2928, 0x6d6c6968, + 0x2f2e2b2a, 0x6f6e6b6a, + 0xa5a4a1a0, 0xe5e4e1e0, + 0xa7a6a3a2, 0xe7e6e3e2, + 0xadaca9a8, 0xedece9e8, + 0xafaeabaa, 0xefeeebea, + }, + { + 0x35343130, 0x75747170, + 0x37363332, 0x77767372, + 0x3d3c3938, 0x7d7c7978, + 0x3f3e3b3a, 0x7f7e7b7a, + 0xb5b4b1b0, 0xf5f4f1f0, + 0xb7b6b3b2, 0xf7f6f3f2, + 0xbdbcb9b8, 0xfdfcf9f8, + 0xbfbebbba, 0xfffefbfa, + }, +}; + +#define MICRO_TILE_DIM (8) +#define MICRO_TILE_SZ (256) +#define TEXELS_PER_ELEMENT (1) +#define BPP (8) + +shared uint scratch[16]; + +void main() { + uint slot = gl_LocalInvocationID.x >> 2u; + atomicAnd(scratch[slot], 0); + + uint x = gl_GlobalInvocationID.x % info.pitch; + uint y = (gl_GlobalInvocationID.x / info.pitch) % info.height; + uint z = gl_GlobalInvocationID.x / (info.pitch * info.height); + + uint col = bitfieldExtract(x, 0, 3); + uint row = bitfieldExtract(y, 0, 3); + uint lut = bitfieldExtract(z, 0, 2); + uint idx_dw = lut_8bpp[lut][(col + row * MICRO_TILE_DIM) >> 2u]; + uint byte_ofs = (gl_LocalInvocationID.x & 3u) * 8; + uint idx = bitfieldExtract(idx_dw >> byte_ofs, 0, 8); + + uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ; + uint tile_row = y / MICRO_TILE_DIM; + uint tile_column = x / MICRO_TILE_DIM; + uint tile_offs = ((tile_row * info.c0) + tile_column) * MICRO_TILE_SZ; + uint offs = (slice_offs + tile_offs) + (idx * BPP / 8); + + uint p0 = in_data[offs >> 2u]; + uint byte = bitfieldExtract(p0 >> (offs * 8), 0, 8); + atomicOr(scratch[slot], byte << byte_ofs); + + if (byte_ofs == 0) { + out_data[gl_GlobalInvocationID.x >> 2u] = scratch[slot]; + } +} diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index c1243dafb..0e550c7dc 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -15,6 +15,7 @@ #include "video_core/host_shaders/detile_m8x2_comp.h" #include "video_core/host_shaders/detile_macro32x1_comp.h" #include "video_core/host_shaders/detile_macro32x2_comp.h" +#include "video_core/host_shaders/detile_macro8x1_comp.h" #include #include @@ -108,6 +109,8 @@ const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const { } case AmdGpu::TilingMode::Texture_Volume: switch (format) { + case vk::Format::eR8Uint: + return &detilers[DetilerType::Macro8x1]; case vk::Format::eR32Uint: return &detilers[DetilerType::Macro32x1]; case vk::Format::eR32G32Uint: @@ -133,8 +136,8 @@ TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& sc static const std::array detiler_shaders{ HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP, HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X2_COMP, - HostShaders::DETILE_M32X4_COMP, HostShaders::DETILE_MACRO32X1_COMP, - HostShaders::DETILE_MACRO32X2_COMP, + HostShaders::DETILE_M32X4_COMP, HostShaders::DETILE_MACRO8X1_COMP, + HostShaders::DETILE_MACRO32X1_COMP, HostShaders::DETILE_MACRO32X2_COMP, }; boost::container::static_vector bindings{ @@ -323,7 +326,6 @@ std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o params.height = info.size.height; if (info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) { ASSERT(info.resources.levels == 1); - ASSERT(info.num_bits >= 32); const auto tiles_per_row = info.pitch / 8u; const auto tiles_per_slice = tiles_per_row * ((info.size.height + 7u) / 8u); params.sizes[0] = tiles_per_row; diff --git a/src/video_core/texture_cache/tile_manager.h b/src/video_core/texture_cache/tile_manager.h index 1d731d2f2..bcf5accd3 100644 --- a/src/video_core/texture_cache/tile_manager.h +++ b/src/video_core/texture_cache/tile_manager.h @@ -18,6 +18,7 @@ enum DetilerType : u32 { Micro32x2, Micro32x4, + Macro8x1, Macro32x1, Macro32x2,