From a24e7ba61b11f84ec3d6388a13101f95cecca0d3 Mon Sep 17 00:00:00 2001
From: Liam <byteslice@airmail.cc>
Date: Thu, 14 Apr 2022 09:57:06 -0400
Subject: [PATCH 1/3] maxwell3d: add small_index_2 register

---
 src/video_core/engines/maxwell_3d.cpp | 5 +++++
 src/video_core/engines/maxwell_3d.h   | 7 ++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 54a902f56f..7399e760fe 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -214,6 +214,11 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
         regs.index_array.first = regs.small_index.first;
         dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
         return DrawArrays();
+    case MAXWELL3D_REG_INDEX(small_index_2):
+        regs.index_array.count = regs.small_index_2.count;
+        regs.index_array.first = regs.small_index_2.first;
+        dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+        return DrawArrays();
     case MAXWELL3D_REG_INDEX(topology_override):
         use_topology_override = true;
         return;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 3f5b38e554..d36dc3daa0 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1220,7 +1220,12 @@ public:
                     BitField<16, 16, u32> count;
                 } small_index;
 
-                INSERT_PADDING_WORDS_NOINIT(0x6);
+                union {
+                    BitField<0, 16, u32> first;
+                    BitField<16, 16, u32> count;
+                } small_index_2; // drawn via ProcessMethodCall, like small_index
+
+                INSERT_PADDING_WORDS_NOINIT(0x5);
 
                 INSERT_PADDING_WORDS_NOINIT(0x1F);
 

From d4571b123d814272c4a7069cfe29ba8e957b1f10 Mon Sep 17 00:00:00 2001
From: Liam <byteslice@airmail.cc>
Date: Thu, 14 Apr 2022 09:57:55 -0400
Subject: [PATCH 2/3] buffer_cache: cap vertex buffer sizes

---
 src/video_core/buffer_cache/buffer_cache.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 21bfb76a45..3f2bf6294c 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1311,7 +1311,20 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
     const GPUVAddr gpu_addr_begin = array.StartAddress();
     const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
     const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
-    const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
+    u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
+    if (address_size >= 64_MiB) {
+        // Reported vertex buffer size is very large, cap to mapped buffer size
+        GPUVAddr submapped_addr_end = gpu_addr_begin;
+
+        const auto ranges{gpu_memory.GetSubmappedRange(gpu_addr_begin, address_size)};
+        if (ranges.size() > 0) {
+            const auto& [addr, size] = *ranges.begin();
+            submapped_addr_end = addr + size;
+        }
+
+        address_size =
+            std::min(address_size, static_cast<u32>(submapped_addr_end - gpu_addr_begin));
+    }
     const u32 size = address_size; // TODO: Analyze stride and number of vertices
     if (array.enable == 0 || size == 0 || !cpu_addr) {
         vertex_buffers[index] = NULL_BINDING;

From f783883bf89311b51aef76b6b8b07d112369eca7 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 13 Apr 2022 17:01:57 +0200
Subject: [PATCH 3/3] video_core: implement formats for N64 emulation

---
 .../renderer_opengl/gl_texture_cache.cpp      | 26 ++++++++++
 .../renderer_opengl/maxwell_to_gl.h           |  2 +
 .../renderer_vulkan/maxwell_to_vk.cpp         |  2 +
 .../renderer_vulkan/vk_texture_cache.cpp      | 49 ++++++++++++++++---
 src/video_core/surface.h                      |  8 +++
 .../texture_cache/format_lookup_table.cpp     |  6 +++
 src/video_core/texture_cache/formatter.h      |  4 ++
 .../vulkan_common/vulkan_device.cpp           | 12 +++++
 8 files changed, 102 insertions(+), 7 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index d120763580..f8c6e5c7e6 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -182,6 +182,26 @@ GLenum AttachmentType(PixelFormat format) {
     }
 }
 
+GLint ConvertA5B5G5R1_UNORM(SwizzleSource source) {
+    switch (source) {
+    case SwizzleSource::A:
+        return GL_RED; // guest A is read from the texture's red component
+    case SwizzleSource::B:
+        return GL_GREEN; // guest B is read from the texture's green component
+    case SwizzleSource::G:
+        return GL_BLUE; // guest G is read from the texture's blue component
+    case SwizzleSource::R:
+        return GL_ALPHA; // guest R is read from the texture's alpha component
+    case SwizzleSource::Zero:
+        return GL_ZERO;
+    case SwizzleSource::OneFloat:
+    case SwizzleSource::OneInt:
+        return GL_ONE;
+    }
+    UNREACHABLE_MSG("Invalid swizzle source={}", source); // no case matched
+    return GL_NONE;
+}
+
 void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4> swizzle) {
     switch (format) {
     case PixelFormat::D24_UNORM_S8_UINT:
@@ -192,6 +212,12 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
                             TextureMode(format, swizzle[0] == SwizzleSource::R));
         std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
         break;
+    case PixelFormat::A5B5G5R1_UNORM: {
+        std::array<GLint, 4> gl_swizzle;
+        std::ranges::transform(swizzle, gl_swizzle.begin(), ConvertA5B5G5R1_UNORM);
+        glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
+        return;
+    }
     default:
         break;
     }
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index db5bf1d30a..03adf3d4c5 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -30,6 +30,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
     {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV},           // A2B10G10R10_UNORM
     {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
     {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV},             // A1B5G5R5_UNORM
+    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1},                 // A5B5G5R1_UNORM
     {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                                // R8_UNORM
     {GL_R8_SNORM, GL_RED, GL_BYTE},                                   // R8_SNORM
     {GL_R8I, GL_RED_INTEGER, GL_BYTE},                                // R8_SINT
@@ -87,6 +88,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
     {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT},                         // BC3_SRGB
     {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM},                            // BC7_SRGB
     {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV},               // A4B4G4R4_UNORM
+    {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                                // R4G4_UNORM
     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR},                        // ASTC_2D_4X4_SRGB
     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR},                        // ASTC_2D_8X8_SRGB
     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR},                        // ASTC_2D_8X5_SRGB
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 1c136c4108..a2c6d0e6cb 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -127,6 +127,7 @@ struct FormatTuple {
     {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM
     {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage},  // A2B10G10R10_UINT
     {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable},         // A1B5G5R5_UNORM (flipped with swizzle)
+    {VK_FORMAT_R5G5B5A1_UNORM_PACK16},                     // A5B5G5R1_UNORM (specially swizzled)
     {VK_FORMAT_R8_UNORM, Attachable | Storage},            // R8_UNORM
     {VK_FORMAT_R8_SNORM, Attachable | Storage},            // R8_SNORM
     {VK_FORMAT_R8_SINT, Attachable | Storage},             // R8_SINT
@@ -184,6 +185,7 @@ struct FormatTuple {
     {VK_FORMAT_BC3_SRGB_BLOCK},                                // BC3_SRGB
     {VK_FORMAT_BC7_SRGB_BLOCK},                                // BC7_SRGB
     {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable},             // A4B4G4R4_UNORM
+    {VK_FORMAT_R4G4_UNORM_PACK8},                              // R4G4_UNORM
     {VK_FORMAT_ASTC_4x4_SRGB_BLOCK},                           // ASTC_2D_4X4_SRGB
     {VK_FORMAT_ASTC_8x8_SRGB_BLOCK},                           // ASTC_2D_8X8_SRGB
     {VK_FORMAT_ASTC_8x5_SRGB_BLOCK},                           // ASTC_2D_8X5_SRGB
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 2c2ccc7c6c..49691ce0c3 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -438,6 +438,32 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
     }
 }
 
+[[nodiscard]] SwizzleSource SwapGreenRed(SwizzleSource value) {
+    // Exchanges the R and G sources; any other source is returned unchanged
+    if (value == SwizzleSource::G) {
+        return SwizzleSource::R;
+    }
+    if (value == SwizzleSource::R) {
+        return SwizzleSource::G;
+    }
+    return value;
+}
+
+[[nodiscard]] SwizzleSource SwapSpecial(SwizzleSource value) {
+    switch (value) {
+    case SwizzleSource::R:
+        return SwizzleSource::A; // R <-> A
+    case SwizzleSource::A:
+        return SwizzleSource::R;
+    case SwizzleSource::B:
+        return SwizzleSource::G; // G <-> B
+    case SwizzleSource::G:
+        return SwizzleSource::B;
+    default:
+        return value; // remaining sources pass through untouched
+    }
+}
+
 void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
                        VkImageAspectFlags aspect_mask, bool is_initialized,
                        std::span<const VkBufferImageCopy> copies) {
@@ -554,14 +580,25 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im
     };
 }
 
-[[nodiscard]] bool IsFormatFlipped(PixelFormat format, bool emulate_bgr565) {
+void TryTransformSwizzleIfNeeded(PixelFormat format, std::array<SwizzleSource, 4>& swizzle,
+                                 bool emulate_bgr565) { // rewrites `swizzle` in place
     switch (format) {
     case PixelFormat::A1B5G5R5_UNORM:
-        return true;
+        std::ranges::transform(swizzle, swizzle.begin(), SwapBlueRed);
+        break;
     case PixelFormat::B5G6R5_UNORM:
-        return emulate_bgr565;
+        if (emulate_bgr565) { // caller passes device->MustEmulateBGR565()
+            std::ranges::transform(swizzle, swizzle.begin(), SwapBlueRed);
+        }
+        break;
+    case PixelFormat::A5B5G5R1_UNORM:
+        std::ranges::transform(swizzle, swizzle.begin(), SwapSpecial); // R<->A, G<->B
+        break;
+    case PixelFormat::R4G4_UNORM:
+        std::ranges::transform(swizzle, swizzle.begin(), SwapGreenRed); // R<->G
+        break;
     default:
-        return false;
+        break; // every other format keeps its swizzle as given
     }
 }
 
@@ -1496,9 +1533,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
     };
     if (!info.IsRenderTarget()) {
         swizzle = info.Swizzle();
-        if (IsFormatFlipped(format, device->MustEmulateBGR565())) {
-            std::ranges::transform(swizzle, swizzle.begin(), SwapBlueRed);
-        }
+        TryTransformSwizzleIfNeeded(format, swizzle, device->MustEmulateBGR565());
         if ((aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) {
             std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
         }
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 5704cf16aa..86fea61ae2 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -25,6 +25,7 @@ enum class PixelFormat {
     A2B10G10R10_UNORM,
     A2B10G10R10_UINT,
     A1B5G5R5_UNORM,
+    A5B5G5R1_UNORM,
     R8_UNORM,
     R8_SNORM,
     R8_SINT,
@@ -82,6 +83,7 @@ enum class PixelFormat {
     BC3_SRGB,
     BC7_SRGB,
     A4B4G4R4_UNORM,
+    R4G4_UNORM,
     ASTC_2D_4X4_SRGB,
     ASTC_2D_8X8_SRGB,
     ASTC_2D_8X5_SRGB,
@@ -156,6 +158,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
     1,  // A2B10G10R10_UNORM
     1,  // A2B10G10R10_UINT
     1,  // A1B5G5R5_UNORM
+    1,  // A5B5G5R1_UNORM
     1,  // R8_UNORM
     1,  // R8_SNORM
     1,  // R8_SINT
@@ -213,6 +216,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
     4,  // BC3_SRGB
     4,  // BC7_SRGB
     1,  // A4B4G4R4_UNORM
+    1,  // R4G4_UNORM
     4,  // ASTC_2D_4X4_SRGB
     8,  // ASTC_2D_8X8_SRGB
     8,  // ASTC_2D_8X5_SRGB
@@ -256,6 +260,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
     1,  // A2B10G10R10_UNORM
     1,  // A2B10G10R10_UINT
     1,  // A1B5G5R5_UNORM
+    1,  // A5B5G5R1_UNORM
     1,  // R8_UNORM
     1,  // R8_SNORM
     1,  // R8_SINT
@@ -313,6 +318,7 @@ constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
     4,  // BC3_SRGB
     4,  // BC7_SRGB
     1,  // A4B4G4R4_UNORM
+    1,  // R4G4_UNORM
     4,  // ASTC_2D_4X4_SRGB
     8,  // ASTC_2D_8X8_SRGB
     5,  // ASTC_2D_8X5_SRGB
@@ -356,6 +362,7 @@ constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
     32,  // A2B10G10R10_UNORM
     32,  // A2B10G10R10_UINT
     16,  // A1B5G5R5_UNORM
+    16,  // A5B5G5R1_UNORM
     8,   // R8_UNORM
     8,   // R8_SNORM
     8,   // R8_SINT
@@ -413,6 +420,7 @@ constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
     128, // BC3_SRGB
     128, // BC7_UNORM
     16,  // A4B4G4R4_UNORM
+    8,   // R4G4_UNORM
     128, // ASTC_2D_4X4_SRGB
     128, // ASTC_2D_8X8_SRGB
     128, // ASTC_2D_8X5_SRGB
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index afa807d5d3..20e64a7c2f 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -63,6 +63,10 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
         return PixelFormat::A1B5G5R5_UNORM;
     case Hash(TextureFormat::A4B4G4R4, UNORM):
         return PixelFormat::A4B4G4R4_UNORM;
+    case Hash(TextureFormat::G4R4, UNORM):
+        return PixelFormat::R4G4_UNORM;
+    case Hash(TextureFormat::A5B5G5R1, UNORM):
+        return PixelFormat::A5B5G5R1_UNORM;
     case Hash(TextureFormat::R8, UNORM):
         return PixelFormat::R8_UNORM;
     case Hash(TextureFormat::R8, SNORM):
@@ -143,6 +147,8 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
         return PixelFormat::S8_UINT_D24_UNORM;
     case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR):
         return PixelFormat::S8_UINT_D24_UNORM;
+    case Hash(TextureFormat::D24S8, UNORM, UINT, UINT, UINT, LINEAR):
+        return PixelFormat::D24_UNORM_S8_UINT;
     case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
         return PixelFormat::D32_FLOAT_S8_UINT;
     case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR):
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
index b2c81057ba..6f5afc5a99 100644
--- a/src/video_core/texture_cache/formatter.h
+++ b/src/video_core/texture_cache/formatter.h
@@ -38,6 +38,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
                 return "A2B10G10R10_UINT";
             case PixelFormat::A1B5G5R5_UNORM:
                 return "A1B5G5R5_UNORM";
+            case PixelFormat::A5B5G5R1_UNORM:
+                return "A5B5G5R1_UNORM";
             case PixelFormat::R8_UNORM:
                 return "R8_UNORM";
             case PixelFormat::R8_SNORM:
@@ -152,6 +154,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
                 return "BC7_SRGB";
             case PixelFormat::A4B4G4R4_UNORM:
                 return "A4B4G4R4_UNORM";
+            case PixelFormat::R4G4_UNORM:
+                return "R4G4_UNORM";
             case PixelFormat::ASTC_2D_4X4_SRGB:
                 return "ASTC_2D_4X4_SRGB";
             case PixelFormat::ASTC_2D_8X8_SRGB:
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index f3a05ada9c..bd05a1f84c 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -45,6 +45,12 @@ constexpr std::array B5G6R5_UNORM_PACK16{
     VK_FORMAT_R5G6B5_UNORM_PACK16,
     VK_FORMAT_UNDEFINED,
 };
+
+constexpr std::array R4G4_UNORM_PACK8{ // alternatives for VK_FORMAT_R4G4_UNORM_PACK8
+    VK_FORMAT_R8_UNORM,
+    VK_FORMAT_UNDEFINED, // NOTE(review): presumably the list terminator — confirm
+};
+
 } // namespace Alternatives
 
 enum class NvidiaArchitecture {
@@ -95,6 +101,8 @@ constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
         return Alternatives::DEPTH16_UNORM_STENCIL8_UINT.data();
     case VK_FORMAT_B5G6R5_UNORM_PACK16:
         return Alternatives::B5G6R5_UNORM_PACK16.data();
+    case VK_FORMAT_R4G4_UNORM_PACK8:
+        return Alternatives::R4G4_UNORM_PACK8.data();
     default:
         return nullptr;
     }
@@ -122,6 +130,8 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
         VK_FORMAT_A8B8G8R8_SRGB_PACK32,
         VK_FORMAT_R5G6B5_UNORM_PACK16,
         VK_FORMAT_B5G6R5_UNORM_PACK16,
+        VK_FORMAT_R5G5B5A1_UNORM_PACK16,
+        VK_FORMAT_B5G5R5A1_UNORM_PACK16,
         VK_FORMAT_A2B10G10R10_UNORM_PACK32,
         VK_FORMAT_A2B10G10R10_UINT_PACK32,
         VK_FORMAT_A1R5G5B5_UNORM_PACK16,
@@ -160,7 +170,9 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
         VK_FORMAT_R16G16B16A16_SFLOAT,
         VK_FORMAT_B8G8R8A8_UNORM,
         VK_FORMAT_B8G8R8A8_SRGB,
+        VK_FORMAT_R4G4_UNORM_PACK8,
         VK_FORMAT_R4G4B4A4_UNORM_PACK16,
+        VK_FORMAT_B4G4R4A4_UNORM_PACK16,
         VK_FORMAT_D32_SFLOAT,
         VK_FORMAT_D16_UNORM,
         VK_FORMAT_S8_UINT,