diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp
index ffec7030..645bcf42 100644
--- a/src/core/libraries/gnmdriver/gnmdriver.cpp
+++ b/src/core/libraries/gnmdriver/gnmdriver.cpp
@@ -2155,6 +2155,7 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload() {
 
 int PS4_SYSV_ABI sceGnmSubmitDone() {
     LOG_DEBUG(Lib_GnmDriver, "called");
+    WaitGpuIdle();
     if (!liverpool->IsGpuIdle()) {
         submission_lock = true;
     }
diff --git a/src/emulator.cpp b/src/emulator.cpp
index a469a31c..e631698f 100644
--- a/src/emulator.cpp
+++ b/src/emulator.cpp
@@ -26,7 +26,6 @@
 #include "core/libraries/libs.h"
 #include "core/libraries/ngs2/ngs2.h"
 #include "core/libraries/rtc/rtc.h"
-#include "core/libraries/videoout/video_out.h"
 #include "core/linker.h"
 #include "core/memory.h"
 #include "emulator.h"
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index b0298cbb..11d2a1dd 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -208,6 +208,9 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
     if (info.uses_group_quad) {
         ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
     }
+    if (info.uses_group_ballot) {
+        ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
+    }
     switch (program.info.stage) {
     case Stage::Compute: {
         const std::array workgroup_size{ctx.runtime_info.cs_info.workgroup_size};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 64ce532b..7df62a91 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -305,7 +305,7 @@ void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
     const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
     const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset);
     if (buffer.is_integer) {
-        value = ctx.OpBitcast(ctx.U32[4], value);
+        value = ctx.OpBitcast(ctx.S32[4], value);
     }
     ctx.OpImageWrite(tex_buffer, coord, value);
 }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index 898de8b5..2d13d09f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -27,7 +27,8 @@ Id EmitReadFirstLane(EmitContext& ctx, Id value) {
 }
 
 Id EmitReadLane(EmitContext& ctx, Id value, u32 lane) {
-    UNREACHABLE();
+    return ctx.OpGroupNonUniformBroadcast(ctx.U32[1], SubgroupScope(ctx), value,
+                                          ctx.ConstU32(lane));
 }
 
 Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane) {
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index b65cbdf4..8554f861 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -324,16 +324,18 @@ void EmitContext::DefineOutputs() {
 
 void EmitContext::DefinePushDataBlock() {
     // Create push constants block for instance steps rates
-    const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4]), "AuxData")};
+    const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4], U32[4]), "AuxData")};
     Decorate(struct_type, spv::Decoration::Block);
     MemberName(struct_type, 0, "sr0");
     MemberName(struct_type, 1, "sr1");
     MemberName(struct_type, 2, "buf_offsets0");
     MemberName(struct_type, 3, "buf_offsets1");
+    MemberName(struct_type, 4, "buf_offsets2");
     MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
     MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
     MemberDecorate(struct_type, 2, spv::Decoration::Offset, 8U);
     MemberDecorate(struct_type, 3, spv::Decoration::Offset, 24U);
+    MemberDecorate(struct_type, 4, spv::Decoration::Offset, 40U);
     push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
     Name(push_data_block, "push_data");
     interfaces.push_back(push_data_block);
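Note (editorial, not part of the patch): the AuxData push-constant block gains a third uvec4 of packed buffer offsets, growing it from 40 to 56 bytes. A host-side mirror of that layout, as a sketch — the field names come from the member decorations above; the byte packing is an assumption based on PushData::AddOffset in info.h:

```cpp
#include <array>
#include <cstddef>
#include <cstdint>

// Sketch: host mirror of the shader-side "AuxData" block.
struct AuxDataMirror {
    std::uint32_t sr0;                        // instance step rate 0, offset 0
    std::uint32_t sr1;                        // instance step rate 1, offset 4
    std::array<std::uint8_t, 48> buf_offsets; // three uvec4s at offsets 8/24/40
};
static_assert(offsetof(AuxDataMirror, buf_offsets) == 8);
static_assert(sizeof(AuxDataMirror) == 56); // fits the 128-byte push-constant minimum
```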
diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp
index 4e0c110c..c9144fac 100644
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -171,7 +171,7 @@ T Translator::GetSrc(const InstOperand& operand) {
         }
     } else {
         if (operand.input_modifier.abs) {
-            LOG_WARNING(Render_Vulkan, "Input abs modifier on integer instruction");
+            value = ir.IAbs(value);
         }
         if (operand.input_modifier.neg) {
             UNREACHABLE();
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index 5af28336..f602e762 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -117,6 +117,10 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
         return BUFFER_ATOMIC(AtomicOp::Add, inst);
     case Opcode::BUFFER_ATOMIC_SWAP:
         return BUFFER_ATOMIC(AtomicOp::Swap, inst);
+    case Opcode::BUFFER_ATOMIC_UMIN:
+        return BUFFER_ATOMIC(AtomicOp::Umin, inst);
+    case Opcode::BUFFER_ATOMIC_UMAX:
+        return BUFFER_ATOMIC(AtomicOp::Umax, inst);
     default:
         LogMissingOpcode(inst);
     }
@@ -280,6 +284,7 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
     info.has_bias.Assign(flags.test(MimgModifier::LodBias));
     info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
     info.force_level0.Assign(flags.test(MimgModifier::Level0));
+    info.has_offset.Assign(flags.test(MimgModifier::Offset));
     // info.explicit_lod.Assign(explicit_lod);
     info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
 
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index 0184a7f6..c4e16b7a 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -1,6 +1,5 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
-
 #pragma once
 
 #include <span>
@@ -89,7 +88,7 @@ struct PushData {
     u32 step0;
     u32 step1;
-    std::array<u8, 32> buf_offsets;
+    std::array<u8, 48> buf_offsets;
 
     void AddOffset(u32 binding, u32 offset) {
         ASSERT(offset < 256 && binding < buf_offsets.size());
         buf_offsets[binding] = offset;
@@ -166,6 +165,7 @@ struct Info {
     bool has_image_query{};
     bool uses_lane_id{};
     bool uses_group_quad{};
+    bool uses_group_ballot{};
     bool uses_shared{};
     bool uses_fp16{};
     bool uses_step_rates{};
@@ -181,6 +181,7 @@
         const u32* base = user_data.data();
         if (ptr_index != IR::NumScalarRegs) {
            std::memcpy(&base, &user_data[ptr_index], sizeof(base));
+            base = reinterpret_cast<const u32*>(VAddr(base) & 0xFFFFFFFFFFFFULL);
         }
         std::memcpy(&data, base + dword_offset, sizeof(T));
         return data;
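Note (editorial, not part of the patch): ReadUd now strips the top 16 bits of a pointer fetched from the user-data registers before dereferencing it, keeping only the canonical 48-bit address; the upper bits of such guest pointers can carry non-address metadata. The same masking in isolation (names are illustrative):

```cpp
#include <cstdint>

// Sketch: keep the low 48 bits of a guest pointer, dropping any tag bits.
inline const std::uint32_t* MaskToCanonical(const std::uint32_t* base) {
    const auto addr = reinterpret_cast<std::uintptr_t>(base) & 0xFFFFFFFFFFFFULL;
    return reinterpret_cast<const std::uint32_t*>(addr);
}
```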
diff --git a/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp b/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
index a87cf31b..76bfcf91 100644
--- a/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
+++ b/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
@@ -21,8 +21,7 @@ void LowerSharedMemToRegisters(IR::Program& program) {
             const IR::Inst* prod = inst.Arg(0).InstRecursive();
             const auto it = std::ranges::find_if(ds_writes, [&](const IR::Inst* write) {
                 const IR::Inst* write_prod = write->Arg(0).InstRecursive();
-                return write_prod->Arg(1).U32() == prod->Arg(1).U32() &&
-                       write_prod->Arg(0) == prod->Arg(0);
+                return write_prod->Arg(1).U32() == prod->Arg(1).U32();
             });
             ASSERT(it != ds_writes.end());
             // Replace data read with value written.
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index aa5d39ae..6b2aa8bb 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -98,22 +98,7 @@ bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
 }
 
 IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
-    switch (inst.GetOpcode()) {
-    case IR::Opcode::LoadBufferU32:
-    case IR::Opcode::LoadBufferU32x2:
-    case IR::Opcode::LoadBufferU32x3:
-    case IR::Opcode::LoadBufferU32x4:
-    case IR::Opcode::StoreBufferU32:
-    case IR::Opcode::StoreBufferU32x2:
-    case IR::Opcode::StoreBufferU32x3:
-    case IR::Opcode::StoreBufferU32x4:
-    case IR::Opcode::ReadConstBuffer:
-    case IR::Opcode::BufferAtomicIAdd32:
-    case IR::Opcode::BufferAtomicSwap32:
-        return IR::Type::U32;
-    default:
-        UNREACHABLE();
-    }
+    return IR::Type::U32;
 }
 
 bool IsImageAtomicInstruction(const IR::Inst& inst) {
@@ -223,12 +208,8 @@ public:
     u32 Add(const SamplerResource& desc) {
         const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) {
-            if (desc.sgpr_base == existing.sgpr_base &&
-                desc.dword_offset == existing.dword_offset) {
-                return true;
-            }
-            // Samplers with different bindings might still be the same.
-            return existing.GetSharp(info) == desc.GetSharp(info);
+            return desc.sgpr_base == existing.sgpr_base &&
+                   desc.dword_offset == existing.dword_offset;
         })};
         return index;
     }
 
diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
index 5ce024b4..63fe8a57 100644
--- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
@@ -39,6 +39,11 @@ void Visit(Info& info, IR::Inst& inst) {
     case IR::Opcode::QuadShuffle:
         info.uses_group_quad = true;
         break;
+    case IR::Opcode::ReadLane:
+    case IR::Opcode::ReadFirstLane:
+    case IR::Opcode::WriteLane:
+        info.uses_group_ballot = true;
+        break;
     case IR::Opcode::Discard:
     case IR::Opcode::DiscardCond:
         info.has_discard = true;
diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h
index 3dd75dbd..bbcafdb8 100644
--- a/src/shader_recompiler/specialization.h
+++ b/src/shader_recompiler/specialization.h
@@ -37,14 +37,14 @@ struct ImageSpecialization {
  * after the first compilation of a module.
 */
 struct StageSpecialization {
-    static constexpr size_t MaxStageResources = 32;
+    static constexpr size_t MaxStageResources = 64;
 
     const Shader::Info* info;
     RuntimeInfo runtime_info;
     std::bitset<MaxStageResources> bitset{};
     boost::container::small_vector<BufferSpecialization, 16> buffers;
     boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
-    boost::container::small_vector<ImageSpecialization, 16> images;
+    boost::container::small_vector<ImageSpecialization, 32> images;
     u32 start_binding{};
 
     explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h
index fd7980c1..064b8995 100644
--- a/src/video_core/amdgpu/pm4_cmds.h
+++ b/src/video_core/amdgpu/pm4_cmds.h
@@ -187,6 +187,11 @@ struct PM4CmdSetData {
         BitField<28, 4, u32> index; ///< Index for UCONFIG/CONTEXT on CI+
                                     ///< Program to zero for other opcodes and on SI
     };
+    u32 data[0];
+
+    [[nodiscard]] u32 Size() const {
+        return header.count << 2u;
+    }
 
     template <typename... Args>
     static constexpr u32* SetContextReg(u32* cmdbuf, Args... data) {
@@ -350,6 +355,16 @@ struct PM4CmdEventWriteEop {
     }
 };
 
+struct PM4CmdAcquireMem {
+    PM4Type3Header header;
+    u32 cp_coher_cntl;
+    u32 cp_coher_size_lo;
+    u32 cp_coher_size_hi;
+    u32 cp_coher_base_lo;
+    u32 cp_coher_base_hi;
+    u32 poll_interval;
+};
+
 enum class DmaDataDst : u32 {
     Memory = 0,
     Gds = 1,
@@ -467,6 +482,10 @@ struct PM4CmdWriteData {
     };
     u32 data[0];
 
+    u32 Size() const {
+        return (header.count.Value() - 2) * 4;
+    }
+
     template <typename T>
     void Address(T addr) {
         addr64 = static_cast<u64>(addr);
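Note (editorial, not part of the patch): both Size() helpers lean on the PM4 type-3 rule that header.count holds the number of dwords following the header, minus one. PM4CmdSetData has one register-offset dword before the payload, so count equals the payload dword count; PM4CmdWriteData has a control dword plus two address dwords, hence the `- 2`. The arithmetic in isolation:

```cpp
#include <cstdint>

// Assumed PM4 type-3 convention: header.count = (dwords after header) - 1.
constexpr std::uint32_t SetDataPayloadBytes(std::uint32_t count) {
    return count << 2u; // one reg-offset dword precedes the data, so count == data dwords
}
constexpr std::uint32_t WriteDataPayloadBytes(std::uint32_t count) {
    return (count - 2) * 4; // control + two address dwords precede the data
}
static_assert(SetDataPayloadBytes(4) == 16);   // 4 payload dwords
static_assert(WriteDataPayloadBytes(6) == 16); // 6 - 2 = 4 payload dwords
```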
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index 86af05bf..2ed0ddc8 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -577,9 +577,6 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
         return false;
     }
     Image& image = texture_cache.GetImage(image_id);
-    if (image.info.guest_size_bytes > size) {
-        return false;
-    }
     boost::container::small_vector<vk::BufferImageCopy, 8> copies;
     u32 offset = buffer.Offset(image.cpu_addr);
     const u32 num_layers = image.info.resources.layers;
@@ -604,11 +601,13 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
         });
         offset += mip_ofs * num_layers;
     }
-    scheduler.EndRendering();
-    image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
-    const auto cmdbuf = scheduler.CommandBuffer();
-    cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
-                             copies);
+    if (!copies.empty()) {
+        scheduler.EndRendering();
+        image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
+        const auto cmdbuf = scheduler.CommandBuffer();
+        cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
+                                 copies);
+    }
 
     return true;
 }
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index aeae0813..96358bf6 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -12,9 +12,11 @@ namespace Vulkan {
 
 ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler_,
-                                 vk::PipelineCache pipeline_cache, u64 compute_key_,
-                                 const Shader::Info& info_, vk::ShaderModule module)
-    : instance{instance_}, scheduler{scheduler_}, compute_key{compute_key_}, info{&info_} {
+                                 DescriptorHeap& desc_heap_, vk::PipelineCache pipeline_cache,
+                                 u64 compute_key_, const Shader::Info& info_,
+                                 vk::ShaderModule module)
+    : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, compute_key{compute_key_},
+      info{&info_} {
     const vk::PipelineShaderStageCreateInfo shader_ci = {
         .stage = vk::ShaderStageFlagBits::eCompute,
         .module = module,
         .pName = "main",
@@ -66,8 +68,12 @@
         .size = sizeof(Shader::PushData),
     };
 
+    uses_push_descriptors = binding < instance.MaxPushDescriptors();
+    const auto flags = uses_push_descriptors
+                           ? vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR
+                           : vk::DescriptorSetLayoutCreateFlagBits{};
     const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = {
-        .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
+        .flags = flags,
         .bindingCount = static_cast<u32>(bindings.size()),
         .pBindings = bindings.data(),
     };
@@ -101,8 +107,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
                                     VideoCore::TextureCache& texture_cache) const {
     // Bind resource buffers and textures.
     boost::container::static_vector<vk::BufferView, 8> buffer_views;
-    boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
-    boost::container::static_vector<vk::DescriptorImageInfo, 16> image_infos;
+    boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
+    boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
     boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
     boost::container::small_vector<vk::BufferMemoryBarrier2, 16> buffer_barriers;
     Shader::PushData push_data{};
@@ -265,9 +271,21 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
         cmdbuf.pipelineBarrier2(dependencies);
     }
 
+    if (uses_push_descriptors) {
+        cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0,
+                                    set_writes);
+    } else {
+        const auto desc_set = desc_heap.Commit(*desc_layout);
+        for (auto& set_write : set_writes) {
+            set_write.dstSet = desc_set;
+        }
+        instance.GetDevice().updateDescriptorSets(set_writes, {});
+        cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, desc_set,
+                                  {});
+    }
+
     cmdbuf.pushConstants(*pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0u,
                          sizeof(push_data), &push_data);
-    cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, set_writes);
 
     return true;
 }
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 54eaf653..8a6213a2 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -16,12 +16,13 @@ namespace Vulkan {
 
 class Instance;
 class Scheduler;
+class DescriptorHeap;
 
 class ComputePipeline {
 public:
     explicit ComputePipeline(const Instance& instance, Scheduler& scheduler,
-                             vk::PipelineCache pipeline_cache, u64 compute_key,
-                             const Shader::Info& info, vk::ShaderModule module);
+                             DescriptorHeap& desc_heap, vk::PipelineCache pipeline_cache,
+                             u64 compute_key, const Shader::Info& info, vk::ShaderModule module);
     ~ComputePipeline();
 
     [[nodiscard]] vk::Pipeline Handle() const noexcept {
@@ -34,11 +35,13 @@ public:
 private:
     const Instance& instance;
     Scheduler& scheduler;
+    DescriptorHeap& desc_heap;
     vk::UniquePipeline pipeline;
     vk::UniquePipelineLayout pipeline_layout;
     vk::UniqueDescriptorSetLayout desc_layout;
     u64 compute_key;
     const Shader::Info* info;
+    bool uses_push_descriptors{};
 };
 
 } // namespace Vulkan
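Note (editorial, not part of the patch): when a layout needs more bindings than VK_KHR_push_descriptor allows (maxPushDescriptors is commonly 32), the pipeline now falls back to ordinary descriptor sets served by the new DescriptorHeap; push descriptors stay the default because they need no pool management at all. The two paths side by side, as an illustrative sketch (compute flavor — the graphics path below differs only in the bind point):

```cpp
#include <vector>
#include <vulkan/vulkan.hpp>

// Sketch of the two descriptor bind paths introduced by this patch.
void BindSet(vk::CommandBuffer cmdbuf, vk::Device device, vk::PipelineLayout layout,
             bool uses_push_descriptors, vk::DescriptorSet heap_set,
             std::vector<vk::WriteDescriptorSet>& set_writes) {
    if (uses_push_descriptors) {
        // Fast path: no set object exists, writes are consumed by the command buffer.
        cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, layout, 0, set_writes);
    } else {
        // Fallback: retarget the prepared writes at a committed heap set, then bind it.
        for (auto& write : set_writes) {
            write.dstSet = heap_set;
        }
        device.updateDescriptorSets(set_writes, {});
        cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, heap_set, {});
    }
}
```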
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index a548b70a..2f5209eb 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -17,11 +17,11 @@ namespace Vulkan {
 
 GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
-                                   const GraphicsPipelineKey& key_,
+                                   DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_,
                                    vk::PipelineCache pipeline_cache,
                                    std::span<const Shader::Info*, MaxShaderStages> infos,
                                    std::span<const vk::ShaderModule> modules)
-    : instance{instance_}, scheduler{scheduler_}, key{key_} {
+    : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, key{key_} {
     const vk::Device device = instance.GetDevice();
     std::ranges::copy(infos, stages.begin());
     BuildDescSetLayout();
 
@@ -301,7 +301,6 @@ GraphicsPipeline::~GraphicsPipeline() = default;
 
 void GraphicsPipeline::BuildDescSetLayout() {
     u32 binding{};
-    boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
     for (const auto* stage : stages) {
         if (!stage) {
             continue;
@@ -343,8 +342,12 @@ void GraphicsPipeline::BuildDescSetLayout() {
             });
         }
     }
+    uses_push_descriptors = binding < instance.MaxPushDescriptors();
+    const auto flags = uses_push_descriptors
+                           ? vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR
+                           : vk::DescriptorSetLayoutCreateFlagBits{};
     const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = {
-        .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
+        .flags = flags,
         .bindingCount = static_cast<u32>(bindings.size()),
         .pBindings = bindings.data(),
     };
@@ -446,10 +449,10 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
         });
     }
 
-    boost::container::static_vector<AmdGpu::Image, 16> tsharps;
+    boost::container::static_vector<AmdGpu::Image, 32> tsharps;
     for (const auto& image_desc : stage->images) {
         const auto tsharp = image_desc.GetSharp(*stage);
-        if (tsharp) {
+        if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
             tsharps.emplace_back(tsharp);
             VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth};
             VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
@@ -510,8 +513,18 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
     }
 
     if (!set_writes.empty()) {
-        cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0,
-                                    set_writes);
+        if (uses_push_descriptors) {
+            cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0,
+                                        set_writes);
+        } else {
+            const auto desc_set = desc_heap.Commit(*desc_layout);
+            for (auto& set_write : set_writes) {
+                set_write.dstSet = desc_set;
+            }
+            instance.GetDevice().updateDescriptorSets(set_writes, {});
+            cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0,
+                                      desc_set, {});
+        }
     }
     cmdbuf.pushConstants(*pipeline_layout,
                         vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, 0U,
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index c06ddd20..7778c417 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -19,6 +19,7 @@ static constexpr u32 MaxShaderStages = 5;
 
 class Instance;
 class Scheduler;
+class DescriptorHeap;
 
 using Liverpool = AmdGpu::Liverpool;
 
@@ -59,7 +60,8 @@ struct GraphicsPipelineKey {
 class GraphicsPipeline {
 public:
     explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler,
-                              const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache,
+                              DescriptorHeap& desc_heap, const GraphicsPipelineKey& key,
+                              vk::PipelineCache pipeline_cache,
                              std::span<const Shader::Info*, MaxShaderStages> stages,
                              std::span<const vk::ShaderModule> modules);
     ~GraphicsPipeline();
@@ -98,11 +100,14 @@ private:
 private:
     const Instance& instance;
     Scheduler& scheduler;
+    DescriptorHeap& desc_heap;
     vk::UniquePipeline pipeline;
    vk::UniquePipelineLayout pipeline_layout;
     vk::UniqueDescriptorSetLayout desc_layout;
     std::array<const Shader::Info*, MaxShaderStages> stages{};
     GraphicsPipelineKey key;
+    bool uses_push_descriptors{};
+    boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
index a19ee1c7..769a808e 100644
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -176,8 +176,10 @@ bool Instance::CreateDevice() {
         vk::PhysicalDevicePortabilitySubsetFeaturesKHR>();
     const vk::StructureChain properties_chain = physical_device.getProperties2<
         vk::PhysicalDeviceProperties2, vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
-        vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties>();
+        vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties,
+        vk::PhysicalDevicePushDescriptorPropertiesKHR>();
     subgroup_size = properties_chain.get<vk::PhysicalDeviceVulkan11Properties>().subgroupSize;
+    push_descriptor_props = properties_chain.get<vk::PhysicalDevicePushDescriptorPropertiesKHR>();
     LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", subgroup_size);
 
     features = feature_chain.get().features;
diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h
index 52310955..a64c77a5 100644
--- a/src/video_core/renderer_vulkan/vk_instance.h
+++ b/src/video_core/renderer_vulkan/vk_instance.h
@@ -207,6 +207,11 @@ public:
         return properties.limits.maxTexelBufferElements;
     }
 
+    /// Returns the maximum number of push descriptors.
+    u32 MaxPushDescriptors() const {
+        return push_descriptor_props.maxPushDescriptors;
+    }
+
     /// Returns true if shaders can declare the ClipDistance attribute
     bool IsShaderClipDistanceSupported() const {
         return features.shaderClipDistance;
@@ -242,6 +247,7 @@ private:
     vk::PhysicalDevice physical_device;
     vk::UniqueDevice device;
     vk::PhysicalDeviceProperties properties;
+    vk::PhysicalDevicePushDescriptorPropertiesKHR push_descriptor_props;
     vk::PhysicalDeviceFeatures features;
     vk::DriverIdKHR driver_id;
     vk::UniqueDebugUtilsMessengerEXT debug_callback{};
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index b4b256bb..e19467b0 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -24,6 +24,15 @@ using Shader::VsOutput;
     return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
 }
 
+constexpr static std::array DescriptorHeapSizes = {
+    vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 8192},
+    vk::DescriptorPoolSize{vk::DescriptorType::eStorageBuffer, 1024},
+    vk::DescriptorPoolSize{vk::DescriptorType::eUniformTexelBuffer, 128},
+    vk::DescriptorPoolSize{vk::DescriptorType::eStorageTexelBuffer, 128},
+    vk::DescriptorPoolSize{vk::DescriptorType::eSampledImage, 8192},
+    vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 1024},
+};
+
 void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
                          const AmdGpu::Liverpool::VsOutputControl& ctl) {
     const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
@@ -120,7 +129,8 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
 
 PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
                              AmdGpu::Liverpool* liverpool_)
-    : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_} {
+    : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
+      desc_heap{instance, scheduler.GetMasterSemaphore(),
 DescriptorHeapSizes} {
     profile = Shader::Profile{
         .supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 0x00010600U : 0x00010500U,
         .subgroup_size = instance.SubgroupSize(),
@@ -153,8 +163,8 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
     }
     const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
     if (is_new) {
-        it.value() = std::make_unique<GraphicsPipeline>(instance, scheduler, graphics_key,
-                                                        *pipeline_cache, infos, modules);
+        it.value() = std::make_unique<GraphicsPipeline>(
+            instance, scheduler, desc_heap, graphics_key, *pipeline_cache, infos, modules);
     }
     const GraphicsPipeline* pipeline = it->second.get();
     return pipeline;
@@ -166,8 +176,8 @@ const ComputePipeline* PipelineCache::GetComputePipeline() {
     }
     const auto [it, is_new] = compute_pipelines.try_emplace(compute_key);
     if (is_new) {
-        it.value() = std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache,
-                                                       compute_key, *infos[0], modules[0]);
+        it.value() = std::make_unique<ComputePipeline>(
+            instance, scheduler, desc_heap, *pipeline_cache, compute_key, *infos[0], modules[0]);
     }
     const ComputePipeline* pipeline = it->second.get();
     return pipeline;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 96e2cd04..92dcf826 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -9,6 +9,7 @@
 #include "shader_recompiler/specialization.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
 
 namespace Shader {
 struct Info;
@@ -66,6 +67,7 @@ private:
     const Instance& instance;
     Scheduler& scheduler;
     AmdGpu::Liverpool* liverpool;
+    DescriptorHeap desc_heap;
     vk::UniquePipelineCache pipeline_cache;
     vk::UniquePipelineLayout pipeline_layout;
     Shader::Profile profile{};
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
index f9f2ae0a..a5ee22c2 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -3,8 +3,8 @@
 #include <array>
 #include <cstddef>
-#include <unordered_map>
 
 #include "common/assert.h"
+#include "common/scope_exit.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_master_semaphore.h"
 #include "video_core/renderer_vulkan/vk_resource_pool.h"
@@ -103,88 +103,86 @@ vk::CommandBuffer CommandPool::Commit() {
     return cmd_buffers[index];
 }
 
-constexpr u32 DESCRIPTOR_SET_BATCH = 32;
-
-DescriptorHeap::DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore,
-                               std::span<const vk::DescriptorSetLayoutBinding> bindings,
+DescriptorHeap::DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore_,
+                               std::span<const vk::DescriptorPoolSize> pool_sizes_,
                                u32 descriptor_heap_count_)
-    : ResourcePool{master_semaphore, DESCRIPTOR_SET_BATCH}, device{instance.GetDevice()},
-      descriptor_heap_count{descriptor_heap_count_} {
-    // Create descriptor set layout.
-    const vk::DescriptorSetLayoutCreateInfo layout_ci = {
-        .bindingCount = static_cast<u32>(bindings.size()),
-        .pBindings = bindings.data(),
-    };
-    descriptor_set_layout = device.createDescriptorSetLayoutUnique(layout_ci);
-    if (instance.HasDebuggingToolAttached()) {
-        SetObjectName(device, *descriptor_set_layout, "DescriptorSetLayout");
-    }
-
-    // Build descriptor set pool counts.
-    std::unordered_map<vk::DescriptorType, u32> descriptor_type_counts;
-    for (const auto& binding : bindings) {
-        descriptor_type_counts[binding.descriptorType] += binding.descriptorCount;
-    }
-    for (const auto& [type, count] : descriptor_type_counts) {
-        auto& pool_size = pool_sizes.emplace_back();
-        pool_size.descriptorCount = count * descriptor_heap_count;
-        pool_size.type = type;
-    }
-
-    // Create descriptor pool
-    AppendDescriptorPool();
+    : device{instance.GetDevice()}, master_semaphore{master_semaphore_},
+      descriptor_heap_count{descriptor_heap_count_}, pool_sizes{pool_sizes_} {
+    CreateDescriptorPool();
 }
 
-DescriptorHeap::~DescriptorHeap() = default;
+DescriptorHeap::~DescriptorHeap() {
+    device.destroyDescriptorPool(curr_pool);
+    for (const auto [pool, tick] : pending_pools) {
+        master_semaphore->Wait(tick);
+        device.destroyDescriptorPool(pool);
+    }
+}
 
-void DescriptorHeap::Allocate(std::size_t begin, std::size_t end) {
-    ASSERT(end - begin == DESCRIPTOR_SET_BATCH);
-    descriptor_sets.resize(end);
-    hashes.resize(end);
+vk::DescriptorSet DescriptorHeap::Commit(vk::DescriptorSetLayout set_layout) {
+    const u64 set_key = std::bit_cast<u64>(set_layout);
+    const auto [it, _] = descriptor_sets.try_emplace(set_key);
 
-    std::array<vk::DescriptorSetLayout, DESCRIPTOR_SET_BATCH> layouts;
-    layouts.fill(*descriptor_set_layout);
+    // Check if allocated sets exist and pick one.
+    if (!it->second.empty()) {
+        const auto desc_set = it->second.back();
+        it.value().pop_back();
+        return desc_set;
+    }
+
+    DescSetBatch desc_sets(DescriptorSetBatch);
+    std::array<vk::DescriptorSetLayout, DescriptorSetBatch> layouts;
+    layouts.fill(set_layout);
 
-    u32 current_pool = 0;
     vk::DescriptorSetAllocateInfo alloc_info = {
-        .descriptorPool = *pools[current_pool],
-        .descriptorSetCount = DESCRIPTOR_SET_BATCH,
+        .descriptorPool = curr_pool,
+        .descriptorSetCount = DescriptorSetBatch,
         .pSetLayouts = layouts.data(),
     };
 
-    // Attempt to allocate the descriptor set batch. If the pool has run out of space, use a new
-    // one.
-    while (true) {
-        const auto result =
-            device.allocateDescriptorSets(&alloc_info, descriptor_sets.data() + begin);
-        if (result == vk::Result::eSuccess) {
-            break;
-        }
-        if (result == vk::Result::eErrorOutOfPoolMemory) {
-            current_pool++;
-            if (current_pool == pools.size()) {
-                LOG_INFO(Render_Vulkan, "Run out of pools, creating new one!");
-                AppendDescriptorPool();
-            }
-            alloc_info.descriptorPool = *pools[current_pool];
-        }
+    // Attempt to allocate the descriptor set batch.
+    auto result = device.allocateDescriptorSets(&alloc_info, desc_sets.data());
+    if (result == vk::Result::eSuccess) {
+        const auto desc_set = desc_sets.back();
+        desc_sets.pop_back();
+        it.value() = std::move(desc_sets);
+        return desc_set;
     }
+
+    // The pool has run out. Record current tick and place it in pending list.
+    ASSERT_MSG(result == vk::Result::eErrorOutOfPoolMemory,
+               "Unexpected error during descriptor set allocation {}", vk::to_string(result));
+    pending_pools.emplace_back(curr_pool, master_semaphore->CurrentTick());
+    if (const auto [pool, tick] = pending_pools.front(); master_semaphore->IsFree(tick)) {
+        curr_pool = pool;
+        pending_pools.pop_front();
+        device.resetDescriptorPool(curr_pool);
+    } else {
+        CreateDescriptorPool();
+    }
+
+    // Attempt to allocate again with fresh pool.
+    alloc_info.descriptorPool = curr_pool;
+    result = device.allocateDescriptorSets(&alloc_info, desc_sets.data());
+    ASSERT_MSG(result == vk::Result::eSuccess,
+               "Unexpected error during descriptor set allocation {}", vk::to_string(result));
+
+    // We've changed pool so also reset descriptor batch cache.
+    descriptor_sets.clear();
+    const auto desc_set = desc_sets.back();
+    desc_sets.pop_back();
+    descriptor_sets[set_key] = std::move(desc_sets);
+    return desc_set;
 }
 
-vk::DescriptorSet DescriptorHeap::Commit() {
-    const std::size_t index = CommitResource();
-    return descriptor_sets[index];
-}
-
-void DescriptorHeap::AppendDescriptorPool() {
+void DescriptorHeap::CreateDescriptorPool() {
     const vk::DescriptorPoolCreateInfo pool_info = {
         .flags = vk::DescriptorPoolCreateFlagBits::eUpdateAfterBind,
         .maxSets = descriptor_heap_count,
         .poolSizeCount = static_cast<u32>(pool_sizes.size()),
         .pPoolSizes = pool_sizes.data(),
     };
-    auto& pool = pools.emplace_back();
-    pool = device.createDescriptorPoolUnique(pool_info);
+    curr_pool = device.createDescriptorPool(pool_info);
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
index b138b969..98c2ddb8 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.h
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -3,7 +3,9 @@
 #pragma once
 
+#include <deque>
 #include <span>
+#include <tsl/robin_map.h>
 #include <vector>
 
 #include "common/types.h"
@@ -62,32 +64,29 @@ private:
     std::vector<vk::CommandBuffer> cmd_buffers;
 };
 
-class DescriptorHeap final : public ResourcePool {
+class DescriptorHeap final {
+    static constexpr u32 DescriptorSetBatch = 32;
+
 public:
     explicit DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore,
-                            std::span<const vk::DescriptorSetLayoutBinding> bindings,
+                            std::span<const vk::DescriptorPoolSize> pool_sizes,
                             u32 descriptor_heap_count = 1024);
-    ~DescriptorHeap() override;
+    ~DescriptorHeap();
 
-    const vk::DescriptorSetLayout& Layout() const {
-        return *descriptor_set_layout;
-    }
-
-    void Allocate(std::size_t begin, std::size_t end) override;
-
-    vk::DescriptorSet Commit();
+    vk::DescriptorSet Commit(vk::DescriptorSetLayout set_layout);
 
 private:
-    void AppendDescriptorPool();
+    void CreateDescriptorPool();
 
 private:
     vk::Device device;
-    vk::UniqueDescriptorSetLayout descriptor_set_layout;
+    MasterSemaphore* master_semaphore;
     u32 descriptor_heap_count;
-    std::vector<vk::DescriptorPoolSize> pool_sizes;
-    std::vector<vk::UniqueDescriptorPool> pools;
-    std::vector<vk::DescriptorSet> descriptor_sets;
-    std::vector<std::size_t> hashes;
+    std::span<const vk::DescriptorPoolSize> pool_sizes;
+    vk::DescriptorPool curr_pool;
+    std::deque<std::pair<vk::DescriptorPool, u64>> pending_pools;
+    using DescSetBatch = boost::container::static_vector<vk::DescriptorSet, DescriptorSetBatch>;
+    tsl::robin_map<u64, DescSetBatch> descriptor_sets;
 };
 
 } // namespace Vulkan
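Note (editorial, not part of the patch): the reworked DescriptorHeap never frees individual sets. When a pool runs dry it is parked together with the current timeline tick, and the oldest parked pool is reset and reused wholesale (vkResetDescriptorPool recovers every set at once) only after MasterSemaphore reports that tick complete. The policy in isolation, with a stand-in for the semaphore:

```cpp
#include <cstdint>
#include <deque>
#include <utility>

// Stand-in for MasterSemaphore (a timeline-semaphore wrapper).
struct TimelineStub {
    std::uint64_t current_tick{1}; // tick the CPU is currently recording
    std::uint64_t gpu_tick{0};     // last tick the GPU has retired
    bool IsFree(std::uint64_t tick) const {
        return gpu_tick >= tick;
    }
};

// Sketch of the recycling decision inside DescriptorHeap::Commit().
template <typename Pool, typename Reset, typename Create>
Pool RecycleOrCreate(std::deque<std::pair<Pool, std::uint64_t>>& pending, TimelineStub& sem,
                     Pool exhausted, Reset reset, Create create) {
    pending.emplace_back(exhausted, sem.current_tick); // park with the in-flight tick
    if (sem.IsFree(pending.front().second)) {
        Pool pool = pending.front().first; // oldest pool fully retired on the GPU
        pending.pop_front();
        reset(pool);
        return pool;
    }
    return create(); // everything still in flight: grow rather than stall
}
```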
diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp
index 2a5c4c43..d494322a 100644
--- a/src/video_core/texture_cache/image.cpp
+++ b/src/video_core/texture_cache/image.cpp
@@ -73,7 +73,6 @@ static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) {
         if (!info.IsBlockCoded() && !info.IsPacked()) {
             usage |= vk::ImageUsageFlagBits::eColorAttachment;
         }
-
         // In cases where an image is created as a render/depth target and cleared with compute,
         // we cannot predict whether it will be used as a storage image. A proper solution would
         // involve re-creating the resource with a new configuration and copying previous content
diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp
index e30c1264..f94c1a37 100644
--- a/src/video_core/texture_cache/image_view.cpp
+++ b/src/video_core/texture_cache/image_view.cpp
@@ -69,7 +69,12 @@ vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) {
 ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage_) noexcept
     : is_storage{is_storage_} {
     type = ConvertImageViewType(image.GetType());
-    format = Vulkan::LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
+    const auto dfmt = image.GetDataFmt();
+    auto nfmt = image.GetNumberFmt();
+    if (is_storage && nfmt == AmdGpu::NumberFormat::Srgb) {
+        nfmt = AmdGpu::NumberFormat::Unorm;
+    }
+    format = Vulkan::LiverpoolToVK::SurfaceFormat(dfmt, nfmt);
     range.base.level = image.base_level;
     range.base.layer = image.base_array;
     range.extent.levels = image.last_level + 1;
@@ -143,7 +148,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_,
             .aspectMask = aspect,
             .baseMipLevel = info.range.base.level,
             .levelCount = info.range.extent.levels - info.range.base.level,
-            .baseArrayLayer = info_.range.base.layer,
+            .baseArrayLayer = info.range.base.layer,
             .layerCount = info.range.extent.layers - info.range.base.layer,
         },
     };
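Note (editorial, not part of the patch): Vulkan implementations do not, in practice, advertise storage-image support for *_SRGB formats, so a storage view of an sRGB image has to be created with its UNORM twin, and any linear-to-sRGB conversion becomes the shader's responsibility. The image_view.cpp change does this at the NumberFormat level; expressed directly on Vulkan formats the remap would look like:

```cpp
#include <vulkan/vulkan.hpp>

// Illustrative: swap an sRGB format for its UNORM counterpart before
// creating a storage image view.
inline vk::Format StorageSafeFormat(vk::Format format) {
    switch (format) {
    case vk::Format::eR8G8B8A8Srgb:
        return vk::Format::eR8G8B8A8Unorm;
    case vk::Format::eB8G8R8A8Srgb:
        return vk::Format::eB8G8R8A8Unorm;
    default:
        return format; // already storage-compatible, or left to the caller
    }
}
```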