diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e54f7a0..78d8421a 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -664,6 +664,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/constant_propagation_pass.cpp src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp + src/shader_recompiler/ir/passes/hull_shader_transform.cpp src/shader_recompiler/ir/passes/identity_removal_pass.cpp src/shader_recompiler/ir/passes/ir_passes.h src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp @@ -683,6 +684,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/opcodes.cpp src/shader_recompiler/ir/opcodes.h src/shader_recompiler/ir/opcodes.inc + src/shader_recompiler/ir/patch.cpp + src/shader_recompiler/ir/patch.h src/shader_recompiler/ir/post_order.cpp src/shader_recompiler/ir/post_order.h src/shader_recompiler/ir/program.cpp diff --git a/src/core/debug_state.cpp b/src/core/debug_state.cpp index 64962492..c68fd469 100644 --- a/src/core/debug_state.cpp +++ b/src/core/debug_state.cpp @@ -177,10 +177,11 @@ void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, } } -void DebugStateImpl::CollectShader(const std::string& name, vk::ShaderModule module, - std::span spv, std::span raw_code, - std::span patch_spv, bool is_patched) { - shader_dump_list.emplace_back(name, module, std::vector{spv.begin(), spv.end()}, +void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage, + vk::ShaderModule module, std::span spv, + std::span raw_code, std::span patch_spv, + bool is_patched) { + shader_dump_list.emplace_back(name, l_stage, module, std::vector{spv.begin(), spv.end()}, std::vector{raw_code.begin(), raw_code.end()}, std::vector{patch_spv.begin(), patch_spv.end()}, is_patched); } diff --git a/src/core/debug_state.h b/src/core/debug_state.h index fa2e5cd9..0db5bc46 100644 --- 
a/src/core/debug_state.h +++ b/src/core/debug_state.h @@ -76,6 +76,7 @@ struct FrameDump { struct ShaderDump { std::string name; + Shader::LogicalStage l_stage; vk::ShaderModule module; std::vector spv; @@ -90,16 +91,17 @@ struct ShaderDump { std::string cache_isa_disasm{}; std::string cache_patch_disasm{}; - ShaderDump(std::string name, vk::ShaderModule module, std::vector spv, - std::vector isa, std::vector patch_spv, bool is_patched) - : name(std::move(name)), module(module), spv(std::move(spv)), isa(std::move(isa)), - patch_spv(std::move(patch_spv)), is_patched(is_patched) {} + ShaderDump(std::string name, Shader::LogicalStage l_stage, vk::ShaderModule module, + std::vector spv, std::vector isa, std::vector patch_spv, + bool is_patched) + : name(std::move(name)), l_stage(l_stage), module(module), spv(std::move(spv)), + isa(std::move(isa)), patch_spv(std::move(patch_spv)), is_patched(is_patched) {} ShaderDump(const ShaderDump& other) = delete; ShaderDump(ShaderDump&& other) noexcept - : name{std::move(other.name)}, module{std::move(other.module)}, spv{std::move(other.spv)}, - isa{std::move(other.isa)}, patch_spv{std::move(other.patch_spv)}, - patch_source{std::move(other.patch_source)}, + : name{std::move(other.name)}, l_stage(other.l_stage), module{std::move(other.module)}, + spv{std::move(other.spv)}, isa{std::move(other.isa)}, + patch_spv{std::move(other.patch_spv)}, patch_source{std::move(other.patch_source)}, cache_spv_disasm{std::move(other.cache_spv_disasm)}, cache_isa_disasm{std::move(other.cache_isa_disasm)}, cache_patch_disasm{std::move(other.cache_patch_disasm)} {} @@ -108,6 +110,7 @@ struct ShaderDump { if (this == &other) return *this; name = std::move(other.name); + l_stage = other.l_stage; module = std::move(other.module); spv = std::move(other.spv); isa = std::move(other.isa); @@ -203,7 +206,8 @@ public: void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, const AmdGpu::Liverpool::Regs& regs, bool is_compute = false); - void 
CollectShader(const std::string& name, vk::ShaderModule module, std::span spv, + void CollectShader(const std::string& name, Shader::LogicalStage l_stage, + vk::ShaderModule module, std::span spv, std::span raw_code, std::span patch_spv, bool is_patched); }; diff --git a/src/core/devtools/widget/shader_list.cpp b/src/core/devtools/widget/shader_list.cpp index 80c93971..2c97db7f 100644 --- a/src/core/devtools/widget/shader_list.cpp +++ b/src/core/devtools/widget/shader_list.cpp @@ -158,16 +158,17 @@ bool ShaderList::Selection::DrawShader(DebugStateType::ShaderDump& value) { DebugState.ShowDebugMessage(msg); } if (compile) { - static std::map stage_arg = { - {"vs", "vert"}, - {"gs", "geom"}, - {"fs", "frag"}, - {"cs", "comp"}, + static std::map stage_arg = { + {Shader::LogicalStage::Vertex, "vert"}, + {Shader::LogicalStage::TessellationControl, "tesc"}, + {Shader::LogicalStage::TessellationEval, "tese"}, + {Shader::LogicalStage::Geometry, "geom"}, + {Shader::LogicalStage::Fragment, "frag"}, + {Shader::LogicalStage::Compute, "comp"}, }; - auto stage = stage_arg.find(value.name.substr(0, 2)); + auto stage = stage_arg.find(value.l_stage); if (stage == stage_arg.end()) { - DebugState.ShowDebugMessage(std::string{"Invalid shader stage: "} + - value.name.substr(0, 2)); + DebugState.ShowDebugMessage(std::string{"Invalid shader stage"}); } else { std::string cmd = fmt::format("glslc --target-env=vulkan1.3 --target-spv=spv1.6 " diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index dbf085fb..e85b8b89 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -1642,7 +1642,6 @@ s32 PS4_SYSV_ABI sceGnmSetGsShader(u32* cmdbuf, u32 size, const u32* gs_regs) { s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u32 param4) { LOG_TRACE(Lib_GnmDriver, "called"); - if (!cmdbuf || size < 0x1E) { return -1; } @@ -1660,11 +1659,13 @@ s32 PS4_SYSV_ABI 
sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u3 cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x108u, hs_regs[0], 0u); // SPI_SHADER_PGM_LO_HS cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[2], hs_regs[3]); // SPI_SHADER_PGM_RSRC1_HS/SPI_SHADER_PGM_RSRC2_HS - cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, hs_regs[5], - hs_regs[5]); // VGT_HOS_MAX_TESS_LEVEL + cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, + hs_regs[5], // VGT_HOS_MAX_TESS_LEVEL + hs_regs[6]); // VGT_HOS_MIN_TESS_LEVEL cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2dbu, hs_regs[4]); // VGT_TF_PARAM cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2d6u, param4); // VGT_LS_HS_CONFIG + // right padding? WriteTrailingNop<11>(cmdbuf); return ORBIS_OK; } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index ab9d6afa..e545e8e3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -1,6 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - #include #include #include @@ -13,6 +12,7 @@ #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/program.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/types.h" namespace Shader::Backend::SPIRV { @@ -72,7 +72,10 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { return arg.VectorReg(); } else if constexpr (std::is_same_v) { return arg.StringLiteral(); + } else if constexpr (std::is_same_v) { + return arg.Patch(); } + UNREACHABLE(); } template @@ -206,6 +209,32 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) { return main; } +spv::ExecutionMode ExecutionMode(AmdGpu::TessellationType primitive) { + switch (primitive) { + case AmdGpu::TessellationType::Isoline: + return spv::ExecutionMode::Isolines; + 
case AmdGpu::TessellationType::Triangle: + return spv::ExecutionMode::Triangles; + case AmdGpu::TessellationType::Quad: + return spv::ExecutionMode::Quads; + } + UNREACHABLE_MSG("Tessellation primitive {}", primitive); +} + +spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) { + switch (spacing) { + case AmdGpu::TessellationPartitioning::Integer: + return spv::ExecutionMode::SpacingEqual; + case AmdGpu::TessellationPartitioning::FracOdd: + return spv::ExecutionMode::SpacingFractionalOdd; + case AmdGpu::TessellationPartitioning::FracEven: + return spv::ExecutionMode::SpacingFractionalEven; + default: + break; + } + UNREACHABLE_MSG("Tessellation spacing {}", spacing); +} + void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) { ctx.AddCapability(spv::Capability::Image1D); ctx.AddCapability(spv::Capability::Sampled1D); @@ -248,36 +277,55 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct if (info.uses_group_ballot) { ctx.AddCapability(spv::Capability::GroupNonUniformBallot); } - if (info.stage == Stage::Export || info.stage == Stage::Vertex) { + const auto stage = info.l_stage; + if (stage == LogicalStage::Vertex) { ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } - if (info.stage == Stage::Geometry) { + if (stage == LogicalStage::Geometry) { ctx.AddCapability(spv::Capability::Geometry); } if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) { ctx.AddExtension("SPV_KHR_fragment_shader_barycentric"); ctx.AddCapability(spv::Capability::FragmentBarycentricKHR); } + if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) { + ctx.AddCapability(spv::Capability::Tessellation); + } } -void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { - const auto& info = program.info; +void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) { const std::span 
interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; - switch (program.info.stage) { - case Stage::Compute: { + switch (info.l_stage) { + case LogicalStage::Compute: { const std::array workgroup_size{ctx.runtime_info.cs_info.workgroup_size}; execution_model = spv::ExecutionModel::GLCompute; ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], workgroup_size[2]); break; } - case Stage::Export: - case Stage::Vertex: + case LogicalStage::Vertex: execution_model = spv::ExecutionModel::Vertex; break; - case Stage::Fragment: + case LogicalStage::TessellationControl: + execution_model = spv::ExecutionModel::TessellationControl; + ctx.AddCapability(spv::Capability::Tessellation); + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, + ctx.runtime_info.hs_info.NumOutputControlPoints()); + break; + case LogicalStage::TessellationEval: { + execution_model = spv::ExecutionModel::TessellationEvaluation; + const auto& vs_info = ctx.runtime_info.vs_info; + ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type)); + ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning)); + ctx.AddExecutionMode(main, + vs_info.tess_topology == AmdGpu::TessellationTopology::TriangleCcw + ? 
spv::ExecutionMode::VertexOrderCcw + : spv::ExecutionMode::VertexOrderCw); + break; + } + case LogicalStage::Fragment: execution_model = spv::ExecutionModel::Fragment; if (ctx.profile.lower_left_origin_mode) { ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft); @@ -292,7 +340,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); } break; - case Stage::Geometry: + case LogicalStage::Geometry: execution_model = spv::ExecutionModel::Geometry; ctx.AddExecutionMode(main, GetInputPrimitiveType(ctx.runtime_info.gs_info.in_primitive)); ctx.AddExecutionMode(main, @@ -303,7 +351,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.runtime_info.gs_info.num_invocations); break; default: - throw NotImplementedException("Stage {}", u32(program.info.stage)); + UNREACHABLE_MSG("Stage {}", u32(info.stage)); } ctx.AddEntryPoint(execution_model, main, "main", interfaces); } @@ -349,7 +397,7 @@ std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in const IR::Program& program, Bindings& binding) { EmitContext ctx{profile, runtime_info, program.info, binding}; const Id main{DefineMain(ctx, program)}; - DefineEntryPoint(program, ctx, main); + DefineEntryPoint(program.info, ctx, main); SetupCapabilities(program.info, profile, ctx); SetupFloatMode(ctx, profile, runtime_info, main); PatchPhiNodes(program, ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp index 22b3523a..611225e8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -18,9 +18,16 @@ void MemoryBarrier(EmitContext& ctx, spv::Scope scope) { void EmitBarrier(EmitContext& ctx) { const auto execution{spv::Scope::Workgroup}; - const auto memory{spv::Scope::Workgroup}; - const auto 
memory_semantics{spv::MemorySemanticsMask::AcquireRelease | - spv::MemorySemanticsMask::WorkgroupMemory}; + spv::Scope memory; + spv::MemorySemanticsMask memory_semantics; + if (ctx.l_stage == Shader::LogicalStage::TessellationControl) { + memory = spv::Scope::Invocation; + memory_semantics = spv::MemorySemanticsMask::MaskNone; + } else { + memory = spv::Scope::Workgroup; + memory_semantics = + spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::WorkgroupMemory; + } ctx.OpControlBarrier(ctx.ConstU32(static_cast(execution)), ctx.ConstU32(static_cast(memory)), ctx.ConstU32(static_cast(memory_semantics))); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index d005169c..f3db6af5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -4,6 +4,9 @@ #include "common/assert.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/patch.h" +#include "shader_recompiler/runtime_info.h" #include @@ -45,13 +48,19 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) { Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { if (IR::IsParam(attr)) { - const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; - const auto& info{ctx.output_params.at(index)}; - ASSERT(info.num_components > 0); - if (info.num_components == 1) { - return info.id; + const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)}; + if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) { + const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); + return ctx.OpAccessChain(component_ptr, ctx.output_attr_array, ctx.ConstU32(attr_index), + ctx.ConstU32(element)); } else { - 
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element)); + const auto& info{ctx.output_params.at(attr_index)}; + ASSERT(info.num_components > 0); + if (info.num_components == 1) { + return info.id; + } else { + return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element)); + } } } if (IR::IsMrt(attr)) { @@ -82,9 +91,13 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { std::pair OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) { if (IR::IsParam(attr)) { - const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; - const auto& info{ctx.output_params.at(index)}; - return {info.component_type, info.is_integer}; + if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) { + return {ctx.F32[1], false}; + } else { + const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; + const auto& info{ctx.output_params.at(index)}; + return {info.component_type, info.is_integer}; + } } if (IR::IsMrt(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)}; @@ -171,12 +184,11 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) { rate_idx == 0 ? 
ctx.u32_zero_value : ctx.u32_one_value)); } -Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { +Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { if (IR::IsPosition(attr)) { ASSERT(attr == IR::Attribute::Position0); const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ - ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))}; + const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))}; const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); @@ -186,7 +198,7 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; const auto param = ctx.input_params.at(param_id).id; const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))}; + const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)}; const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); @@ -194,9 +206,27 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u UNREACHABLE(); } -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { - if (ctx.info.stage == Stage::Geometry) { +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) { + if (ctx.info.l_stage == LogicalStage::Geometry) { return EmitGetAttributeForGeometry(ctx, attr, comp, index); + } else if (ctx.info.l_stage == LogicalStage::TessellationControl || + ctx.info.l_stage == LogicalStage::TessellationEval) { + if 
(IR::IsTessCoord(attr)) { + const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1; + const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); + const auto pointer{ + ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))}; + return ctx.OpLoad(ctx.F32[1], pointer); + } else if (IR::IsParam(attr)) { + const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; + const auto param = ctx.input_params.at(param_id).id; + const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); + const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)}; + const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); + } + UNREACHABLE(); } if (IR::IsParam(attr)) { @@ -242,8 +272,14 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { } return coord; } + case IR::Attribute::TessellationEvaluationPointU: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); + case IR::Attribute::TessellationEvaluationPointV: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U))); default: - throw NotImplementedException("Read attribute {}", attr); + UNREACHABLE_MSG("Read attribute {}", attr); } } @@ -266,10 +302,32 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value, ctx.u32_zero_value); case IR::Attribute::PrimitiveId: - ASSERT(ctx.info.stage == Stage::Geometry); return ctx.OpLoad(ctx.U32[1], ctx.primitive_id); + case IR::Attribute::InvocationId: + ASSERT(ctx.info.l_stage == LogicalStage::Geometry || + ctx.info.l_stage == LogicalStage::TessellationControl); + return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); + case 
IR::Attribute::PatchVertices: + ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl); + return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices); + case IR::Attribute::PackedHullInvocationInfo: { + ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl); + // [0:8]: patch id within VGT + // [8:12]: output control point id + // But 0:8 should be treated as 0 for attribute addressing purposes + if (ctx.runtime_info.hs_info.IsPassthrough()) { + // Gcn shader would run with 1 thread, but we need to run a thread for + // each output control point. + // If Gcn shader uses this value, we should make sure all threads in the + // Vulkan shader use 0 + return ctx.ConstU32(0u); + } else { + const Id invocation_id = ctx.OpLoad(ctx.U32[1], ctx.invocation_id); + return ctx.OpShiftLeftLogical(ctx.U32[1], invocation_id, ctx.ConstU32(8u)); + } + } default: - throw NotImplementedException("Read U32 attribute {}", attr); + UNREACHABLE_MSG("Read U32 attribute {}", attr); } } @@ -287,6 +345,58 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen } } +Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index) { + const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.input_attr_array, + vertex_index, attr_index, comp_index)); +} + +void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) { + // Implied vertex index is invocation_id + const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]); + Id pointer = + ctx.OpAccessChain(component_ptr, ctx.output_attr_array, + ctx.OpLoad(ctx.U32[1], ctx.invocation_id), attr_index, comp_index); + ctx.OpStore(pointer, value); +} + +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))}; + const Id type{ctx.l_stage 
== LogicalStage::TessellationControl ? ctx.output_f32 + : ctx.input_f32}; + const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)}; + return ctx.OpLoad(ctx.F32[1], pointer); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { + const Id pointer{[&] { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))}; + return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element); + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast(patch) - u32(IR::Patch::TessellationLodLeft)}; + const Id index_id{ctx.ConstU32(index)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id); + } + case IR::Patch::TessellationLodInteriorU: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, + ctx.u32_zero_value); + case IR::Patch::TessellationLodInteriorV: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.ConstU32(1u)); + default: + UNREACHABLE_MSG("Patch {}", u32(patch)); + } + }()}; + ctx.OpStore(pointer, value); +} + template static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) { auto& buffer = ctx.buffers[handle]; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 057b0d69..f71c61af 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -9,6 +9,7 @@ namespace Shader::IR { enum class Attribute : u64; enum class ScalarReg : u32; +enum class Patch : u64; class Inst; class Value; } // namespace Shader::IR @@ -27,8 +28,6 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value); void EmitReference(EmitContext&); void 
EmitPhiMove(EmitContext&); void EmitJoin(EmitContext& ctx); -void EmitWorkgroupMemoryBarrier(EmitContext& ctx); -void EmitDeviceMemoryBarrier(EmitContext& ctx); void EmitGetScc(EmitContext& ctx); void EmitGetExec(EmitContext& ctx); void EmitGetVcc(EmitContext& ctx); @@ -85,9 +84,13 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); +Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index); +void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index); +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); void EmitSetSampleMask(EmitContext& ctx, Id value); void EmitSetFragDepth(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 1ada2f1f..2e09e70a 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/ir/passes/srt.h" +#include "shader_recompiler/runtime_info.h" #include 
"video_core/amdgpu/types.h" #include @@ -34,7 +35,7 @@ std::string_view StageName(Stage stage) { case Stage::Compute: return "cs"; } - throw InvalidArgument("Invalid stage {}", u32(stage)); + UNREACHABLE_MSG("Invalid hw stage {}", u32(stage)); } static constexpr u32 NumVertices(AmdGpu::PrimitiveType type) { @@ -65,7 +66,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, const Info& info_, Bindings& binding_) : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_}, - profile{profile_}, stage{info.stage}, binding{binding_} { + profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} { AddCapability(spv::Capability::Shader); DefineArithmeticTypes(); DefineInterfaces(); @@ -268,9 +269,8 @@ void EmitContext::DefineInputs() { U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input); Decorate(subgroup_local_invocation_id, spv::Decoration::Flat); } - switch (stage) { - case Stage::Export: - case Stage::Vertex: { + switch (l_stage) { + case LogicalStage::Vertex: { vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input); base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input); @@ -311,12 +311,11 @@ void EmitContext::DefineInputs() { } input_params[attrib.semantic] = GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false); - interfaces.push_back(id); } } break; } - case Stage::Fragment: + case LogicalStage::Fragment: frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input); frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output); front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); @@ -351,15 +350,14 @@ void EmitContext::DefineInputs() { } 
input_params[semantic] = GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false); - interfaces.push_back(attr_id); } break; - case Stage::Compute: + case LogicalStage::Compute: workgroup_id = DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input); local_invocation_id = DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input); break; - case Stage::Geometry: { + case LogicalStage::Geometry: { primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); const auto gl_per_vertex = Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))), @@ -389,15 +387,129 @@ void EmitContext::DefineInputs() { } break; } + case LogicalStage::TessellationControl: { + invocation_id = + DefineVariable(U32[1], spv::BuiltIn::InvocationId, spv::StorageClass::Input); + patch_vertices = + DefineVariable(U32[1], spv::BuiltIn::PatchVertices, spv::StorageClass::Input); + primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); + + const u32 num_attrs = runtime_info.hs_info.ls_stride >> 4; + if (num_attrs > 0) { + const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))}; + // The input vertex count isn't statically known, so make length 32 (what glslang does) + const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))}; + input_attr_array = DefineInput(patch_array_type, 0); + Name(input_attr_array, "in_attrs"); + } + break; + } + case LogicalStage::TessellationEval: { + tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord); + primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); + + const u32 num_attrs = runtime_info.vs_info.hs_output_cp_stride >> 4; + if (num_attrs > 0) { + const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))}; + // The input vertex count isn't statically known, so make length 32 (what glslang does) + const Id 
patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))}; + input_attr_array = DefineInput(patch_array_type, 0); + Name(input_attr_array, "in_attrs"); + } + + u32 patch_base_location = runtime_info.vs_info.hs_output_cp_stride >> 4; + for (size_t index = 0; index < 30; ++index) { + if (!(info.uses_patches & (1U << index))) { + continue; + } + const Id id{DefineInput(F32[4], patch_base_location + index)}; + Decorate(id, spv::Decoration::Patch); + Name(id, fmt::format("patch_in{}", index)); + patches[index] = id; + } + break; + } default: break; } } void EmitContext::DefineOutputs() { - switch (stage) { - case Stage::Export: - case Stage::Vertex: { + switch (l_stage) { + case LogicalStage::Vertex: { + // No point in defining builtin outputs (i.e. position) unless next stage is fragment? + // Might cause problems linking with tcs + + output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); + const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) || + info.stores.Get(IR::Attribute::Position2) || + info.stores.Get(IR::Attribute::Position3); + if (has_extra_pos_stores) { + const Id type{TypeArray(F32[1], ConstU32(8U))}; + clip_distances = + DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output); + cull_distances = + DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output); + } + if (stage == Shader::Stage::Local && runtime_info.ls_info.links_with_tcs) { + const u32 num_attrs = runtime_info.ls_info.ls_stride >> 4; + if (num_attrs > 0) { + const Id type{TypeArray(F32[4], ConstU32(num_attrs))}; + output_attr_array = DefineOutput(type, 0); + Name(output_attr_array, "out_attrs"); + } + } else { + for (u32 i = 0; i < IR::NumParams; i++) { + const IR::Attribute param{IR::Attribute::Param0 + i}; + if (!info.stores.GetAny(param)) { + continue; + } + const u32 num_components = info.stores.NumComponents(param); + const Id id{DefineOutput(F32[num_components], i)}; + Name(id, 
fmt::format("out_attr{}", i)); + output_params[i] = + GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true); + } + } + break; + } + case LogicalStage::TessellationControl: { + if (info.stores_tess_level_outer) { + const Id type{TypeArray(F32[1], ConstU32(4U))}; + output_tess_level_outer = + DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelOuter); + Decorate(output_tess_level_outer, spv::Decoration::Patch); + } + if (info.stores_tess_level_inner) { + const Id type{TypeArray(F32[1], ConstU32(2U))}; + output_tess_level_inner = + DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelInner); + Decorate(output_tess_level_inner, spv::Decoration::Patch); + } + + const u32 num_attrs = runtime_info.hs_info.hs_output_cp_stride >> 4; + if (num_attrs > 0) { + const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))}; + // The input vertex count isn't statically known, so make length 32 (what glslang does) + const Id patch_array_type{TypeArray( + per_vertex_type, ConstU32(runtime_info.hs_info.NumOutputControlPoints()))}; + output_attr_array = DefineOutput(patch_array_type, 0); + Name(output_attr_array, "out_attrs"); + } + + u32 patch_base_location = runtime_info.hs_info.hs_output_cp_stride >> 4; + for (size_t index = 0; index < 30; ++index) { + if (!(info.uses_patches & (1U << index))) { + continue; + } + const Id id{DefineOutput(F32[4], patch_base_location + index)}; + Decorate(id, spv::Decoration::Patch); + Name(id, fmt::format("patch_out{}", index)); + patches[index] = id; + } + break; + } + case LogicalStage::TessellationEval: { output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) || info.stores.Get(IR::Attribute::Position2) || @@ -419,11 +531,10 @@ void EmitContext::DefineOutputs() { Name(id, fmt::format("out_attr{}", i)); output_params[i] = GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true); - 
interfaces.push_back(id); } break; } - case Stage::Fragment: + case LogicalStage::Fragment: for (u32 i = 0; i < IR::NumRenderTargets; i++) { const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i}; if (!info.stores.GetAny(mrt)) { @@ -435,22 +546,22 @@ void EmitContext::DefineOutputs() { const Id id{DefineOutput(type, i)}; Name(id, fmt::format("frag_color{}", i)); frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true); - interfaces.push_back(id); } break; - case Stage::Geometry: { + case LogicalStage::Geometry: { output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) { const Id id{DefineOutput(F32[4], attr_id)}; Name(id, fmt::format("out_attr{}", attr_id)); output_params[attr_id] = {id, output_f32, F32[1], 4u}; - interfaces.push_back(id); } break; } - default: + case LogicalStage::Compute: break; + default: + UNREACHABLE(); } } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index cd129332..583d96b9 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -46,14 +46,18 @@ public: void DefineBufferOffsets(); void DefineInterpolatedAttribs(); - [[nodiscard]] Id DefineInput(Id type, u32 location) { - const Id input_id{DefineVar(type, spv::StorageClass::Input)}; - Decorate(input_id, spv::Decoration::Location, location); + [[nodiscard]] Id DefineInput(Id type, std::optional location = std::nullopt, + std::optional builtin = std::nullopt) { + const Id input_id{DefineVariable(type, builtin, spv::StorageClass::Input)}; + if (location) { + Decorate(input_id, spv::Decoration::Location, *location); + } return input_id; } - [[nodiscard]] Id DefineOutput(Id type, std::optional location = std::nullopt) { - const Id output_id{DefineVar(type, spv::StorageClass::Output)}; + [[nodiscard]] Id 
DefineOutput(Id type, std::optional location = std::nullopt, + std::optional builtin = std::nullopt) { + const Id output_id{DefineVariable(type, builtin, spv::StorageClass::Output)}; if (location) { Decorate(output_id, spv::Decoration::Location, *location); } @@ -131,7 +135,8 @@ public: const Info& info; const RuntimeInfo& runtime_info; const Profile& profile; - Stage stage{}; + Stage stage; + LogicalStage l_stage{}; Id void_id{}; Id U8{}; @@ -188,8 +193,15 @@ public: Id clip_distances{}; Id cull_distances{}; + Id patch_vertices{}; + Id output_tess_level_outer{}; + Id output_tess_level_inner{}; + Id tess_coord; + std::array patches{}; + Id workgroup_id{}; Id local_invocation_id{}; + Id invocation_id{}; // for instanced geoshaders or output vertices within TCS patch Id subgroup_local_invocation_id{}; Id image_u32{}; @@ -252,6 +264,8 @@ public: bool is_loaded{}; s32 buffer_handle{-1}; }; + Id input_attr_array; + Id output_attr_array; std::array input_params{}; std::array output_params{}; std::array frag_outputs{}; diff --git a/src/shader_recompiler/frontend/tessellation.h b/src/shader_recompiler/frontend/tessellation.h new file mode 100644 index 00000000..bfcaa4fd --- /dev/null +++ b/src/shader_recompiler/frontend/tessellation.h @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +namespace Shader { + +struct TessellationDataConstantBuffer { + u32 ls_stride; + u32 hs_cp_stride; // HullStateConstants::m_cpStride != 0 ? HullStateConstants::m_cpStride : + // ls_stride + u32 num_patches; // num patches submitted in threadgroup + u32 hs_output_base; // HullStateConstants::m_numInputCP::m_cpStride != 0 ? 
+ // HullStateConstants::m_numInputCP * ls_stride * num_patches : 0 + // basically 0 when passthrough + u32 patch_const_size; // 16 * num_patch_attrs + u32 patch_const_base; // hs_output_base + patch_output_size + u32 patch_output_size; // output_cp_stride * num_output_cp_per_patch + f32 off_chip_tessellation_factor_threshold; + u32 first_edge_tess_factor_index; +}; + +// Assign names to dword fields of TessellationDataConstantBuffer +enum class TessConstantAttribute : u32 { + LsStride, + HsCpStride, + HsNumPatch, + HsOutputBase, + PatchConstSize, + PatchConstBase, + PatchOutputSize, + OffChipTessellationFactorThreshold, + FirstEdgeTessFactorIndex, +}; + +} // namespace Shader \ No newline at end of file diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 5914f9fe..116935b9 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -1,8 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/ir/reg.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Gcn { @@ -73,10 +73,11 @@ void Translator::EmitDataShare(const GcnInst& inst) { void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) { const IR::U32 value{GetSrc(inst.src[0])}; - if (info.stage != Stage::Compute) { - SetDst(inst.dst[0], value); - } else { + if (info.l_stage == LogicalStage::Compute || + info.l_stage == LogicalStage::TessellationControl) { SetDst(inst.dst[0], ir.ReadFirstLane(value)); + } else { + SetDst(inst.dst[0], value); } } diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 5b411d83..1ef0d82d 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ 
b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include +#include "common/assert.h" #include "shader_recompiler/frontend/translate/translate.h" namespace Shader::Gcn { @@ -78,8 +80,10 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { return S_BFM_B32(inst); case Opcode::S_MUL_I32: return S_MUL_I32(inst); + case Opcode::S_BFE_I32: + return S_BFE(inst, true); case Opcode::S_BFE_U32: - return S_BFE_U32(inst); + return S_BFE(inst, false); case Opcode::S_ABSDIFF_I32: return S_ABSDIFF_I32(inst); @@ -434,12 +438,12 @@ void Translator::S_MUL_I32(const GcnInst& inst) { SetDst(inst.dst[0], ir.IMul(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); } -void Translator::S_BFE_U32(const GcnInst& inst) { +void Translator::S_BFE(const GcnInst& inst, bool is_signed) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 offset{ir.BitwiseAnd(src1, ir.Imm32(0x1F))}; const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7))}; - const IR::U32 result{ir.BitFieldExtract(src0, offset, count)}; + const IR::U32 result{ir.BitFieldExtract(src0, offset, count, is_signed)}; SetDst(inst.dst[0], result); ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); } diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 97978ff6..3031e664 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -8,6 +8,8 @@ #include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/info.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/reg.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/types.h" @@ -34,9 +36,8 
@@ void Translator::EmitPrologue() { } IR::VectorReg dst_vreg = IR::VectorReg::V0; - switch (info.stage) { - case Stage::Vertex: - case Stage::Export: + switch (info.l_stage) { + case LogicalStage::Vertex: // v0: vertex ID, always present ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId)); // v1: instance ID, step rate 0 @@ -52,7 +53,7 @@ void Translator::EmitPrologue() { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId)); } break; - case Stage::Fragment: + case LogicalStage::Fragment: dst_vreg = IR::VectorReg::V0; if (runtime_info.fs_info.addr_flags.persp_sample_ena) { ++dst_vreg; // I @@ -122,7 +123,30 @@ void Translator::EmitPrologue() { } } break; - case Stage::Compute: + case LogicalStage::TessellationControl: { + // Should be laid out like: + // [0:8]: patch id within VGT + // [8:12]: output control point id + ir.SetVectorReg(IR::VectorReg::V1, + ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo)); + // TODO PrimitiveId is probably V2 but haven't seen it yet + break; + } + case LogicalStage::TessellationEval: + ir.SetVectorReg(IR::VectorReg::V0, + ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU)); + ir.SetVectorReg(IR::VectorReg::V1, + ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV)); + // V2 is similar to PrimitiveID but not the same. It seems to only be used in + // compiler-generated address calculations. Its probably the patch id within the + // patches running locally on a given VGT (or CU, whichever is the granularity of LDS + // memory) + // Set to 0. See explanation in comment describing hull/domain passes + ir.SetVectorReg(IR::VectorReg::V2, ir.Imm32(0u)); + // V3 is the actual PrimitiveID as intended by the shader author. 
+ ir.SetVectorReg(IR::VectorReg::V3, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); + break; + case LogicalStage::Compute: ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0)); ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1)); ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2)); @@ -137,7 +161,7 @@ void Translator::EmitPrologue() { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2)); } break; - case Stage::Geometry: + case LogicalStage::Geometry: switch (runtime_info.gs_info.out_primitive[0]) { case AmdGpu::GsOutputPrimitiveType::TriangleStrip: ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2 @@ -152,7 +176,7 @@ void Translator::EmitPrologue() { ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); break; default: - throw NotImplementedException("Unknown shader stage"); + UNREACHABLE_MSG("Unknown shader stage"); } } @@ -503,7 +527,8 @@ void Translate(IR::Block* block, u32 pc, std::span inst_list, Inf // Special case for emitting fetch shader. 
if (inst.opcode == Opcode::S_SWAPPC_B64) { - ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export); + ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export || + info.stage == Stage::Local); translator.EmitFetch(inst); continue; } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 198cea27..60bad186 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -94,7 +94,8 @@ public: void S_ASHR_I32(const GcnInst& inst); void S_BFM_B32(const GcnInst& inst); void S_MUL_I32(const GcnInst& inst); - void S_BFE_U32(const GcnInst& inst); + void S_BFE(const GcnInst& inst, bool is_signed); + void S_BFE_I32(const GcnInst& inst); void S_ABSDIFF_I32(const GcnInst& inst); void S_NOT_B32(const GcnInst& inst); @@ -217,7 +218,7 @@ public: // VOP3a void V_MAD_F32(const GcnInst& inst); - void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = false); + void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = true); void V_MAD_U32_U24(const GcnInst& inst); void V_CUBEID_F32(const GcnInst& inst); void V_CUBESC_F32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 3e9e677a..2b32ca2c 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -1060,8 +1060,14 @@ void Translator::V_CUBEMA_F32(const GcnInst& inst) { void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))}; - const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))}; + IR::U32 src1{GetSrc(inst.src[1])}; + IR::U32 src2{GetSrc(inst.src[2])}; + if (!src1.IsImmediate()) { + src1 = ir.BitwiseAnd(src1, ir.Imm32(0x1F)); + } + if (!src2.IsImmediate()) { + 
src2 = ir.BitwiseAnd(src2, ir.Imm32(0x1F)); + } SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed)); } diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index eadd1c4d..072b1f88 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -189,7 +189,8 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) buffer_info.index_enable.Assign(mtbuf.idxen); buffer_info.offset_enable.Assign(mtbuf.offen); buffer_info.inst_offset.Assign(mtbuf.offset); - buffer_info.ring_access.Assign(is_ring); + buffer_info.globally_coherent.Assign(mtbuf.glc); + buffer_info.system_coherent.Assign(mtbuf.slc); if (is_typed) { const auto dmft = static_cast(mtbuf.dfmt); const auto nfmt = static_cast(mtbuf.nfmt); @@ -247,11 +248,15 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst const IR::ScalarReg sharp{inst.src[2].code * 4}; const IR::Value soffset{GetSrc(inst.src[3])}; - if (info.stage != Stage::Export && info.stage != Stage::Geometry) { + if (info.stage != Stage::Export && info.stage != Stage::Hull && info.stage != Stage::Geometry) { ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); } + if (info.stage == Stage::Hull) { + // printf("here\n"); // break + } + IR::Value address = [&] -> IR::Value { if (is_ring) { return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset); @@ -269,7 +274,8 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst buffer_info.index_enable.Assign(mtbuf.idxen); buffer_info.offset_enable.Assign(mtbuf.offen); buffer_info.inst_offset.Assign(mtbuf.offset); - buffer_info.ring_access.Assign(is_ring); + buffer_info.globally_coherent.Assign(mtbuf.glc); + buffer_info.system_coherent.Assign(mtbuf.slc); if (is_typed) { const auto dmft = static_cast(mtbuf.dfmt); 
const auto nfmt = static_cast(mtbuf.nfmt); diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 494bbb4b..dbea2af8 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -11,6 +11,7 @@ #include "common/types.h" #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/copy_shader.h" +#include "shader_recompiler/frontend/tessellation.h" #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/passes/srt.h" #include "shader_recompiler/ir/reg.h" @@ -163,6 +164,7 @@ struct Info { UserDataMask ud_mask{}; CopyShaderData gs_copy_data; + u32 uses_patches{}; BufferResourceList buffers; TextureBufferResourceList texture_buffers; @@ -173,8 +175,12 @@ struct Info { PersistentSrtInfo srt_info; std::vector flattened_ud_buf; + IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max; + s32 tess_consts_dword_offset = -1; + std::span user_data; Stage stage; + LogicalStage l_stage; u64 pgm_hash{}; VAddr pgm_base; @@ -190,14 +196,16 @@ struct Info { bool uses_shared{}; bool uses_fp16{}; bool uses_fp64{}; + bool stores_tess_level_outer{}; + bool stores_tess_level_inner{}; bool translation_failed{}; // indicates that shader has unsupported instructions bool has_readconst{}; u8 mrt_mask{0u}; bool has_fetch_shader{false}; u32 fetch_shader_sgpr_base{0u}; - explicit Info(Stage stage_, ShaderParams params) - : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, + explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params) + : stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, user_data{params.user_data} {} template @@ -244,6 +252,16 @@ struct Info { srt_info.walker_func(user_data.data(), flattened_ud_buf.data()); } } + + void ReadTessConstantBuffer(TessellationDataConstantBuffer& tess_constants) const { + ASSERT(tess_consts_dword_offset >= 0); // We've already tracked the V# UD + auto buf = ReadUdReg(static_cast(tess_consts_ptr_base), + 
static_cast(tess_consts_dword_offset)); + VAddr tess_constants_addr = buf.base_address; + memcpy(&tess_constants, + reinterpret_cast(tess_constants_addr), + sizeof(tess_constants)); + } }; constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept { diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index e219dfb6..6a267e21 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -104,6 +104,8 @@ std::string NameOf(Attribute attribute) { return "VertexId"; case Attribute::InstanceId: return "InstanceId"; + case Attribute::PrimitiveId: + return "PrimitiveId"; case Attribute::FragCoord: return "FragCoord"; case Attribute::IsFrontFace: @@ -114,6 +116,16 @@ std::string NameOf(Attribute attribute) { return "LocalInvocationId"; case Attribute::LocalInvocationIndex: return "LocalInvocationIndex"; + case Attribute::InvocationId: + return "InvocationId"; + case Attribute::PatchVertices: + return "PatchVertices"; + case Attribute::TessellationEvaluationPointU: + return "TessellationEvaluationPointU"; + case Attribute::TessellationEvaluationPointV: + return "TessellationEvaluationPointV"; + case Attribute::PackedHullInvocationInfo: + return "PackedHullInvocationInfo"; default: break; } diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index 0890e88f..bcb2b44a 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -72,8 +72,13 @@ enum class Attribute : u64 { LocalInvocationId = 75, LocalInvocationIndex = 76, FragCoord = 77, - InstanceId0 = 78, // step rate 0 - InstanceId1 = 79, // step rate 1 + InstanceId0 = 78, // step rate 0 + InstanceId1 = 79, // step rate 1 + InvocationId = 80, // TCS id in output patch and instanced geometry shader id + PatchVertices = 81, + TessellationEvaluationPointU = 82, + TessellationEvaluationPointV = 83, + PackedHullInvocationInfo = 84, // contains patch id 
within the VGT and invocation ID Max, }; @@ -85,6 +90,11 @@ constexpr bool IsPosition(Attribute attribute) noexcept { return attribute >= Attribute::Position0 && attribute <= Attribute::Position3; } +constexpr bool IsTessCoord(Attribute attribute) noexcept { + return attribute >= Attribute::TessellationEvaluationPointU && + attribute <= Attribute::TessellationEvaluationPointV; +} + constexpr bool IsParam(Attribute attribute) noexcept { return attribute >= Attribute::Param0 && attribute <= Attribute::Param31; } diff --git a/src/shader_recompiler/ir/basic_block.cpp b/src/shader_recompiler/ir/basic_block.cpp index b4d1a78c..a312eabd 100644 --- a/src/shader_recompiler/ir/basic_block.cpp +++ b/src/shader_recompiler/ir/basic_block.cpp @@ -94,6 +94,8 @@ static std::string ArgToIndex(std::map& inst_to_index, size return fmt::format("{}", arg.VectorReg()); case Type::Attribute: return fmt::format("{}", arg.Attribute()); + case Type::Patch: + return fmt::format("{}", arg.Patch()); default: return ""; } diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 3ebc82e6..21df5339 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -266,8 +266,8 @@ void IREmitter::SetM0(const U32& value) { Inst(Opcode::SetM0, value); } -F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) { - return Inst(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index)); +F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, IR::Value index) { + return Inst(Opcode::GetAttribute, attribute, Imm32(comp), index); } U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) { @@ -278,6 +278,24 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp Inst(Opcode::SetAttribute, attribute, value, Imm32(comp)); } +F32 IREmitter::GetTessGenericAttribute(const U32& vertex_index, const U32& attr_index, + const U32& comp_index) { + return 
Inst(IR::Opcode::GetTessGenericAttribute, vertex_index, attr_index, comp_index); +} + +void IREmitter::SetTcsGenericAttribute(const F32& value, const U32& attr_index, + const U32& comp_index) { + Inst(Opcode::SetTcsGenericAttribute, value, attr_index, comp_index); +} + +F32 IREmitter::GetPatch(Patch patch) { + return Inst(Opcode::GetPatch, patch); +} + +void IREmitter::SetPatch(Patch patch, const F32& value) { + Inst(Opcode::SetPatch, patch, value); +} + Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) { switch (bit_size) { case 32: @@ -552,6 +570,19 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu } } +Value IREmitter::CompositeConstruct(std::span elements) { + switch (elements.size()) { + case 2: + return CompositeConstruct(elements[0], elements[1]); + case 3: + return CompositeConstruct(elements[0], elements[1], elements[2]); + case 4: + return CompositeConstruct(elements[0], elements[1], elements[2], elements[3]); + default: + UNREACHABLE_MSG("Composite construct with greater than 4 elements"); + } +} + Value IREmitter::CompositeExtract(const Value& vector, size_t element) { const auto read{[&](Opcode opcode, size_t limit) -> Value { if (element >= limit) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 068aba14..95713565 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -10,6 +10,7 @@ #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/condition.h" +#include "shader_recompiler/ir/patch.h" #include "shader_recompiler/ir/value.h" namespace Shader::IR { @@ -80,10 +81,18 @@ public: [[nodiscard]] U1 Condition(IR::Condition cond); - [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0); + [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, + IR::Value index = IR::Value(u32(0u))); [[nodiscard]] U32 
GetAttributeU32(Attribute attribute, u32 comp = 0); void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0); + [[nodiscard]] F32 GetTessGenericAttribute(const U32& vertex_index, const U32& attr_index, + const U32& comp_index); + void SetTcsGenericAttribute(const F32& value, const U32& attr_index, const U32& comp_index); + + [[nodiscard]] F32 GetPatch(Patch patch); + void SetPatch(Patch patch, const F32& value); + [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); void WriteShared(int bit_size, const Value& value, const U32& offset); @@ -138,6 +147,8 @@ public: [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, const Value& e4); + [[nodiscard]] Value CompositeConstruct(std::span values); + [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); @@ -335,6 +346,7 @@ private: template T Inst(Opcode op, Args... 
args) { auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})}; + it->SetParent(block); return T{Value{&*it}}; } @@ -352,6 +364,7 @@ private: u32 raw_flags{}; std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; + it->SetParent(block); return T{Value{&*it}}; } }; diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index 9b4ad63d..6e7bbe66 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -52,6 +52,8 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::Discard: case Opcode::DiscardCond: case Opcode::SetAttribute: + case Opcode::SetTcsGenericAttribute: + case Opcode::SetPatch: case Opcode::StoreBufferU32: case Opcode::StoreBufferU32x2: case Opcode::StoreBufferU32x3: diff --git a/src/shader_recompiler/ir/opcodes.h b/src/shader_recompiler/ir/opcodes.h index be640297..cd73ace7 100644 --- a/src/shader_recompiler/ir/opcodes.h +++ b/src/shader_recompiler/ir/opcodes.h @@ -30,7 +30,7 @@ constexpr Type Opaque{Type::Opaque}; constexpr Type ScalarReg{Type::ScalarReg}; constexpr Type VectorReg{Type::VectorReg}; constexpr Type Attribute{Type::Attribute}; -constexpr Type SystemValue{Type::SystemValue}; +constexpr Type Patch{Type::Patch}; constexpr Type U1{Type::U1}; constexpr Type U8{Type::U8}; constexpr Type U16{Type::U16}; diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 47727582..470f9fbe 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -60,6 +60,10 @@ OPCODE(SetGotoVariable, Void, U32, OPCODE(GetAttribute, F32, Attribute, U32, U32, ) OPCODE(GetAttributeU32, U32, Attribute, U32, ) OPCODE(SetAttribute, Void, Attribute, F32, U32, ) +OPCODE(GetPatch, F32, Patch, ) +OPCODE(SetPatch, Void, Patch, F32, ) +OPCODE(GetTessGenericAttribute, F32, U32, U32, U32, ) 
+OPCODE(SetTcsGenericAttribute, Void, F32, U32, U32, ) // Flags OPCODE(GetScc, U1, Void, ) diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp index 9624ce6a..16b07e1a 100644 --- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp @@ -216,6 +216,18 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) { } } +template +void FoldMul(IR::Block& block, IR::Inst& inst) { + if (!FoldCommutative(inst, [](T a, T b) { return a * b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate() && Arg(rhs) == 0) { + inst.ReplaceUsesWithAndRemove(IR::Value(0u)); + return; + } +} + void FoldCmpClass(IR::Block& block, IR::Inst& inst) { ASSERT_MSG(inst.Arg(1).IsImmediate(), "Unable to resolve compare operation"); const auto class_mask = static_cast(inst.Arg(1).U32()); @@ -292,7 +304,19 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { FoldWhenAllImmediates(inst, [](u32 a) { return static_cast(a); }); return; case IR::Opcode::IMul32: - FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; }); + FoldMul(block, inst); + return; + case IR::Opcode::UDiv32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { + ASSERT_MSG(b != 0, "Folding UDiv32 with divisor 0"); + return a / b; + }); + return; + case IR::Opcode::UMod32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { + ASSERT_MSG(b != 0, "Folding UMod32 with modulo 0"); + return a % b; + }); return; case IR::Opcode::FPCmpClass32: FoldCmpClass(block, inst); diff --git a/src/shader_recompiler/ir/passes/constant_propogation.h b/src/shader_recompiler/ir/passes/constant_propogation.h new file mode 100644 index 00000000..313a3cc6 --- /dev/null +++ b/src/shader_recompiler/ir/passes/constant_propogation.h @@ -0,0 +1,4 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma 
once \ No newline at end of file diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp new file mode 100644 index 00000000..5cf02b6d --- /dev/null +++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -0,0 +1,744 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later +#include "common/assert.h" +#include "shader_recompiler/info.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/breadth_first_search.h" +#include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/ir/opcodes.h" +#include "shader_recompiler/ir/pattern_matching.h" +#include "shader_recompiler/ir/program.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader::Optimization { + +/** + * Tessellation shaders pass outputs to the next shader using LDS. + * The Hull shader stage receives input control points stored in LDS. + * + * These passes attempt to resolve LDS accesses to attribute accesses and correctly + * write to the tessellation factor tables. + * + * The LDS layout is: + * - TCS inputs for patch 0 + * - TCS inputs for patch 1 + * - TCS inputs for patch 2 + * - ... + * - TCS outputs for patch 0 + * - TCS outputs for patch 1 + * - TCS outputs for patch 2 + * - ... + * - PatchConst TCS outputs for patch 0 + * - PatchConst TCS outputs for patch 1 + * - PatchConst TCS outputs for patch 2 + * + * + * If the Hull stage does not write any new control points the driver will + * optimize LDS layout so input and output control point spaces overlap. + * (Passthrough) + * + * The gnm driver requires a V# holding special constants to be bound + * for reads by the shader. + * The Hull and Domain shaders read values from this buffer which + * contain size and offset information required to address input, output, + * or PatchConst attributes within the current patch. 
+ * See the TessellationDataConstantBuffer struct to see the layout of this V#. + * + * Tessellation factors are stored to a special tessellation factor V# that is automatically bound + * by the driver. This is the input to the fixed function tessellator that actually subdivides the + * domain. We translate these to writes to SPIR-V builtins for tessellation factors in the Hull + * shader. + * The offset into the tess factor buffer determines which factor the shader is writing. + * Additionally, most hull shaders seem to redundantly write tess factors to PatchConst + * attributes, even if dead in the domain shader. We just treat these as generic PatchConst writes. + * + * LDS reads in the Hull shader can be from input control points, and in the Domain shader can + * be hs output control points (output from the perspective of the Hull shader) and patchconst + * values. + * LDS stores in the Hull shader can either be output control point writes or per-patch + * (PatchConst) data writes. The Domain shader exports attributes using EXP instructions, unless it's + * followed by the geometry stage (but we haven't seen this yet), so nothing special there. + * The address calculations can vary significantly and can't be easily pattern matched. We are at + * the mercy of instruction selection the ps4 compiler wanted to use. 
+ * Generally though, they could look something like this: + * Input control point: + * addr = PatchIdInVgt * input_cp_stride * #input_cp_per_patch + index * input_cp_stride + * + attr# * 16 + component + * Output control point: + * addr = #patches * input_cp_stride * #input_cp_per_patch + * + PatchIdInVgt * output_patch_stride + InvocationID * output_cp_stride + + attr# * 16 + component + * Per patch output: + * addr = #patches * input_cp_stride * #cp_per_input_patch + * + #patches * output_patch_stride + * + PatchIdInVgt * per_patch_output_stride + attr# * 16 + component + * + * output_patch_stride and output_cp_stride are usually compile time constants in the gcn + * + * Hull shaders can probably also read output control points corresponding to other threads, like + * shared memory (but we haven't seen this yet). + * ^ This is an UNREACHABLE for now. We may need to insert additional barriers if this happens. + * They should also be able to read PatchConst values, + * although not sure if this happens in practice. + * + * To determine which type of attribute (input, output, patchconst) we check the users of + * TessConstants V# reads to deduce which type of attribute a given load/store to LDS + * is touching. + * + * In the Hull shader, both the PatchId within the VGT group (PatchIdInVgt) and the output control + * point id (InvocationId) are packed in VGPR1 by the driver like + * V1 = InvocationId << 8 | PatchIdInVgt + * The shader typically uses V_BFE_(U|S)32 to extract them. We use the starting bit_pos to determine + * which is which. + * + * This pass does not attempt to deduce the exact attribute referenced in a LDS load/store. + * Instead, it feeds the address in the LDS load/store to the get/set Insts we use for TCS in/out's, + * TES in's, and PatchConst in/out's. 
+ * + * TCS/TES Input attributes: + * We define input attributes using an array in the shader roughly like this: + * // equivalent GLSL in TCS + * layout (location = 0) in vec4 in_attrs[][NUM_INPUT_ATTRIBUTES]; + * + * Here the NUM_INPUT_ATTRIBUTES is derived from the ls_stride member of the TessConstants V#. + * We divide ls_stride (in bytes) by 16 to get the number of vec4 attributes. + * For TES, the number of attributes comes from hs_cp_stride / 16. + * The first (outer) dimension is unsized but corresponds to the number of vertices in the hs input + * patch (for Hull) or the hs output patch (for Domain). + * + * For input reads in TCS or TES, we emit SPIR-V like: + * float value = in_attrs[addr / ls_stride][(addr % ls_stride) >> 4][(addr & 0xF) >> 2]; + * + * For output writes, we assume the control point index is InvocationId, since high level languages + * impose that restriction (although maybe it's technically possible on hardware). So SPIR-V looks + * like this: + * layout (location = 0) out vec4 out_attrs[][NUM_OUTPUT_ATTRIBUTES]; + * out_attrs[InvocationId][(addr % hs_cp_stride) >> 4][(addr & 0xF) >> 2] = value; + * + * NUM_OUTPUT_ATTRIBUTES is derived by hs_cp_stride / 16, so it can link with the TES in_attrs + * variable. + * + * Another challenge is the fact that the GCN shader needs to address attributes from LDS as a whole + * which contains the attributes from many patches. On the other hand, higher level shading + * languages restrict attribute access to the patch of the current thread, which is naturally a + * restriction in SPIR-V also. + * The addresses the ps4 compiler generates for loads/stores and the fact that LDS holds many + * patches' attributes are just implementation details of the ps4 driver/compiler. To deal with + * this, we can replace certain TessConstant V# reads with 0, which only contribute to the base + * address of the current patch's attributes in LDS and not the indexes within the local patch. 
+ * + * (A perfect implementation might need emulation of the VGTs in mesh/compute, loading/storing + * attributes to buffers and not caring about whether they are hs input, hs output, or patchconst + * attributes) + * + */ + +namespace { + +using namespace Shader::Optimiation::PatternMatching; + +static void InitTessConstants(IR::ScalarReg sharp_ptr_base, s32 sharp_dword_offset, + Shader::Info& info, Shader::RuntimeInfo& runtime_info, + TessellationDataConstantBuffer& tess_constants) { + info.tess_consts_ptr_base = sharp_ptr_base; + info.tess_consts_dword_offset = sharp_dword_offset; + info.ReadTessConstantBuffer(tess_constants); + if (info.l_stage == LogicalStage::TessellationControl) { + runtime_info.hs_info.InitFromTessConstants(tess_constants); + } else { + runtime_info.vs_info.InitFromTessConstants(tess_constants); + } + + return; +} + +struct TessSharpLocation { + IR::ScalarReg ptr_base; + u32 dword_off; +}; + +std::optional FindTessConstantSharp(IR::Inst* read_const_buffer) { + IR::Value sharp_ptr_base; + IR::Value sharp_dword_offset; + + IR::Value rv = IR::Value{read_const_buffer}; + IR::Value handle = read_const_buffer->Arg(0); + + if (M_COMPOSITECONSTRUCTU32X4(M_GETUSERDATA(MatchImm(sharp_dword_offset)), MatchIgnore(), + MatchIgnore(), MatchIgnore()) + .Match(handle)) { + return TessSharpLocation{.ptr_base = IR::ScalarReg::Max, + .dword_off = static_cast(sharp_dword_offset.ScalarReg())}; + } else if (M_COMPOSITECONSTRUCTU32X4( + M_READCONST(M_COMPOSITECONSTRUCTU32X2(M_GETUSERDATA(MatchImm(sharp_ptr_base)), + MatchIgnore()), + MatchImm(sharp_dword_offset)), + MatchIgnore(), MatchIgnore(), MatchIgnore()) + .Match(handle)) { + return TessSharpLocation{.ptr_base = sharp_ptr_base.ScalarReg(), + .dword_off = sharp_dword_offset.U32()}; + } + return {}; +} + +// Walker that helps deduce what type of attribute a DS instruction is reading +// or writing, which could be an input control point, output control point, +// or per-patch constant (PatchConst). 
+// For certain ReadConstBuffer instructions using the tess constants V#,, we visit the users +// recursively and increment a counter on the Load/WriteShared users. +// Namely NumPatch (from m_hsNumPatch), HsOutputBase (m_hsOutputBase), +// and PatchConstBase (m_patchConstBase). +// In addr calculations, the term NumPatch * ls_stride * #input_cp_in_patch +// is used as an addend to skip the region for input control points, and similarly +// NumPatch * hs_cp_stride * #output_cp_in_patch is used to skip the region +// for output control points. +// +// TODO: this will break if AMD compiler used distributive property like +// TcsNumPatches * (ls_stride * #input_cp_in_patch + hs_cp_stride * #output_cp_in_patch) +class TessConstantUseWalker { +public: + void MarkTessAttributeUsers(IR::Inst* read_const_buffer, TessConstantAttribute attr) { + u32 inc; + switch (attr) { + case TessConstantAttribute::HsNumPatch: + case TessConstantAttribute::HsOutputBase: + inc = 1; + break; + case TessConstantAttribute::PatchConstBase: + inc = 2; + break; + default: + UNREACHABLE(); + } + + for (IR::Use use : read_const_buffer->Uses()) { + MarkTessAttributeUsersHelper(use, inc); + } + + ++seq_num; + } + +private: + void MarkTessAttributeUsersHelper(IR::Use use, u32 inc) { + IR::Inst* inst = use.user; + + switch (use.user->GetOpcode()) { + case IR::Opcode::LoadSharedU32: + case IR::Opcode::LoadSharedU64: + case IR::Opcode::LoadSharedU128: + case IR::Opcode::WriteSharedU32: + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU128: { + u32 counter = inst->Flags(); + inst->SetFlags(counter + inc); + // Stop here + return; + } + case IR::Opcode::Phi: { + struct PhiCounter { + u16 seq_num; + u8 unique_edge; + u8 counter; + }; + + PhiCounter count = inst->Flags(); + ASSERT_MSG(count.counter == 0 || count.unique_edge == use.operand); + // the point of seq_num is to tell us if we've already traversed this + // phi on the current walk. 
Alternatively we could keep a set of phi's + // seen on the current walk. This is to handle phi cycles + if (count.seq_num == 0) { + // First time we've encountered this phi + count.seq_num = seq_num; + // Mark the phi as having been traversed originally through this edge + count.unique_edge = use.operand; + count.counter = inc; + } else if (count.seq_num < seq_num) { + count.seq_num = seq_num; + // For now, assume we are visiting this phi via the same edge + // as on other walks. If not, some dataflow analysis might be necessary + ASSERT(count.unique_edge == use.operand); + count.counter += inc; + } else { + // count.seq_num == seq_num + // there's a cycle, and we've already been here on this walk + return; + } + inst->SetFlags(count); + break; + } + default: + break; + } + + for (IR::Use use : inst->Uses()) { + MarkTessAttributeUsersHelper(use, inc); + } + } + + u32 seq_num{1u}; +}; + +enum class AttributeRegion : u32 { InputCP, OutputCP, PatchConst }; + +static AttributeRegion GetAttributeRegionKind(IR::Inst* ring_access, const Shader::Info& info, + const Shader::RuntimeInfo& runtime_info) { + u32 count = ring_access->Flags(); + if (count == 0) { + return AttributeRegion::InputCP; + } else if (info.l_stage == LogicalStage::TessellationControl && + runtime_info.hs_info.IsPassthrough()) { + ASSERT(count <= 1); + return AttributeRegion::PatchConst; + } else { + ASSERT(count <= 2); + return AttributeRegion(count); + } +} + +static bool IsDivisibleByStride(IR::Value term, u32 stride) { + IR::Value a, b; + if (MatchU32(stride).Match(term)) { + return true; + } else if (M_BITFIELDUEXTRACT(MatchValue(a), MatchU32(0), MatchU32(24)).Match(term) || + M_BITFIELDSEXTRACT(MatchValue(a), MatchU32(0), MatchU32(24)).Match(term)) { + return IsDivisibleByStride(a, stride); + } else if (M_IMUL32(MatchValue(a), MatchValue(b)).Match(term)) { + return IsDivisibleByStride(a, stride) || IsDivisibleByStride(b, stride); + } + return false; +} + +// Return true if we can eliminate any 
addends +static bool TryOptimizeAddendInModulo(IR::Value addend, u32 stride, std::vector& addends) { + IR::Value a, b; + if (M_IADD32(MatchValue(a), MatchValue(b)).Match(addend)) { + bool ret = false; + ret = TryOptimizeAddendInModulo(a, stride, addends); + ret |= TryOptimizeAddendInModulo(b, stride, addends); + return ret; + } else if (!IsDivisibleByStride(addend, stride)) { + addends.push_back(IR::U32{addend}); + return false; + } else { + return true; + } +} + +// In calculation (a + b + ...) % stride +// Use this fact +// (a + b) mod N = (a mod N + b mod N) mod N +// If any addend is divisible by stride, then we can replace it with 0 in the attribute +// or component index calculation +static IR::U32 TryOptimizeAddressModulo(IR::U32 addr, u32 stride, IR::IREmitter& ir) { + std::vector addends; + if (TryOptimizeAddendInModulo(addr, stride, addends)) { + addr = ir.Imm32(0); + for (auto& addend : addends) { + addr = ir.IAdd(addr, addend); + } + } + return addr; +} + +// TODO: can optimize div in control point index similarly to mod + +// Read a TCS input (InputCP region) or TES input (OutputCP region) +static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmitter& ir, + u32 off_dw) { + if (off_dw > 0) { + addr = ir.IAdd(addr, ir.Imm32(off_dw)); + } + const IR::U32 control_point_index = ir.IDiv(addr, ir.Imm32(stride)); + const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir); + const IR::U32 attr_index = + ir.ShiftRightLogical(ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u)); + const IR::U32 comp_index = + ir.ShiftRightLogical(ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u)); + return ir.GetTessGenericAttribute(control_point_index, attr_index, comp_index); +} + +} // namespace + +void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { + const Info& info = program.info; + + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + const auto opcode = 
inst.GetOpcode(); + switch (opcode) { + case IR::Opcode::StoreBufferU32: + case IR::Opcode::StoreBufferU32x2: + case IR::Opcode::StoreBufferU32x3: + case IR::Opcode::StoreBufferU32x4: { + const auto info = inst.Flags(); + if (!info.globally_coherent) { + break; + } + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto GetValue = [&](IR::Value data) -> IR::F32 { + if (auto* inst = data.TryInstRecursive(); + inst && inst->GetOpcode() == IR::Opcode::BitCastU32F32) { + return IR::F32{inst->Arg(0)}; + } + return ir.BitCast(IR::U32{data}); + }; + const u32 num_dwords = u32(opcode) - u32(IR::Opcode::StoreBufferU32) + 1; + IR::U32 index = IR::U32{inst.Arg(1)}; + ASSERT(index.IsImmediate()); + const u32 gcn_factor_idx = (info.inst_offset.Value() + index.U32()) >> 2; + + const IR::Value data = inst.Arg(2); + auto get_factor_attr = [&](u32 gcn_factor_idx) -> IR::Patch { + // The hull outputs tess factors in different formats depending on the shader. + // For triangle domains, it seems to pack the entries into 4 consecutive floats, + // with the 3 edge factors followed by the 1 interior factor. + // For quads, it does 4 edge factors then 2 interior. + // There is a tess factor stride member of the GNMX hull constants struct in + // a hull program shader binary archive, but this doesn't seem to be + // communicated to the driver. + // The layout seems to be implied by the type of the abstract domain. 
+ switch (runtime_info.hs_info.tess_type) { + case AmdGpu::TessellationType::Quad: + ASSERT(gcn_factor_idx < 6); + return IR::PatchFactor(gcn_factor_idx); + case AmdGpu::TessellationType::Triangle: + ASSERT(gcn_factor_idx < 4); + if (gcn_factor_idx == 3) { + return IR::Patch::TessellationLodInteriorU; + } + return IR::PatchFactor(gcn_factor_idx); + default: + // Point domain types haven't been seen so far + UNREACHABLE_MSG("Unhandled tess type"); + } + }; + + inst.Invalidate(); + if (num_dwords == 1) { + ir.SetPatch(get_factor_attr(gcn_factor_idx), GetValue(data)); + break; + } + auto* inst = data.TryInstRecursive(); + ASSERT(inst && (inst->GetOpcode() == IR::Opcode::CompositeConstructU32x2 || + inst->GetOpcode() == IR::Opcode::CompositeConstructU32x3 || + inst->GetOpcode() == IR::Opcode::CompositeConstructU32x4)); + for (s32 i = 0; i < num_dwords; i++) { + ir.SetPatch(get_factor_attr(gcn_factor_idx + i), GetValue(inst->Arg(i))); + } + break; + } + + case IR::Opcode::WriteSharedU32: + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU128: { + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32 + ? 1 + : (opcode == IR::Opcode::WriteSharedU64 ? 
2 : 4); + const IR::U32 addr{inst.Arg(0)}; + const IR::U32 data{inst.Arg(1).Resolve()}; + + const auto SetOutput = [&](IR::U32 addr, IR::U32 value, AttributeRegion output_kind, + u32 off_dw) { + const IR::F32 data_component = ir.BitCast(value); + + if (output_kind == AttributeRegion::OutputCP) { + if (off_dw > 0) { + addr = ir.IAdd(addr, ir.Imm32(off_dw)); + } + u32 stride = runtime_info.hs_info.hs_output_cp_stride; + // Invocation ID array index is implicit, handled by SPIRV backend + const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir); + const IR::U32 attr_index = ir.ShiftRightLogical( + ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u)); + const IR::U32 comp_index = ir.ShiftRightLogical( + ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u)); + ir.SetTcsGenericAttribute(data_component, attr_index, comp_index); + } else { + ASSERT(output_kind == AttributeRegion::PatchConst); + ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}", + fmt::ptr(addr.Inst())); + ir.SetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw), data_component); + } + }; + + AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); + if (num_dwords == 1) { + SetOutput(addr, data, region, 0); + } else { + for (auto i = 0; i < num_dwords; i++) { + SetOutput(addr, IR::U32{data.Inst()->Arg(i)}, region, i); + } + } + inst.Invalidate(); + break; + } + + case IR::Opcode::LoadSharedU32: { + case IR::Opcode::LoadSharedU64: + case IR::Opcode::LoadSharedU128: + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::U32 addr{inst.Arg(0)}; + AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); + const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32 + ? 1 + : (opcode == IR::Opcode::LoadSharedU64 ? 
2 : 4); + ASSERT_MSG(region == AttributeRegion::InputCP, + "Unhandled read of output or patchconst attribute in hull shader"); + IR::Value attr_read; + if (num_dwords == 1) { + attr_read = ir.BitCast( + ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, 0)); + } else { + boost::container::static_vector read_components; + for (auto i = 0; i < num_dwords; i++) { + const IR::F32 component = + ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, i); + read_components.push_back(ir.BitCast(component)); + } + attr_read = ir.CompositeConstruct(read_components); + } + inst.ReplaceUsesWithAndRemove(attr_read); + break; + } + + default: + break; + } + } + } + + if (runtime_info.hs_info.IsPassthrough()) { + // Copy input attributes to output attributes, indexed by InvocationID + // Passthrough should imply that input and output patches have same number of vertices + IR::Block* entry_block = *program.blocks.begin(); + auto it = std::ranges::find_if(entry_block->Instructions(), [](IR::Inst& inst) { + return inst.GetOpcode() == IR::Opcode::Prologue; + }); + ASSERT(it != entry_block->end()); + ++it; + ASSERT(it != entry_block->end()); + ++it; + // Prologue + // SetExec #true + // <- insert here + // ... 
+ IR::IREmitter ir{*entry_block, it}; + + ASSERT(runtime_info.hs_info.ls_stride % 16 == 0); + u32 num_attributes = runtime_info.hs_info.ls_stride / 16; + const auto invocation_id = ir.GetAttributeU32(IR::Attribute::InvocationId); + for (u32 attr_no = 0; attr_no < num_attributes; attr_no++) { + for (u32 comp = 0; comp < 4; comp++) { + IR::F32 attr_read = + ir.GetTessGenericAttribute(invocation_id, ir.Imm32(attr_no), ir.Imm32(comp)); + // InvocationId is implicit index for output control point writes + ir.SetTcsGenericAttribute(attr_read, ir.Imm32(attr_no), ir.Imm32(comp)); + } + } + // We could wrap the rest of the program in an if stmt + // CopyInputAttrsToOutputs(); // psuedocode + // if (InvocationId == 0) { + // PatchConstFunction(); + // } + // But as long as we treat invocation ID as 0 for all threads, shouldn't matter functionally + } +} + +void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) { + Info& info = program.info; + + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto opcode = inst.GetOpcode(); + switch (inst.GetOpcode()) { + case IR::Opcode::LoadSharedU32: { + case IR::Opcode::LoadSharedU64: + case IR::Opcode::LoadSharedU128: + const IR::U32 addr{inst.Arg(0)}; + AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info); + const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32 + ? 1 + : (opcode == IR::Opcode::LoadSharedU64 ? 
2 : 4); + const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 { + if (region == AttributeRegion::OutputCP) { + return ReadTessInputComponent( + addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw); + } else { + ASSERT(region == AttributeRegion::PatchConst); + return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw)); + } + }; + IR::Value attr_read; + if (num_dwords == 1) { + attr_read = ir.BitCast(GetInput(addr, 0)); + } else { + boost::container::static_vector read_components; + for (auto i = 0; i < num_dwords; i++) { + const IR::F32 component = GetInput(addr, i); + read_components.push_back(ir.BitCast(component)); + } + attr_read = ir.CompositeConstruct(read_components); + } + inst.ReplaceUsesWithAndRemove(attr_read); + break; + } + default: + break; + } + } + } +} + +// Run before either hull or domain transform +void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) { + TessellationDataConstantBuffer tess_constants; + Shader::Info& info = program.info; + // Find the TessellationDataConstantBuffer V# + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + auto found_tess_consts_sharp = [&]() -> bool { + switch (inst.GetOpcode()) { + case IR::Opcode::LoadSharedU32: + case IR::Opcode::LoadSharedU64: + case IR::Opcode::LoadSharedU128: + case IR::Opcode::WriteSharedU32: + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU128: { + IR::Value addr = inst.Arg(0); + auto read_const_buffer = IR::BreadthFirstSearch( + addr, [](IR::Inst* maybe_tess_const) -> std::optional { + if (maybe_tess_const->GetOpcode() == IR::Opcode::ReadConstBuffer) { + return maybe_tess_const; + } + return std::nullopt; + }); + if (read_const_buffer) { + auto sharp_location = FindTessConstantSharp(read_const_buffer.value()); + if (sharp_location) { + if (info.tess_consts_dword_offset >= 0) { + // Its possible theres a readconstbuffer that contributes to an + // LDS address and isnt a TessConstant V# 
read. Could improve on + // this somehow + ASSERT_MSG(static_cast(sharp_location->dword_off) == + info.tess_consts_dword_offset && + sharp_location->ptr_base == + info.tess_consts_ptr_base, + "TessConstants V# is ambiguous"); + } + InitTessConstants(sharp_location->ptr_base, + static_cast(sharp_location->dword_off), info, + runtime_info, tess_constants); + return true; + } + UNREACHABLE_MSG("Failed to match tess constant sharp"); + } + return false; + } + default: + return false; + } + }(); + + if (found_tess_consts_sharp) { + break; + } + } + } + + ASSERT(info.tess_consts_dword_offset >= 0); + + TessConstantUseWalker walker; + + for (IR::Block* block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) { + auto sharp_location = FindTessConstantSharp(&inst); + if (sharp_location && sharp_location->ptr_base == info.tess_consts_ptr_base && + sharp_location->dword_off == info.tess_consts_dword_offset) { + // The shader is reading from the TessConstants V# + IR::Value index = inst.Arg(1); + + ASSERT_MSG(index.IsImmediate(), + "Tessellation constant read with dynamic index"); + u32 off_dw = index.U32(); + ASSERT(off_dw <= + static_cast(TessConstantAttribute::FirstEdgeTessFactorIndex)); + + auto tess_const_attr = static_cast(off_dw); + switch (tess_const_attr) { + case TessConstantAttribute::LsStride: + // If not, we may need to make this runtime state for TES + ASSERT(info.l_stage == LogicalStage::TessellationControl); + inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.ls_stride)); + break; + case TessConstantAttribute::HsCpStride: + inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.hs_cp_stride)); + break; + case TessConstantAttribute::HsNumPatch: + case TessConstantAttribute::HsOutputBase: + case TessConstantAttribute::PatchConstBase: + walker.MarkTessAttributeUsers(&inst, tess_const_attr); + // We should be able to safely set these to 0 so that indexing happens only + // within the 
local patch in the recompiled Vulkan shader. This assumes + // these values only contribute to address calculations for in/out + // attributes in the original gcn shader. + // See the explanation for why we set V2 to 0 when emitting the prologue. + inst.ReplaceUsesWithAndRemove(IR::Value(0u)); + break; + case Shader::TessConstantAttribute::PatchConstSize: + case Shader::TessConstantAttribute::PatchOutputSize: + case Shader::TessConstantAttribute::OffChipTessellationFactorThreshold: + case Shader::TessConstantAttribute::FirstEdgeTessFactorIndex: + // May need to replace PatchConstSize and PatchOutputSize with 0 + break; + default: + UNREACHABLE_MSG("Read past end of TessConstantsBuffer"); + } + } + } + } + } + + // These pattern matching are neccessary for now unless we support dynamic indexing of + // PatchConst attributes and tess factors. PatchConst should be easy, turn those into a single + // vec4 array like in/out attrs. Not sure about tess factors. + if (info.l_stage == LogicalStage::TessellationControl) { + // Replace the BFEs on V1 (packed with patch id within VGT and output cp id) + for (IR::Block* block : program.blocks) { + for (auto it = block->Instructions().begin(); it != block->Instructions().end(); it++) { + IR::Inst& inst = *it; + if (M_BITFIELDUEXTRACT( + M_GETATTRIBUTEU32(MatchAttribute(IR::Attribute::PackedHullInvocationInfo), + MatchIgnore()), + MatchU32(0), MatchU32(8)) + .Match(IR::Value{&inst})) { + IR::IREmitter emit(*block, it); + // This is the patch id within the VGT, not the actual PrimitiveId + // in the draw + IR::Value replacement(0u); + inst.ReplaceUsesWithAndRemove(replacement); + } else if (M_BITFIELDUEXTRACT( + M_GETATTRIBUTEU32( + MatchAttribute(IR::Attribute::PackedHullInvocationInfo), + MatchIgnore()), + MatchU32(8), MatchU32(5)) + .Match(IR::Value{&inst})) { + IR::IREmitter ir(*block, it); + IR::Value replacement; + if (runtime_info.hs_info.IsPassthrough()) { + // Deal with annoying pattern in BB where InvocationID use makes 
no + // sense (in addr calculation for patchconst or tess factor write) + replacement = ir.Imm32(0); + } else { + replacement = ir.GetAttributeU32(IR::Attribute::InvocationId); + } + inst.ReplaceUsesWithAndRemove(replacement); + } + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 7bd47992..61f43e7e 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -18,5 +18,8 @@ void CollectShaderInfoPass(IR::Program& program); void LowerSharedMemToRegisters(IR::Program& program); void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info, Stage stage); +void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info); +void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); +void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp index eb1be296..d6f1efb1 100644 --- a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp +++ b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp @@ -1,11 +1,13 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/assert.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/opcodes.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/recompiler.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Optimization { @@ -23,12 +25,45 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim }; switch (stage) { + case Stage::Local: { + ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) { + const auto opcode = 
inst.GetOpcode(); + switch (opcode) { + case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU32: { + bool is_composite = opcode == IR::Opcode::WriteSharedU64; + u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2; + + u32 offset = 0; + const auto* addr = inst.Arg(0).InstRecursive(); + if (addr->GetOpcode() == IR::Opcode::IAdd32) { + ASSERT(addr->Arg(1).IsImmediate()); + offset = addr->Arg(1).U32(); + } + IR::Value data = inst.Arg(1).Resolve(); + for (s32 i = 0; i < num_components; i++) { + const auto attrib = IR::Attribute::Param0 + (offset / 16); + const auto comp = (offset / 4) % 4; + const IR::U32 value = IR::U32{is_composite ? data.Inst()->Arg(i) : data}; + ir.SetAttribute(attrib, ir.BitCast(value), comp); + offset += 4; + } + inst.Invalidate(); + break; + } + default: + break; + } + }); + break; + } case Stage::Export: { ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) { const auto opcode = inst.GetOpcode(); switch (opcode) { case IR::Opcode::StoreBufferU32: { - if (!inst.Flags().ring_access) { + const auto info = inst.Flags(); + if (!info.system_coherent || !info.globally_coherent) { break; } @@ -61,12 +96,13 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim const auto opcode = inst.GetOpcode(); switch (opcode) { case IR::Opcode::LoadBufferU32: { - if (!inst.Flags().ring_access) { + const auto info = inst.Flags(); + if (!info.system_coherent || !info.globally_coherent) { break; } const auto shl_inst = inst.Arg(1).TryInstRecursive(); - const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2; + const auto vertex_id = ir.Imm32(shl_inst->Arg(0).Resolve().U32() >> 2); const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1); const auto bucket = offset.Resolve().U32() / 256u; const auto attrib = bucket < 4 ? 
IR::Attribute::Position0 @@ -80,7 +116,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim break; } case IR::Opcode::StoreBufferU32: { - if (!inst.Flags().ring_access) { + const auto buffer_info = inst.Flags(); + if (!buffer_info.system_coherent || !buffer_info.globally_coherent) { break; } diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 8b93d72e..c34b59b8 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -17,6 +17,22 @@ void Visit(Info& info, IR::Inst& inst) { case IR::Opcode::GetUserData: info.ud_mask.Set(inst.Arg(0).ScalarReg()); break; + case IR::Opcode::SetPatch: { + const auto patch = inst.Arg(0).Patch(); + if (patch <= IR::Patch::TessellationLodBottom) { + info.stores_tess_level_outer = true; + } else if (patch <= IR::Patch::TessellationLodInteriorV) { + info.stores_tess_level_inner = true; + } else { + info.uses_patches |= 1U << IR::GenericPatchIndex(patch); + } + break; + } + case IR::Opcode::GetPatch: { + const auto patch = inst.Arg(0).Patch(); + info.uses_patches |= 1U << IR::GenericPatchIndex(patch); + break; + } case IR::Opcode::LoadSharedU32: case IR::Opcode::LoadSharedU64: case IR::Opcode::WriteSharedU32: diff --git a/src/shader_recompiler/ir/patch.cpp b/src/shader_recompiler/ir/patch.cpp new file mode 100644 index 00000000..2485bc5b --- /dev/null +++ b/src/shader_recompiler/ir/patch.cpp @@ -0,0 +1,28 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/patch.h" + +namespace Shader::IR { + +std::string NameOf(Patch patch) { + switch (patch) { + case Patch::TessellationLodLeft: + return "TessellationLodLeft"; + case Patch::TessellationLodTop: + return "TessellationLodTop"; + case Patch::TessellationLodRight: + return 
"TessellationLodRight"; + case Patch::TessellationLodBottom: + return "TessellationLodBottom"; + case Patch::TessellationLodInteriorU: + return "TessellationLodInteriorU"; + case Patch::TessellationLodInteriorV: + return "TessellationLodInteriorV"; + default: + const u32 index = u32(patch) - u32(Patch::Component0); + return fmt::format("Component{}", index); + } +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/patch.h b/src/shader_recompiler/ir/patch.h new file mode 100644 index 00000000..65d2192e --- /dev/null +++ b/src/shader_recompiler/ir/patch.h @@ -0,0 +1,173 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "common/types.h" + +namespace Shader::IR { + +enum class Patch : u64 { + TessellationLodLeft, + TessellationLodTop, + TessellationLodRight, + TessellationLodBottom, + TessellationLodInteriorU, + TessellationLodInteriorV, + Component0, + Component1, + Component2, + Component3, + Component4, + Component5, + Component6, + Component7, + Component8, + Component9, + Component10, + Component11, + Component12, + Component13, + Component14, + Component15, + Component16, + Component17, + Component18, + Component19, + Component20, + Component21, + Component22, + Component23, + Component24, + Component25, + Component26, + Component27, + Component28, + Component29, + Component30, + Component31, + Component32, + Component33, + Component34, + Component35, + Component36, + Component37, + Component38, + Component39, + Component40, + Component41, + Component42, + Component43, + Component44, + Component45, + Component46, + Component47, + Component48, + Component49, + Component50, + Component51, + Component52, + Component53, + Component54, + Component55, + Component56, + Component57, + Component58, + Component59, + Component60, + Component61, + Component62, + Component63, + Component64, + Component65, + Component66, + Component67, + Component68, + 
Component69, + Component70, + Component71, + Component72, + Component73, + Component74, + Component75, + Component76, + Component77, + Component78, + Component79, + Component80, + Component81, + Component82, + Component83, + Component84, + Component85, + Component86, + Component87, + Component88, + Component89, + Component90, + Component91, + Component92, + Component93, + Component94, + Component95, + Component96, + Component97, + Component98, + Component99, + Component100, + Component101, + Component102, + Component103, + Component104, + Component105, + Component106, + Component107, + Component108, + Component109, + Component110, + Component111, + Component112, + Component113, + Component114, + Component115, + Component116, + Component117, + Component118, + Component119, +}; +static_assert(static_cast(Patch::Component119) == 125); + +constexpr bool IsGeneric(Patch patch) noexcept { + return patch >= Patch::Component0 && patch <= Patch::Component119; +} + +constexpr Patch PatchFactor(u32 index) { + return static_cast(index); +} + +constexpr Patch PatchGeneric(u32 index) { + return static_cast(static_cast(Patch::Component0) + index); +} + +constexpr u32 GenericPatchIndex(Patch patch) { + return (static_cast(patch) - static_cast(Patch::Component0)) / 4; +} + +constexpr u32 GenericPatchElement(Patch patch) { + return (static_cast(patch) - static_cast(Patch::Component0)) % 4; +} + +[[nodiscard]] std::string NameOf(Patch patch); + +} // namespace Shader::IR + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(const Shader::IR::Patch patch, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(patch)); + } +}; diff --git a/src/shader_recompiler/ir/pattern_matching.h b/src/shader_recompiler/ir/pattern_matching.h new file mode 100644 index 00000000..1279f14c --- /dev/null +++ b/src/shader_recompiler/ir/pattern_matching.h @@ -0,0 +1,127 @@ +// 
SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/value.h" + +namespace Shader::Optimiation::PatternMatching { + +// Attempt at pattern matching for Insts and Values +// Needs improvement, mostly a convenience + +template +struct MatchObject { + inline bool Match(IR::Value v) { + return static_cast(this)->Match(v); + } +}; + +struct MatchValue : MatchObject { + MatchValue(IR::Value& return_val_) : return_val(return_val_) {} + + inline bool Match(IR::Value v) { + return_val = v; + return true; + } + +private: + IR::Value& return_val; +}; + +struct MatchIgnore : MatchObject { + MatchIgnore() {} + + inline bool Match(IR::Value v) { + return true; + } +}; + +struct MatchImm : MatchObject { + MatchImm(IR::Value& v) : return_val(v) {} + + inline bool Match(IR::Value v) { + if (!v.IsImmediate()) { + return false; + } + + return_val = v; + return true; + } + +private: + IR::Value& return_val; +}; + +struct MatchAttribute : MatchObject { + MatchAttribute(IR::Attribute attribute_) : attribute(attribute_) {} + + inline bool Match(IR::Value v) { + return v.Type() == IR::Type::Attribute && v.Attribute() == attribute; + } + +private: + IR::Attribute attribute; +}; + +struct MatchU32 : MatchObject { + MatchU32(u32 imm_) : imm(imm_) {} + + inline bool Match(IR::Value v) { + return v.IsImmediate() && v.Type() == IR::Type::U32 && v.U32() == imm; + } + +private: + u32 imm; +}; + +template +struct MatchInstObject : MatchObject> { + static_assert(sizeof...(Args) == IR::NumArgsOf(opcode)); + MatchInstObject(Args&&... 
args) : pattern(std::forward_as_tuple(args...)) {} + + inline bool Match(IR::Value v) { + IR::Inst* inst = v.TryInstRecursive(); + if (!inst || inst->GetOpcode() != opcode) { + return false; + } + + bool matched = true; + + [&](std::index_sequence) { + ((matched = matched && std::get(pattern).Match(inst->Arg(Is))), ...); + }(std::make_index_sequence{}); + + return matched; + } + +private: + using MatchArgs = std::tuple; + MatchArgs pattern; +}; + +template +inline auto MakeInstPattern(Args&&... args) { + return MatchInstObject(std::forward(args)...); +} + +// Conveniences. TODO probably simpler way of doing this +#define M_READCONST(...) MakeInstPattern(__VA_ARGS__) +#define M_GETUSERDATA(...) MakeInstPattern(__VA_ARGS__) +#define M_BITFIELDUEXTRACT(...) MakeInstPattern(__VA_ARGS__) +#define M_BITFIELDSEXTRACT(...) MakeInstPattern(__VA_ARGS__) +#define M_GETATTRIBUTEU32(...) MakeInstPattern(__VA_ARGS__) +#define M_UMOD32(...) MakeInstPattern(__VA_ARGS__) +#define M_SHIFTRIGHTLOGICAL32(...) MakeInstPattern(__VA_ARGS__) +#define M_IADD32(...) MakeInstPattern(__VA_ARGS__) +#define M_IMUL32(...) MakeInstPattern(__VA_ARGS__) +#define M_BITWISEAND32(...) MakeInstPattern(__VA_ARGS__) +#define M_GETTESSGENERICATTRIBUTE(...) \ + MakeInstPattern(__VA_ARGS__) +#define M_SETTCSGENERICATTRIBUTE(...) \ + MakeInstPattern(__VA_ARGS__) +#define M_COMPOSITECONSTRUCTU32X2(...) \ + MakeInstPattern(__VA_ARGS__) +#define M_COMPOSITECONSTRUCTU32X4(...) 
\ + MakeInstPattern(__VA_ARGS__) + +} // namespace Shader::Optimiation::PatternMatching \ No newline at end of file diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index ca2e9ceb..19e0da3d 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -49,7 +49,8 @@ union BufferInstInfo { BitField<0, 1, u32> index_enable; BitField<1, 1, u32> offset_enable; BitField<2, 12, u32> inst_offset; - BitField<14, 1, u32> ring_access; // global + system coherency + BitField<14, 1, u32> system_coherent; + BitField<15, 1, u32> globally_coherent; }; enum class ScalarReg : u32 { diff --git a/src/shader_recompiler/ir/type.h b/src/shader_recompiler/ir/type.h index ec855a77..0f043fb6 100644 --- a/src/shader_recompiler/ir/type.h +++ b/src/shader_recompiler/ir/type.h @@ -15,7 +15,7 @@ enum class Type { ScalarReg = 1 << 1, VectorReg = 1 << 2, Attribute = 1 << 3, - SystemValue = 1 << 4, + Patch = 1 << 4, U1 = 1 << 5, U8 = 1 << 6, U16 = 1 << 7, diff --git a/src/shader_recompiler/ir/value.cpp b/src/shader_recompiler/ir/value.cpp index 889e9955..8826b80f 100644 --- a/src/shader_recompiler/ir/value.cpp +++ b/src/shader_recompiler/ir/value.cpp @@ -16,6 +16,8 @@ Value::Value(IR::VectorReg reg) noexcept : type{Type::VectorReg}, vreg{reg} {} Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {} +Value::Value(IR::Patch patch) noexcept : type{Type::Patch}, patch{patch} {} + Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {} Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {} diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h index dbe8b5cc..ed1e5536 100644 --- a/src/shader_recompiler/ir/value.h +++ b/src/shader_recompiler/ir/value.h @@ -16,6 +16,7 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/opcodes.h" +#include "shader_recompiler/ir/patch.h" #include 
"shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/type.h" @@ -34,6 +35,7 @@ public: explicit Value(IR::ScalarReg reg) noexcept; explicit Value(IR::VectorReg reg) noexcept; explicit Value(IR::Attribute value) noexcept; + explicit Value(IR::Patch patch) noexcept; explicit Value(bool value) noexcept; explicit Value(u8 value) noexcept; explicit Value(u16 value) noexcept; @@ -56,6 +58,7 @@ public: [[nodiscard]] IR::ScalarReg ScalarReg() const; [[nodiscard]] IR::VectorReg VectorReg() const; [[nodiscard]] IR::Attribute Attribute() const; + [[nodiscard]] IR::Patch Patch() const; [[nodiscard]] bool U1() const; [[nodiscard]] u8 U8() const; [[nodiscard]] u16 U16() const; @@ -75,6 +78,7 @@ private: IR::ScalarReg sreg; IR::VectorReg vreg; IR::Attribute attribute; + IR::Patch patch; bool imm_u1; u8 imm_u8; u16 imm_u16; @@ -330,6 +334,11 @@ inline IR::Attribute Value::Attribute() const { return attribute; } +inline IR::Patch Value::Patch() const { + DEBUG_ASSERT(type == Type::Patch); + return patch; +} + inline bool Value::U1() const { if (IsIdentity()) { return inst->Arg(0).U1(); diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 64f842c4..ad57adb6 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -1,6 +1,9 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/config.h" +#include "common/io_file.h" +#include "common/path_util.h" #include "shader_recompiler/frontend/control_flow_graph.h" #include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/structured_control_flow.h" @@ -29,7 +32,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { } IR::Program TranslateProgram(std::span code, Pools& pools, Info& info, - const RuntimeInfo& runtime_info, const Profile& profile) { + RuntimeInfo& runtime_info, const Profile& profile) { // Ensure first instruction is 
expected. constexpr u32 token_mov_vcchi = 0xBEEB03FF; if (code[0] != token_mov_vcchi) { @@ -60,12 +63,29 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); // Run optimization passes + const auto stage = program.info.stage; + Shader::Optimization::SsaRewritePass(program.post_order_blocks); + Shader::Optimization::IdentityRemovalPass(program.blocks); + if (info.l_stage == LogicalStage::TessellationControl) { + // Tess passes require previous const prop passes for now (for simplicity). TODO allow + // fine grained folding or opportunistic folding we set an operand to an immediate + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + Shader::Optimization::TessellationPreprocess(program, runtime_info); + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + Shader::Optimization::HullShaderTransform(program, runtime_info); + } else if (info.l_stage == LogicalStage::TessellationEval) { + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + Shader::Optimization::TessellationPreprocess(program, runtime_info); + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); + Shader::Optimization::DomainShaderTransform(program, runtime_info); + } Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); - if (program.info.stage != Stage::Compute) { + Shader::Optimization::RingAccessElimination(program, runtime_info, stage); + if (stage != Stage::Compute) { Shader::Optimization::LowerSharedMemToRegisters(program); } - Shader::Optimization::RingAccessElimination(program, runtime_info, program.info.stage); + Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::FlattenExtendedUserdataPass(program); Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::IdentityRemovalPass(program.blocks); diff --git 
a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index f8acf6c9..8180c29b 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -28,6 +28,6 @@ struct Pools { }; [[nodiscard]] IR::Program TranslateProgram(std::span code, Pools& pools, Info& info, - const RuntimeInfo& runtime_info, const Profile& profile); + RuntimeInfo& runtime_info, const Profile& profile); } // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 4c779a36..23e23c11 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -7,6 +7,7 @@ #include #include #include "common/types.h" +#include "shader_recompiler/frontend/tessellation.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/types.h" @@ -21,12 +22,31 @@ enum class Stage : u32 { Local, Compute, }; -constexpr u32 MaxStageTypes = 7; + +// Vertex intentionally comes after TCS/TES due to order of compilation +enum class LogicalStage : u32 { + Fragment, + TessellationControl, + TessellationEval, + Vertex, + Geometry, + Compute, + NumLogicalStages +}; + +constexpr u32 MaxStageTypes = static_cast(LogicalStage::NumLogicalStages); [[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { return static_cast(index); } +struct LocalRuntimeInfo { + u32 ls_stride; + bool links_with_tcs; + + auto operator<=>(const LocalRuntimeInfo&) const noexcept = default; +}; + struct ExportRuntimeInfo { u32 vertex_data_size; @@ -64,9 +84,57 @@ struct VertexRuntimeInfo { u32 num_outputs; std::array outputs; bool emulate_depth_negative_one_to_one{}; + // Domain + AmdGpu::TessellationType tess_type; + AmdGpu::TessellationTopology tess_topology; + AmdGpu::TessellationPartitioning tess_partitioning; + u32 hs_output_cp_stride{}; bool operator==(const VertexRuntimeInfo& other) const noexcept { - return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one; + return 
emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one && + tess_type == other.tess_type && tess_topology == other.tess_topology && + tess_partitioning == other.tess_partitioning && + hs_output_cp_stride == other.hs_output_cp_stride; + } + + void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) { + hs_output_cp_stride = tess_constants.hs_cp_stride; + } +}; + +struct HullRuntimeInfo { + // from registers + u32 num_input_control_points; + u32 num_threads; + AmdGpu::TessellationType tess_type; + + // from tess constants buffer + u32 ls_stride; + u32 hs_output_cp_stride; + u32 hs_output_base; + + auto operator<=>(const HullRuntimeInfo&) const noexcept = default; + + // It might be possible for a non-passthrough TCS to have these conditions, in some + // dumb situation. + // In that case, it should be fine to assume passthrough and declare some extra + // output control points and attributes that shouldnt be read by the TES anyways + bool IsPassthrough() const { + return hs_output_base == 0 && ls_stride == hs_output_cp_stride && num_threads == 1; + }; + + // regs.ls_hs_config.hs_output_control_points contains the number of threads, which + // isn't exactly the number of output control points. + // For passthrough shaders, the register field is set to 1, so use the number of + // input control points + u32 NumOutputControlPoints() const { + return IsPassthrough() ? 
num_input_control_points : num_threads; + } + + void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) { + ls_stride = tess_constants.ls_stride; + hs_output_cp_stride = tess_constants.hs_cp_stride; + hs_output_base = tess_constants.hs_output_base; } }; @@ -150,8 +218,10 @@ struct RuntimeInfo { AmdGpu::FpDenormMode fp_denorm_mode32; AmdGpu::FpRoundMode fp_round_mode32; union { + LocalRuntimeInfo ls_info; ExportRuntimeInfo es_info; VertexRuntimeInfo vs_info; + HullRuntimeInfo hs_info; GeometryRuntimeInfo gs_info; FragmentRuntimeInfo fs_info; ComputeRuntimeInfo cs_info; @@ -174,6 +244,10 @@ struct RuntimeInfo { return es_info == other.es_info; case Stage::Geometry: return gs_info == other.gs_info; + case Stage::Hull: + return hs_info == other.hs_info; + case Stage::Local: + return ls_info == other.ls_info; default: return true; } diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 9b5dd8fa..5799c4c9 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -127,6 +127,18 @@ struct StageSpecialization { [](auto& spec, const auto& desc, AmdGpu::Sampler sharp) { spec.force_unnormalized = sharp.force_unnormalized; }); + + // Initialize runtime_info fields that rely on analysis in tessellation passes + if (info->l_stage == LogicalStage::TessellationControl || + info->l_stage == LogicalStage::TessellationEval) { + Shader::TessellationDataConstantBuffer tess_constants; + info->ReadTessConstantBuffer(tess_constants); + if (info->l_stage == LogicalStage::TessellationControl) { + runtime_info.hs_info.InitFromTessConstants(tess_constants); + } else { + runtime_info.vs_info.InitFromTessConstants(tess_constants); + } + } } void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) { diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 9bc3454d..b6172d37 100644 --- a/src/video_core/amdgpu/liverpool.h +++ 
b/src/video_core/amdgpu/liverpool.h @@ -143,6 +143,13 @@ struct Liverpool { } }; + struct HsTessFactorClamp { + // I've only seen min=0.0, max=1.0 so far. + // TODO why is max set to 1.0? Makes no sense + float hs_max_tess; + float hs_min_tess; + }; + struct ComputeProgram { u32 dispatch_initiator; u32 dim_x; @@ -956,6 +963,7 @@ struct Liverpool { enum VgtStages : u32 { Vs = 0u, // always enabled EsGs = 0xB0u, + LsHs = 0x45u, }; VgtStages raw; @@ -963,7 +971,8 @@ struct Liverpool { BitField<2, 1, u32> hs_en; BitField<3, 2, u32> es_en; BitField<5, 1, u32> gs_en; - BitField<6, 1, u32> vs_en; + BitField<6, 2, u32> vs_en; + BitField<8, 1, u32> dynamic_hs; bool IsStageEnabled(u32 stage) const { switch (stage) { @@ -1059,6 +1068,28 @@ struct Liverpool { }; }; + union LsHsConfig { + u32 raw; + BitField<0, 8, u32> num_patches; + BitField<8, 6, u32> hs_input_control_points; + BitField<14, 6, u32> hs_output_control_points; + }; + + union TessellationConfig { + u32 raw; + BitField<0, 2, TessellationType> type; + BitField<2, 3, TessellationPartitioning> partitioning; + BitField<5, 3, TessellationTopology> topology; + }; + + union TessFactorMemoryBase { + u32 base; + + u64 MemoryBase() const { + return static_cast(base) << 8; + } + }; + union Eqaa { u32 raw; BitField<0, 1, u32> max_anchor_samples; @@ -1109,7 +1140,7 @@ struct Liverpool { ShaderProgram es_program; INSERT_PADDING_WORDS(0x2C); ShaderProgram hs_program; - INSERT_PADDING_WORDS(0x2C); + INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20); ShaderProgram ls_program; INSERT_PADDING_WORDS(0xA4); ComputeProgram cs_program; @@ -1176,7 +1207,9 @@ struct Liverpool { PolygonControl polygon_control; ViewportControl viewport_control; VsOutputControl vs_output_control; - INSERT_PADDING_WORDS(0xA290 - 0xA207 - 1); + INSERT_PADDING_WORDS(0xA287 - 0xA207 - 1); + HsTessFactorClamp hs_clamp; + INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2); GsMode vgt_gs_mode; INSERT_PADDING_WORDS(1); ModeControl mode_control; @@ -1200,9 +1233,10 @@ struct 
Liverpool { BitField<0, 11, u32> vgt_gs_max_vert_out; INSERT_PADDING_WORDS(0xA2D5 - 0xA2CE - 1); ShaderStageEnable stage_enable; - INSERT_PADDING_WORDS(1); + LsHsConfig ls_hs_config; u32 vgt_gs_vert_itemsize[4]; - INSERT_PADDING_WORDS(4); + TessellationConfig tess_config; + INSERT_PADDING_WORDS(3); PolygonOffset poly_offset; GsInstances vgt_gs_instance_cnt; StreamOutConfig vgt_strmout_config; @@ -1216,6 +1250,8 @@ struct Liverpool { INSERT_PADDING_WORDS(0xC24C - 0xC243); u32 num_indices; VgtNumInstances num_instances; + INSERT_PADDING_WORDS(0xC250 - 0xC24D - 1); + TessFactorMemoryBase vgt_tf_memory_base; }; std::array reg_array{}; @@ -1431,6 +1467,7 @@ static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202); static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204); static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206); static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207); +static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287); static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290); static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292); static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B); @@ -1445,6 +1482,7 @@ static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC); static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE); static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5); static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7); +static_assert(GFX6_3D_REG_INDEX(tess_config) == 0xA2DB); static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF); static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4); static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5); @@ -1456,6 +1494,7 @@ static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A); static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381); static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242); static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D); 
+static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250); #undef GFX6_3D_REG_INDEX diff --git a/src/video_core/amdgpu/types.h b/src/video_core/amdgpu/types.h index 6b95ed91..fa849166 100644 --- a/src/video_core/amdgpu/types.h +++ b/src/video_core/amdgpu/types.h @@ -3,6 +3,8 @@ #pragma once +#include +#include #include "common/types.h" namespace AmdGpu { @@ -21,6 +23,69 @@ enum class FpDenormMode : u32 { InOutAllow = 3, }; +enum class TessellationType : u32 { + Isoline = 0, + Triangle = 1, + Quad = 2, +}; + +constexpr std::string_view NameOf(TessellationType type) { + switch (type) { + case TessellationType::Isoline: + return "Isoline"; + case TessellationType::Triangle: + return "Triangle"; + case TessellationType::Quad: + return "Quad"; + default: + return "Unknown"; + } +} + +enum class TessellationPartitioning : u32 { + Integer = 0, + Pow2 = 1, + FracOdd = 2, + FracEven = 3, +}; + +constexpr std::string_view NameOf(TessellationPartitioning partitioning) { + switch (partitioning) { + case TessellationPartitioning::Integer: + return "Integer"; + case TessellationPartitioning::Pow2: + return "Pow2"; + case TessellationPartitioning::FracOdd: + return "FracOdd"; + case TessellationPartitioning::FracEven: + return "FracEven"; + default: + return "Unknown"; + } +} + +enum class TessellationTopology : u32 { + Point = 0, + Line = 1, + TriangleCw = 2, + TriangleCcw = 3, +}; + +constexpr std::string_view NameOf(TessellationTopology topology) { + switch (topology) { + case TessellationTopology::Point: + return "Point"; + case TessellationTopology::Line: + return "Line"; + case TessellationTopology::TriangleCw: + return "TriangleCw"; + case TessellationTopology::TriangleCcw: + return "TriangleCcw"; + default: + return "Unknown"; + } +} + // See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide] enum class PrimitiveType : u32 { None = 0, @@ -118,3 +183,33 @@ enum class NumberFormat : u32 { }; } // namespace AmdGpu + +template 
<> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationType type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationPartitioning type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::TessellationTopology type, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type)); + } +}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 8d495ab0..a39b1837 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -16,7 +16,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler ComputePipelineKey compute_key_, const Shader::Info& info_, vk::ShaderModule module) : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} { - auto& info = stages[int(Shader::Stage::Compute)]; + auto& info = stages[int(Shader::LogicalStage::Compute)]; info = &info_; const vk::PipelineShaderStageCreateInfo shader_ci = { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 79553757..222ffb5a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -8,6 +8,7 @@ #include "common/assert.h" #include "common/scope_exit.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" #include 
"video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" @@ -52,7 +53,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector vertex_bindings; boost::container::static_vector vertex_attributes; if (fetch_shader && !instance.IsVertexInputDynamicState()) { - const auto& vs_info = GetStage(Shader::Stage::Vertex); + const auto& vs_info = GetStage(Shader::LogicalStage::Vertex); for (const auto& attrib : fetch_shader->attributes) { if (attrib.UsesStepRates()) { // Skip attribute binding as the data will be pulled by shader @@ -106,6 +107,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul key.primitive_restart_index == 0xFFFFFFFF, "Primitive restart index other than -1 is not supported yet"); + const vk::PipelineTessellationStateCreateInfo tessellation_state = { + .patchControlPoints = key.patch_control_points, + }; + const vk::PipelineRasterizationStateCreateInfo raster_state = { .depthClampEnable = false, .rasterizerDiscardEnable = false, @@ -204,7 +209,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector shader_stages; - auto stage = u32(Shader::Stage::Vertex); + auto stage = u32(Shader::LogicalStage::Vertex); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eVertex, @@ -212,7 +217,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pName = "main", }); } - stage = u32(Shader::Stage::Geometry); + stage = u32(Shader::LogicalStage::Geometry); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eGeometry, @@ -220,7 +225,23 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pName = "main", }); } - stage = u32(Shader::Stage::Fragment); + stage = 
u32(Shader::LogicalStage::TessellationControl); + if (infos[stage]) { + shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eTessellationControl, + .module = modules[stage], + .pName = "main", + }); + } + stage = u32(Shader::LogicalStage::TessellationEval); + if (infos[stage]) { + shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eTessellationEvaluation, + .module = modules[stage], + .pName = "main", + }); + } + stage = u32(Shader::LogicalStage::Fragment); if (infos[stage]) { shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eFragment, @@ -301,6 +322,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .pStages = shader_stages.data(), .pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr, .pInputAssemblyState = &input_assembly, + .pTessellationState = + stages[u32(Shader::LogicalStage::TessellationControl)] ? 
&tessellation_state : nullptr, .pViewportState = &viewport_info, .pRasterizationState = &raster_state, .pMultisampleState = &multisampling, @@ -327,7 +350,6 @@ void GraphicsPipeline::BuildDescSetLayout() { if (!stage) { continue; } - if (stage->has_readconst) { bindings.push_back({ .binding = binding++, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 703a0680..444c8517 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -52,6 +52,7 @@ struct GraphicsPipelineKey { std::array blend_controls; std::array write_masks; std::array vertex_buffer_formats; + u32 patch_control_points; bool operator==(const GraphicsPipelineKey& key) const noexcept { return std::memcmp(this, &key, sizeof(key)) == 0; @@ -73,7 +74,7 @@ public: bool IsEmbeddedVs() const noexcept { static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f; - return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash; + return key.stage_hashes[u32(Shader::LogicalStage::Vertex)] == EmbeddedVsHash; } auto GetWriteMasks() const { diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index e844150b..76efb215 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -327,6 +327,7 @@ bool Instance::CreateDevice() { .imageCubeArray = features.imageCubeArray, .independentBlend = features.independentBlend, .geometryShader = features.geometryShader, + .tessellationShader = features.tessellationShader, .logicOp = features.logicOp, .depthBiasClamp = features.depthBiasClamp, .fillModeNonSolid = features.fillModeNonSolid, @@ -378,6 +379,7 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT{ .extendedDynamicState = true, }, + vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT{}, 
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{ .extendedDynamicState3ColorWriteMask = true, }, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ff27b742..58473496 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -22,6 +22,8 @@ extern std::unique_ptr presenter; namespace Vulkan { +using Shader::LogicalStage; +using Shader::Stage; using Shader::VsOutput; constexpr static std::array DescriptorHeapSizes = { @@ -78,7 +80,7 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info, : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None)); } -Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { +Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) { auto info = Shader::RuntimeInfo{stage}; const auto& regs = liverpool->regs; const auto BuildCommon = [&](const auto& program) { @@ -89,20 +91,47 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { info.fp_round_mode32 = program.settings.fp_round_mode32; }; switch (stage) { - case Shader::Stage::Export: { + case Stage::Local: { + BuildCommon(regs.ls_program); + if (regs.stage_enable.IsStageEnabled(static_cast(Stage::Hull))) { + info.ls_info.links_with_tcs = true; + Shader::TessellationDataConstantBuffer tess_constants; + const auto* pgm = regs.ProgramForStage(static_cast(Stage::Hull)); + const auto params = Liverpool::GetParams(*pgm); + const auto& hull_info = program_cache.at(params.hash)->info; + hull_info.ReadTessConstantBuffer(tess_constants); + info.ls_info.ls_stride = tess_constants.ls_stride; + } + break; + } + case Stage::Hull: { + BuildCommon(regs.hs_program); + info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points.Value(); + info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points.Value(); + info.hs_info.tess_type = 
regs.tess_config.type; + + // We need to initialize most hs_info fields after finding the V# with tess constants + break; + } + case Stage::Export: { BuildCommon(regs.es_program); info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize; break; } - case Shader::Stage::Vertex: { + case Stage::Vertex: { BuildCommon(regs.vs_program); GatherVertexOutputs(info.vs_info, regs.vs_output_control); info.vs_info.emulate_depth_negative_one_to_one = !instance.IsDepthClipControlSupported() && regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW; + if (l_stage == LogicalStage::TessellationEval) { + info.vs_info.tess_type = regs.tess_config.type; + info.vs_info.tess_topology = regs.tess_config.topology; + info.vs_info.tess_partitioning = regs.tess_config.partitioning; + } break; } - case Shader::Stage::Geometry: { + case Stage::Geometry: { BuildCommon(regs.gs_program); auto& gs_info = info.gs_info; gs_info.output_vertices = regs.vgt_gs_max_vert_out; @@ -121,7 +150,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin"); break; } - case Shader::Stage::Fragment: { + case Stage::Fragment: { BuildCommon(regs.ps_program); info.fs_info.en_flags = regs.ps_input_ena; info.fs_info.addr_flags = regs.ps_input_addr; @@ -143,7 +172,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { } break; } - case Shader::Stage::Compute: { + case Stage::Compute: { const auto& cs_pgm = regs.cs_program; info.num_user_data = cs_pgm.settings.num_user_regs; info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4; @@ -277,6 +306,11 @@ bool PipelineCache::RefreshGraphicsKey() { key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); key.vertex_buffer_formats.fill(vk::Format::eUndefined); + key.patch_control_points = 0; + if (regs.stage_enable.hs_en.Value()) { + key.patch_control_points = regs.ls_hs_config.hs_input_control_points.Value(); + 
} + // First pass of bindings check to idenitfy formats and swizzles and pass them to rhe shader // recompiler. for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { @@ -305,7 +339,7 @@ bool PipelineCache::RefreshGraphicsKey() { fetch_shader = std::nullopt; Shader::Backend::Bindings binding{}; - const auto& TryBindStageRemap = [&](Shader::Stage stage_in, Shader::Stage stage_out) -> bool { + const auto& TryBindStage = [&](Shader::Stage stage_in, Shader::LogicalStage stage_out) -> bool { const auto stage_in_idx = static_cast(stage_in); const auto stage_out_idx = static_cast(stage_out); if (!regs.stage_enable.IsStageEnabled(stage_in_idx)) { @@ -332,23 +366,23 @@ bool PipelineCache::RefreshGraphicsKey() { auto params = Liverpool::GetParams(*pgm); std::optional fetch_shader_; std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_, - key.stage_hashes[stage_out_idx]) = GetProgram(stage_in, params, binding); + key.stage_hashes[stage_out_idx]) = + GetProgram(stage_in, stage_out, params, binding); if (fetch_shader_) { fetch_shader = fetch_shader_; } return true; }; - const auto& TryBindStage = [&](Shader::Stage stage) { return TryBindStageRemap(stage, stage); }; - const auto& IsGsFeaturesSupported = [&]() -> bool { // These checks are temporary until all functionality is implemented. return !regs.vgt_gs_mode.onchip && !regs.vgt_strmout_config.raw; }; - TryBindStage(Shader::Stage::Fragment); + infos.fill(nullptr); + TryBindStage(Stage::Fragment, LogicalStage::Fragment); - const auto* fs_info = infos[static_cast(Shader::Stage::Fragment)]; + const auto* fs_info = infos[static_cast(LogicalStage::Fragment)]; key.mrt_mask = fs_info ? 
fs_info->mrt_mask : 0u; switch (regs.stage_enable.raw) { @@ -356,22 +390,36 @@ bool PipelineCache::RefreshGraphicsKey() { if (!instance.IsGeometryStageSupported() || !IsGsFeaturesSupported()) { return false; } - if (!TryBindStageRemap(Shader::Stage::Export, Shader::Stage::Vertex)) { + if (!TryBindStage(Stage::Export, LogicalStage::Vertex)) { return false; } - if (!TryBindStage(Shader::Stage::Geometry)) { + if (!TryBindStage(Stage::Geometry, LogicalStage::Geometry)) { + return false; + } + break; + } + case Liverpool::ShaderStageEnable::VgtStages::LsHs: { + if (!instance.IsTessellationSupported()) { + break; + } + if (!TryBindStage(Stage::Hull, LogicalStage::TessellationControl)) { + return false; + } + if (!TryBindStage(Stage::Vertex, LogicalStage::TessellationEval)) { + return false; + } + if (!TryBindStage(Stage::Local, LogicalStage::Vertex)) { return false; } break; } default: { - TryBindStage(Shader::Stage::Vertex); - infos[static_cast(Shader::Stage::Geometry)] = nullptr; + TryBindStage(Stage::Vertex, LogicalStage::Vertex); break; } } - const auto vs_info = infos[static_cast(Shader::Stage::Vertex)]; + const auto vs_info = infos[static_cast(Shader::LogicalStage::Vertex)]; if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) { u32 vertex_binding = 0; for (const auto& attrib : fetch_shader->attributes) { @@ -424,19 +472,18 @@ bool PipelineCache::RefreshGraphicsKey() { key.num_samples = num_samples; return true; -} +} // namespace Vulkan bool PipelineCache::RefreshComputeKey() { Shader::Backend::Bindings binding{}; const auto* cs_pgm = &liverpool->regs.cs_program; const auto cs_params = Liverpool::GetParams(*cs_pgm); std::tie(infos[0], modules[0], fetch_shader, compute_key.value) = - GetProgram(Shader::Stage::Compute, cs_params, binding); + GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, binding); return true; } -vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, - const Shader::RuntimeInfo& runtime_info, 
+vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info, std::span code, size_t perm_idx, Shader::Backend::Bindings& binding) { LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, @@ -461,19 +508,19 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, const auto name = fmt::format("{}_{:#018x}_{}", info.stage, info.pgm_hash, perm_idx); Vulkan::SetObjectName(instance.GetDevice(), module, name); if (Config::collectShadersForDebug()) { - DebugState.CollectShader(name, module, spv, code, patch ? *patch : std::span{}, - is_patched); + DebugState.CollectShader(name, info.l_stage, module, spv, code, + patch ? *patch : std::span{}, is_patched); } return module; } -std::tuple, u64> -PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params, - Shader::Backend::Bindings& binding) { - const auto runtime_info = BuildRuntimeInfo(stage); +PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stage, + Shader::ShaderParams params, + Shader::Backend::Bindings& binding) { + auto runtime_info = BuildRuntimeInfo(stage, l_stage); auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); if (new_program) { - it_pgm.value() = std::make_unique(stage, params); + it_pgm.value() = std::make_unique(stage, l_stage, params); auto& program = it_pgm.value(); auto start = binding; const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); @@ -492,7 +539,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params, const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec); if (it == program->modules.end()) { - auto new_info = Shader::Info(stage, params); + auto new_info = Shader::Info(stage, l_stage, params); module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding); program->AddPermut(module, std::move(spec)); } else { diff --git 
a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index c5c2fc98..ec440644 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -34,11 +34,13 @@ struct Program { vk::ShaderModule module; Shader::StageSpecialization spec; }; + using ModuleList = boost::container::small_vector; Shader::Info info; - boost::container::small_vector modules; + ModuleList modules; - explicit Program(Shader::Stage stage, Shader::ShaderParams params) : info{stage, params} {} + explicit Program(Shader::Stage stage, Shader::LogicalStage l_stage, Shader::ShaderParams params) + : info{stage, l_stage, params} {} void AddPermut(vk::ShaderModule module, const Shader::StageSpecialization&& spec) { modules.emplace_back(module, std::move(spec)); @@ -55,10 +57,10 @@ public: const ComputePipeline* GetComputePipeline(); - std::tuple, - u64> - GetProgram(Shader::Stage stage, Shader::ShaderParams params, - Shader::Backend::Bindings& binding); + using Result = std::tuple, u64>; + Result GetProgram(Shader::Stage stage, Shader::LogicalStage l_stage, + Shader::ShaderParams params, Shader::Backend::Bindings& binding); std::optional ReplaceShader(vk::ShaderModule module, std::span spv_code); @@ -71,10 +73,10 @@ private: std::string_view ext); std::optional> GetShaderPatch(u64 hash, Shader::Stage stage, size_t perm_idx, std::string_view ext); - vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info, + vk::ShaderModule CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info, std::span code, size_t perm_idx, Shader::Backend::Bindings& binding); - Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage); + Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage); private: const Instance& instance; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.h 
b/src/video_core/renderer_vulkan/vk_pipeline_common.h index 8c48c83f..1b13a179 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.h @@ -14,9 +14,10 @@ class BufferCache; namespace Vulkan { -static constexpr auto gp_stage_flags = vk::ShaderStageFlagBits::eVertex | - vk::ShaderStageFlagBits::eGeometry | - vk::ShaderStageFlagBits::eFragment; +static constexpr auto gp_stage_flags = + vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eTessellationControl | + vk::ShaderStageFlagBits::eTessellationEvaluation | vk::ShaderStageFlagBits::eGeometry | + vk::ShaderStageFlagBits::eFragment; class Instance; class Scheduler; @@ -37,6 +38,7 @@ public: } auto GetStages() const { + static_assert(static_cast(Shader::LogicalStage::Compute) == Shader::MaxStageTypes - 1); if (is_compute) { return std::span{stages.cend() - 1, stages.cend()}; } else { @@ -44,7 +46,7 @@ public: } } - const Shader::Info& GetStage(Shader::Stage stage) const noexcept { + const Shader::Info& GetStage(Shader::LogicalStage stage) const noexcept { return *stages[u32(stage)]; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index eb2ef360..fef4c7ec 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -4,6 +4,7 @@ #include "common/config.h" #include "common/debug.h" #include "core/memory.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" @@ -48,10 +49,6 @@ void Rasterizer::CpSync() { bool Rasterizer::FilterDraw() { const auto& regs = liverpool->regs; - // Tessellation is unsupported so skip the draw to avoid locking up the driver. - if (regs.primitive_type == AmdGpu::PrimitiveType::PatchPrimitive) { - return false; - } // There are several cases (e.g. 
FCE, FMask/HTile decompression) where we don't need to do an // actual draw hence can skip pipeline creation. if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) { @@ -214,7 +211,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { return; } - const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); + const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset); @@ -271,7 +268,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 return; } - const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); + const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex); const auto& fetch_shader = pipeline->GetFetchShader(); buffer_cache.BindVertexBuffers(vs_info, fetch_shader); buffer_cache.BindIndexBuffer(is_indexed, 0); @@ -326,7 +323,7 @@ void Rasterizer::DispatchDirect() { return; } - const auto& cs = pipeline->GetStage(Shader::Stage::Compute); + const auto& cs = pipeline->GetStage(Shader::LogicalStage::Compute); if (ExecuteShaderHLE(cs, liverpool->regs, *this)) { return; } @@ -387,7 +384,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { const auto& regs = liverpool->regs; if (pipeline->IsCompute()) { - const auto& info = pipeline->GetStage(Shader::Stage::Compute); + const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute); // Most of the time when a metadata is updated with a shader it gets cleared. It means // we can skip the whole dispatch and update the tracked state instead. Also, it is not