From dd91456b48f995b4bb9583c4345430b2d4f292eb Mon Sep 17 00:00:00 2001
From: raphaelthegreat <47210458+raphaelthegreat@users.noreply.github.com>
Date: Thu, 30 May 2024 18:07:36 +0300
Subject: [PATCH] video_core: More shader instructions

---
 src/core/libraries/kernel/libkernel.cpp       |  1 +
 .../libraries/kernel/memory_management.cpp    | 20 +++++++----
 src/core/libraries/kernel/memory_management.h |  9 +++++
 src/core/memory.cpp                           | 27 +++++++++++---
 src/core/memory.h                             | 20 +++++++----
 src/input/controller.h                        |  2 +-
 .../frontend/translate/translate.cpp          | 24 +++++++++++++
 .../frontend/translate/translate.h            |  6 ++++
 .../frontend/translate/vector_alu.cpp         | 35 ++++++++++++++++++-
 .../renderer_vulkan/vk_graphics_pipeline.h    |  2 +-
 .../renderer_vulkan/vk_pipeline_cache.cpp     |  1 +
 11 files changed, 127 insertions(+), 20 deletions(-)

diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp
index cad5dd97..a8c3975e 100644
--- a/src/core/libraries/kernel/libkernel.cpp
+++ b/src/core/libraries/kernel/libkernel.cpp
@@ -207,6 +207,7 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
     LIB_FUNCTION("pO96TwzOm5E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemorySize);
     LIB_FUNCTION("L-Q3LEjIbgA", "libkernel", 1, "libkernel", 1, 1, sceKernelMapDirectMemory);
     LIB_FUNCTION("WFcfL2lzido", "libkernel", 1, "libkernel", 1, 1, sceKernelQueryMemoryProtection);
+    LIB_FUNCTION("BHouLQzh0X0", "libkernel", 1, "libkernel", 1, 1, sceKernelDirectMemoryQuery);
     LIB_FUNCTION("MBuItvba6z8", "libkernel", 1, "libkernel", 1, 1, sceKernelReleaseDirectMemory);
     LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap);
     LIB_FUNCTION("mL8NDH86iQI", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedFlexibleMemory);
diff --git a/src/core/libraries/kernel/memory_management.cpp b/src/core/libraries/kernel/memory_management.cpp
index 2e650273..9e540107 100644
--- a/src/core/libraries/kernel/memory_management.cpp
+++ b/src/core/libraries/kernel/memory_management.cpp
@@ -18,11 +18,6 @@ u64 PS4_SYSV_ABI sceKernelGetDirectMemorySize() {
 
 int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u64 len,
                                                u64 alignment, int memoryType, s64* physAddrOut) {
-    LOG_INFO(Kernel_Vmm,
-             "searchStart = {:#x}, searchEnd = {:#x}, len = {:#x}, alignment = {:#x}, memoryType = "
-             "{:#x}",
-             searchStart, searchEnd, len, alignment, memoryType);
-
     if (searchStart < 0 || searchEnd <= searchStart) {
         LOG_ERROR(Kernel_Vmm, "Provided address range is invalid!");
         return SCE_KERNEL_ERROR_EINVAL;
@@ -44,7 +39,12 @@ int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u
     auto* memory = Core::Memory::Instance();
     PAddr phys_addr = memory->Allocate(searchStart, searchEnd, len, alignment, memoryType);
     *physAddrOut = static_cast<s64>(phys_addr);
-    LOG_INFO(Kernel_Vmm, "physAddrOut = {:#x}", phys_addr);
+
+    LOG_INFO(Kernel_Vmm,
+             "searchStart = {:#x}, searchEnd = {:#x}, len = {:#x}, "
+             "alignment = {:#x}, memoryType = {:#x}, physAddrOut = {:#x}",
+             searchStart, searchEnd, len, alignment, memoryType, phys_addr);
+
     return SCE_OK;
 }
 
@@ -115,8 +115,16 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len,
 }
 
 int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) {
+    LOG_WARNING(Kernel_Vmm, "called");
     auto* memory = Core::Memory::Instance();
     return memory->QueryProtection(std::bit_cast<VAddr>(addr), start, end, prot);
 }
 
+int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info,
+                                            size_t infoSize) {
+    LOG_WARNING(Kernel_Vmm, "called");
+    auto* memory = Core::Memory::Instance();
+    return memory->DirectMemoryQuery(offset, flags == 1, query_info);
+}
+
 } // namespace Libraries::Kernel
diff --git a/src/core/libraries/kernel/memory_management.h b/src/core/libraries/kernel/memory_management.h
index c4bc338f..be0d8514 100644
--- a/src/core/libraries/kernel/memory_management.h
+++ b/src/core/libraries/kernel/memory_management.h
@@ -30,6 +30,12 @@ enum MemoryProtection : u32 {
     SCE_KERNEL_PROT_GPU_RW = 0x30     // Permit reads/writes from the GPU
 };
 
+struct OrbisQueryInfo {
+    uintptr_t start;
+    uintptr_t end;
+    int memoryType;
+};
+
 u64 PS4_SYSV_ABI sceKernelGetDirectMemorySize();
 int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u64 len,
                                                u64 alignment, int memoryType, s64* physAddrOut);
@@ -41,4 +47,7 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len,
                                             int flags);
 int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot);
 
+int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info,
+                                            size_t infoSize);
+
 } // namespace Libraries::Kernel
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 83758688..06fde132 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -6,6 +6,7 @@
 #include "common/assert.h"
 #include "common/scope_exit.h"
 #include "core/libraries/error_codes.h"
+#include "core/libraries/kernel/memory_management.h"
 #include "core/memory.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 
@@ -80,7 +81,7 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
     if (True(flags & MemoryMapFlags::Fixed) && True(flags & MemoryMapFlags::NoOverwrite)) {
         // This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen.
         const auto& vma = FindVMA(mapped_addr)->second;
-        const u32 remaining_size = vma.base + vma.size - mapped_addr;
+        const size_t remaining_size = vma.base + vma.size - mapped_addr;
         ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size);
     }
 
@@ -131,7 +132,22 @@ int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* pr
     *start = reinterpret_cast<void*>(vma.base);
     *end = reinterpret_cast<void*>(vma.base + vma.size);
     *prot = static_cast<u32>(vma.prot);
-    return SCE_OK;
+    return ORBIS_OK;
+}
+
+int MemoryManager::DirectMemoryQuery(PAddr addr, bool find_next,
+                                     Libraries::Kernel::OrbisQueryInfo* out_info) {
+    const auto it = std::ranges::find_if(allocations, [&](const DirectMemoryArea& alloc) {
+        return alloc.base <= addr && addr < alloc.base + alloc.size;
+    });
+    if (it == allocations.end()) {
+        return SCE_KERNEL_ERROR_EACCES;
+    }
+
+    out_info->start = it->base;
+    out_info->end = it->base + it->size;
+    out_info->memoryType = it->memory_type;
+    return ORBIS_OK;
 }
 
 std::pair<vk::Buffer, size_t> MemoryManager::GetVulkanBuffer(VAddr addr) {
@@ -146,7 +162,8 @@ VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) {
     ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map");
 
     const VirtualMemoryArea& vma = vma_handle->second;
-    ASSERT_MSG(vma.type == VMAType::Free, "Adding a mapping to already mapped region");
+    ASSERT_MSG(vma.type == VMAType::Free && vma.base <= virtual_addr,
+               "Adding a mapping to already mapped region");
 
     const VAddr start_in_vma = virtual_addr - vma.base;
     const VAddr end_in_vma = start_in_vma + size;
@@ -164,7 +181,7 @@ VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) {
     return vma_handle->second;
 }
 
-MemoryManager::VMAHandle MemoryManager::Split(VMAHandle vma_handle, u32 offset_in_vma) {
+MemoryManager::VMAHandle MemoryManager::Split(VMAHandle vma_handle, size_t offset_in_vma) {
     auto& old_vma = vma_handle->second;
     ASSERT(offset_in_vma < old_vma.size && offset_in_vma > 0);
 
@@ -199,6 +216,7 @@ MemoryManager::VMAHandle MemoryManager::MergeAdjacent(VMAHandle iter) {
 }
 
 void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) {
+    return;
     const vk::Device device = instance->GetDevice();
     const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties();
     void* host_pointer = reinterpret_cast<void*>(addr);
@@ -270,6 +288,7 @@ void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) {
 }
 
 void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) {
+    return;
     const auto it = mapped_memories.find(addr);
     ASSERT(it != mapped_memories.end() && it->second.buffer_size == size);
     mapped_memories.erase(it);
diff --git a/src/core/memory.h b/src/core/memory.h
index 24e38df8..ab9006a4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -17,6 +17,10 @@ namespace Vulkan {
 class Instance;
 }
 
+namespace Libraries::Kernel {
+struct OrbisQueryInfo;
+}
+
 namespace Core {
 
 enum class MemoryProt : u32 {
@@ -77,12 +81,12 @@ struct VirtualMemoryArea {
     }
 };
 
-constexpr VAddr SYSTEM_RESERVED = 0x800000000u;
-constexpr VAddr CODE_BASE_OFFSET = 0x100000000u;
-constexpr VAddr SYSTEM_MANAGED_MIN = 0x0000040000u;
-constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFu;
-constexpr VAddr USER_MIN = 0x1000000000u;
-constexpr VAddr USER_MAX = 0xFBFFFFFFFFu;
+constexpr VAddr SYSTEM_RESERVED = 0x800000000ULL;
+constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL;
+constexpr VAddr SYSTEM_MANAGED_MIN = 0x0000040000ULL;
+constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL;
+constexpr VAddr USER_MIN = 0x1000000000ULL;
+constexpr VAddr USER_MAX = 0xFBFFFFFFFFULL;
 
 class MemoryManager {
     using VMAMap = std::map<VAddr, VirtualMemoryArea>;
@@ -109,6 +113,8 @@ public:
 
     int QueryProtection(VAddr addr, void** start, void** end, u32* prot);
 
+    int DirectMemoryQuery(PAddr addr, bool find_next, Libraries::Kernel::OrbisQueryInfo* out_info);
+
     std::pair<vk::Buffer, size_t> GetVulkanBuffer(VAddr addr);
 
 private:
@@ -123,7 +129,7 @@ private:
 
     VirtualMemoryArea& AddMapping(VAddr virtual_addr, size_t size);
 
-    VMAHandle Split(VMAHandle vma_handle, u32 offset_in_vma);
+    VMAHandle Split(VMAHandle vma_handle, size_t offset_in_vma);
 
     VMAHandle MergeAdjacent(VMAHandle iter);
 
diff --git a/src/input/controller.h b/src/input/controller.h
index 4819e2d7..774bbca7 100644
--- a/src/input/controller.h
+++ b/src/input/controller.h
@@ -32,7 +32,7 @@ private:
     };
 
     std::mutex m_mutex;
-    bool m_connected = false;
+    bool m_connected = true;
     State m_last_state;
     int m_connected_count = 0;
     u32 m_states_num = 0;
diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp
index 6136b46a..62409152 100644
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -111,6 +111,9 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
     case OperandField::ConstFloatNeg_1_0:
         value = ir.Imm32(-1.0f);
         break;
+    case OperandField::ConstFloatNeg_2_0:
+        value = ir.Imm32(-2.0f);
+        break;
     case OperandField::VccLo:
         value = ir.GetVccLo();
         break;
@@ -327,9 +330,30 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
         case Opcode::S_ANDN2_B64:
             translator.S_ANDN2_B64(inst);
             break;
+        case Opcode::V_SIN_F32:
+            translator.V_SIN_F32(inst);
+            break;
+        case Opcode::V_LOG_F32:
+            translator.V_LOG_F32(inst);
+            break;
+        case Opcode::V_EXP_F32:
+            translator.V_EXP_F32(inst);
+            break;
+        case Opcode::V_SQRT_F32:
+            translator.V_SQRT_F32(inst);
+            break;
+        case Opcode::V_MIN_F32:
+            translator.V_MIN_F32(inst);
+            break;
+        case Opcode::V_MIN3_F32:
+            translator.V_MIN3_F32(inst);
+            break;
         case Opcode::S_NOP:
+        case Opcode::S_AND_B64:
         case Opcode::S_CBRANCH_EXECZ:
         case Opcode::S_CBRANCH_SCC0:
+        case Opcode::S_CBRANCH_SCC1:
+        case Opcode::S_BRANCH:
         case Opcode::S_MOV_B64:
         case Opcode::S_WQM_B64:
         case Opcode::V_INTERP_P1_F32:
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index 6e50e8fb..e721dad5 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -68,6 +68,12 @@ public:
     void V_CMP_F32(ConditionOp op, const GcnInst& inst);
     void V_MAX_F32(const GcnInst& inst);
     void V_RSQ_F32(const GcnInst& inst);
+    void V_SIN_F32(const GcnInst& inst);
+    void V_LOG_F32(const GcnInst& inst);
+    void V_EXP_F32(const GcnInst& inst);
+    void V_SQRT_F32(const GcnInst& inst);
+    void V_MIN_F32(const GcnInst& inst);
+    void V_MIN3_F32(const GcnInst& inst);
 
     // Vector Memory
     void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index 44394013..81366117 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -26,7 +26,7 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
 
 void Translator::V_MUL_F32(const GcnInst& inst) {
     const IR::VectorReg dst_reg{inst.dst[0].code};
-    ir.SetVectorReg(dst_reg, ir.FPMul(GetSrc(inst.src[0]), GetSrc(inst.src[1])));
+    ir.SetVectorReg(dst_reg, ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)));
 }
 
 void Translator::V_CMP_EQ_U32(const GcnInst& inst) {
@@ -198,4 +198,37 @@ void Translator::V_RSQ_F32(const GcnInst& inst) {
     SetDst(inst.dst[0], ir.FPRecipSqrt(src0));
 }
 
+void Translator::V_SIN_F32(const GcnInst& inst) {
+    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    SetDst(inst.dst[0], ir.FPSin(src0));
+}
+
+void Translator::V_LOG_F32(const GcnInst& inst) {
+    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    SetDst(inst.dst[0], ir.FPLog2(src0));
+}
+
+void Translator::V_EXP_F32(const GcnInst& inst) {
+    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    SetDst(inst.dst[0], ir.FPExp2(src0));
+}
+
+void Translator::V_SQRT_F32(const GcnInst& inst) {
+    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    SetDst(inst.dst[0], ir.FPSqrt(src0));
+}
+
+void Translator::V_MIN_F32(const GcnInst& inst) {
+    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src1{GetSrc(inst.src[1], true)};
+    SetDst(inst.dst[0], ir.FPMin(src0, src1));
+}
+
+void Translator::V_MIN3_F32(const GcnInst& inst) {
+    const IR::F32 src0{GetSrc(inst.src[0], true)};
+    const IR::F32 src1{GetSrc(inst.src[1], true)};
+    const IR::F32 src2{GetSrc(inst.src[2], true)};
+    SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2)));
+}
+
 } // namespace Shader::Gcn
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 17ed225b..ddc67d8e 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -42,7 +42,7 @@ struct GraphicsPipelineKey {
     std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
 
     bool operator==(const GraphicsPipelineKey& key) const noexcept {
-        return std::memcmp(this, &key, sizeof(GraphicsPipelineKey)) == 0;
+        return std::memcmp(this, &key, sizeof(key)) == 0;
     }
 };
 static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>);
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 1ddfa2fa..66ff9403 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -93,6 +93,7 @@ void PipelineCache::RefreshGraphicsKey() {
     key.stencil_ref_back = regs.stencil_ref_back;
     key.prim_type = regs.primitive_type;
     key.polygon_mode = regs.polygon_control.PolyMode();
+    key.cull_mode = regs.polygon_control.CullingMode();
 
     const auto& db = regs.depth_buffer;
     key.depth_format = key.depth.depth_enable