video_core: More shader instructions

This commit is contained in:
raphaelthegreat 2024-05-30 18:07:36 +03:00
parent ff21750815
commit dd91456b48
11 changed files with 127 additions and 20 deletions

View file

@ -207,6 +207,7 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("pO96TwzOm5E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemorySize);
LIB_FUNCTION("L-Q3LEjIbgA", "libkernel", 1, "libkernel", 1, 1, sceKernelMapDirectMemory);
LIB_FUNCTION("WFcfL2lzido", "libkernel", 1, "libkernel", 1, 1, sceKernelQueryMemoryProtection);
LIB_FUNCTION("BHouLQzh0X0", "libkernel", 1, "libkernel", 1, 1, sceKernelDirectMemoryQuery);
LIB_FUNCTION("MBuItvba6z8", "libkernel", 1, "libkernel", 1, 1, sceKernelReleaseDirectMemory);
LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap);
LIB_FUNCTION("mL8NDH86iQI", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedFlexibleMemory);

View file

@ -18,11 +18,6 @@ u64 PS4_SYSV_ABI sceKernelGetDirectMemorySize() {
int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u64 len,
u64 alignment, int memoryType, s64* physAddrOut) {
LOG_INFO(Kernel_Vmm,
"searchStart = {:#x}, searchEnd = {:#x}, len = {:#x}, alignment = {:#x}, memoryType = "
"{:#x}",
searchStart, searchEnd, len, alignment, memoryType);
if (searchStart < 0 || searchEnd <= searchStart) {
LOG_ERROR(Kernel_Vmm, "Provided address range is invalid!");
return SCE_KERNEL_ERROR_EINVAL;
@ -44,7 +39,12 @@ int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u
auto* memory = Core::Memory::Instance();
PAddr phys_addr = memory->Allocate(searchStart, searchEnd, len, alignment, memoryType);
*physAddrOut = static_cast<s64>(phys_addr);
LOG_INFO(Kernel_Vmm, "physAddrOut = {:#x}", phys_addr);
LOG_INFO(Kernel_Vmm,
"searchStart = {:#x}, searchEnd = {:#x}, len = {:#x}, "
"alignment = {:#x}, memoryType = {:#x}, physAddrOut = {:#x}",
searchStart, searchEnd, len, alignment, memoryType, phys_addr);
return SCE_OK;
}
@ -115,8 +115,16 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len,
}
/// HLE entry point that forwards a protection query to the memory manager.
///
/// @param addr  Guest address to look up; reinterpreted bit-for-bit as a VAddr.
/// @param start Passed through to MemoryManager::QueryProtection.
/// @param end   Passed through to MemoryManager::QueryProtection.
/// @param prot  Passed through to MemoryManager::QueryProtection.
/// @return Whatever MemoryManager::QueryProtection returns.
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) {
    LOG_WARNING(Kernel_Vmm, "called");
    const auto query_addr = std::bit_cast<VAddr>(addr);
    return Core::Memory::Instance()->QueryProtection(query_addr, start, end, prot);
}
/// HLE entry point that forwards a direct-memory query to the memory manager.
///
/// @param offset     Direct-memory address to look up.
/// @param flags      Only the exact value 1 is interpreted; it enables the manager's
///                   find_next mode. Every other value disables it.
/// @param query_info Output structure handed to MemoryManager::DirectMemoryQuery.
/// @param infoSize   Not consulted by this implementation.
/// @return Whatever MemoryManager::DirectMemoryQuery returns.
int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info,
                                            size_t infoSize) {
    LOG_WARNING(Kernel_Vmm, "called");
    const bool find_next = (flags == 1);
    return Core::Memory::Instance()->DirectMemoryQuery(offset, find_next, query_info);
}
} // namespace Libraries::Kernel

View file

@ -30,6 +30,12 @@ enum MemoryProtection : u32 {
SCE_KERNEL_PROT_GPU_RW = 0x30 // Permit reads/writes from the GPU
};
// Result record filled in by sceKernelDirectMemoryQuery (via
// MemoryManager::DirectMemoryQuery) describing one direct-memory allocation.
struct OrbisQueryInfo {
// Base address of the allocation that was found.
uintptr_t start;
// Address one past the last byte of the allocation (base + size).
uintptr_t end;
// Memory type recorded for the allocation.
int memoryType;
};
u64 PS4_SYSV_ABI sceKernelGetDirectMemorySize();
int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u64 len,
u64 alignment, int memoryType, s64* physAddrOut);
@ -41,4 +47,7 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len,
int flags);
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot);
int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info,
size_t infoSize);
} // namespace Libraries::Kernel

View file

@ -6,6 +6,7 @@
#include "common/assert.h"
#include "common/scope_exit.h"
#include "core/libraries/error_codes.h"
#include "core/libraries/kernel/memory_management.h"
#include "core/memory.h"
#include "video_core/renderer_vulkan/vk_instance.h"
@ -80,7 +81,7 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
if (True(flags & MemoryMapFlags::Fixed) && True(flags & MemoryMapFlags::NoOverwrite)) {
// This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen.
const auto& vma = FindVMA(mapped_addr)->second;
const u32 remaining_size = vma.base + vma.size - mapped_addr;
const size_t remaining_size = vma.base + vma.size - mapped_addr;
ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size);
}
@ -131,7 +132,22 @@ int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* pr
*start = reinterpret_cast<void*>(vma.base);
*end = reinterpret_cast<void*>(vma.base + vma.size);
*prot = static_cast<u32>(vma.prot);
return SCE_OK;
return ORBIS_OK;
}
/// Looks up the direct-memory allocation that describes `addr`.
///
/// @param addr      Direct-memory address to query.
/// @param find_next When true and no allocation contains `addr`, the allocation with the
///                  lowest base address above `addr` is reported instead. Previously this
///                  parameter was accepted but ignored.
/// @param out_info  Receives the start, end (base + size) and memory type of the
///                  allocation that was selected. Left untouched on failure.
/// @return ORBIS_OK on success, SCE_KERNEL_ERROR_EACCES when no allocation qualifies.
int MemoryManager::DirectMemoryQuery(PAddr addr, bool find_next,
                                     Libraries::Kernel::OrbisQueryInfo* out_info) {
    const auto contains_addr = [addr](const DirectMemoryArea& alloc) {
        return alloc.base <= addr && addr < alloc.base + alloc.size;
    };

    auto it = std::ranges::find_if(allocations, contains_addr);
    if (it == allocations.end() && find_next) {
        // No ordering of `allocations` is assumed, so scan every entry and keep the
        // closest one that starts above the queried address.
        for (auto candidate = allocations.begin(); candidate != allocations.end(); ++candidate) {
            if (candidate->base <= addr) {
                continue;
            }
            if (it == allocations.end() || candidate->base < it->base) {
                it = candidate;
            }
        }
    }
    if (it == allocations.end()) {
        return SCE_KERNEL_ERROR_EACCES;
    }

    out_info->start = it->base;
    out_info->end = it->base + it->size;
    out_info->memoryType = it->memory_type;
    return ORBIS_OK;
}
std::pair<vk::Buffer, size_t> MemoryManager::GetVulkanBuffer(VAddr addr) {
@ -146,7 +162,8 @@ VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) {
ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map");
const VirtualMemoryArea& vma = vma_handle->second;
ASSERT_MSG(vma.type == VMAType::Free, "Adding a mapping to already mapped region");
ASSERT_MSG(vma.type == VMAType::Free && vma.base <= virtual_addr,
"Adding a mapping to already mapped region");
const VAddr start_in_vma = virtual_addr - vma.base;
const VAddr end_in_vma = start_in_vma + size;
@ -164,7 +181,7 @@ VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) {
return vma_handle->second;
}
MemoryManager::VMAHandle MemoryManager::Split(VMAHandle vma_handle, u32 offset_in_vma) {
MemoryManager::VMAHandle MemoryManager::Split(VMAHandle vma_handle, size_t offset_in_vma) {
auto& old_vma = vma_handle->second;
ASSERT(offset_in_vma < old_vma.size && offset_in_vma > 0);
@ -199,6 +216,7 @@ MemoryManager::VMAHandle MemoryManager::MergeAdjacent(VMAHandle iter) {
}
void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) {
return;
const vk::Device device = instance->GetDevice();
const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties();
void* host_pointer = reinterpret_cast<void*>(addr);
@ -270,6 +288,7 @@ void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) {
}
void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) {
return;
const auto it = mapped_memories.find(addr);
ASSERT(it != mapped_memories.end() && it->second.buffer_size == size);
mapped_memories.erase(it);

View file

@ -17,6 +17,10 @@ namespace Vulkan {
class Instance;
}
// Forward declaration only: this header refers to OrbisQueryInfo by pointer
// (see MemoryManager::DirectMemoryQuery), so the full definition is not needed here.
namespace Libraries::Kernel {
struct OrbisQueryInfo;
}
namespace Core {
enum class MemoryProt : u32 {
@ -77,12 +81,12 @@ struct VirtualMemoryArea {
}
};
constexpr VAddr SYSTEM_RESERVED = 0x800000000u;
constexpr VAddr CODE_BASE_OFFSET = 0x100000000u;
constexpr VAddr SYSTEM_MANAGED_MIN = 0x0000040000u;
constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFu;
constexpr VAddr USER_MIN = 0x1000000000u;
constexpr VAddr USER_MAX = 0xFBFFFFFFFFu;
constexpr VAddr SYSTEM_RESERVED = 0x800000000ULL;
constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL;
constexpr VAddr SYSTEM_MANAGED_MIN = 0x0000040000ULL;
constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL;
constexpr VAddr USER_MIN = 0x1000000000ULL;
constexpr VAddr USER_MAX = 0xFBFFFFFFFFULL;
class MemoryManager {
using VMAMap = std::map<VAddr, VirtualMemoryArea>;
@ -109,6 +113,8 @@ public:
int QueryProtection(VAddr addr, void** start, void** end, u32* prot);
int DirectMemoryQuery(PAddr addr, bool find_next, Libraries::Kernel::OrbisQueryInfo* out_info);
std::pair<vk::Buffer, size_t> GetVulkanBuffer(VAddr addr);
private:
@ -123,7 +129,7 @@ private:
VirtualMemoryArea& AddMapping(VAddr virtual_addr, size_t size);
VMAHandle Split(VMAHandle vma_handle, u32 offset_in_vma);
VMAHandle Split(VMAHandle vma_handle, size_t offset_in_vma);
VMAHandle MergeAdjacent(VMAHandle iter);

View file

@ -32,7 +32,7 @@ private:
};
std::mutex m_mutex;
bool m_connected = false;
bool m_connected = true;
State m_last_state;
int m_connected_count = 0;
u32 m_states_num = 0;

View file

@ -111,6 +111,9 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
case OperandField::ConstFloatNeg_1_0:
value = ir.Imm32(-1.0f);
break;
case OperandField::ConstFloatNeg_2_0:
value = ir.Imm32(-2.0f);
break;
case OperandField::VccLo:
value = ir.GetVccLo();
break;
@ -327,9 +330,30 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_ANDN2_B64:
translator.S_ANDN2_B64(inst);
break;
case Opcode::V_SIN_F32:
translator.V_SIN_F32(inst);
break;
case Opcode::V_LOG_F32:
translator.V_LOG_F32(inst);
break;
case Opcode::V_EXP_F32:
translator.V_EXP_F32(inst);
break;
case Opcode::V_SQRT_F32:
translator.V_SQRT_F32(inst);
break;
case Opcode::V_MIN_F32:
translator.V_MIN_F32(inst);
break;
case Opcode::V_MIN3_F32:
translator.V_MIN3_F32(inst);
break;
case Opcode::S_NOP:
case Opcode::S_AND_B64:
case Opcode::S_CBRANCH_EXECZ:
case Opcode::S_CBRANCH_SCC0:
case Opcode::S_CBRANCH_SCC1:
case Opcode::S_BRANCH:
case Opcode::S_MOV_B64:
case Opcode::S_WQM_B64:
case Opcode::V_INTERP_P1_F32:

View file

@ -68,6 +68,12 @@ public:
void V_CMP_F32(ConditionOp op, const GcnInst& inst);
void V_MAX_F32(const GcnInst& inst);
void V_RSQ_F32(const GcnInst& inst);
void V_SIN_F32(const GcnInst& inst);
void V_LOG_F32(const GcnInst& inst);
void V_EXP_F32(const GcnInst& inst);
void V_SQRT_F32(const GcnInst& inst);
void V_MIN_F32(const GcnInst& inst);
void V_MIN3_F32(const GcnInst& inst);
// Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);

View file

@ -26,7 +26,7 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
void Translator::V_MUL_F32(const GcnInst& inst) {
const IR::VectorReg dst_reg{inst.dst[0].code};
ir.SetVectorReg(dst_reg, ir.FPMul(GetSrc(inst.src[0]), GetSrc(inst.src[1])));
ir.SetVectorReg(dst_reg, ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)));
}
void Translator::V_CMP_EQ_U32(const GcnInst& inst) {
@ -198,4 +198,37 @@ void Translator::V_RSQ_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPRecipSqrt(src0));
}
void Translator::V_SIN_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
SetDst(inst.dst[0], ir.FPSin(src0));
}
void Translator::V_LOG_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
SetDst(inst.dst[0], ir.FPLog2(src0));
}
void Translator::V_EXP_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
SetDst(inst.dst[0], ir.FPExp2(src0));
}
void Translator::V_SQRT_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
SetDst(inst.dst[0], ir.FPSqrt(src0));
}
void Translator::V_MIN_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
SetDst(inst.dst[0], ir.FPMin(src0, src1));
}
void Translator::V_MIN3_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
const IR::F32 src2{GetSrc(inst.src[2], true)};
SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2)));
}
} // namespace Shader::Gcn

View file

@ -42,7 +42,7 @@ struct GraphicsPipelineKey {
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
bool operator==(const GraphicsPipelineKey& key) const noexcept {
return std::memcmp(this, &key, sizeof(GraphicsPipelineKey)) == 0;
return std::memcmp(this, &key, sizeof(key)) == 0;
}
};
static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>);

View file

@ -93,6 +93,7 @@ void PipelineCache::RefreshGraphicsKey() {
key.stencil_ref_back = regs.stencil_ref_back;
key.prim_type = regs.primitive_type;
key.polygon_mode = regs.polygon_control.PolyMode();
key.cull_mode = regs.polygon_control.CullingMode();
const auto& db = regs.depth_buffer;
key.depth_format = key.depth.depth_enable