Mirror of https://github.com/shadps4-emu/shadPS4.git
video_core: Moar shader instruction
parent ff21750815
commit dd91456b48

@@ -207,6 +207,7 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
    LIB_FUNCTION("pO96TwzOm5E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemorySize);
    LIB_FUNCTION("L-Q3LEjIbgA", "libkernel", 1, "libkernel", 1, 1, sceKernelMapDirectMemory);
    LIB_FUNCTION("WFcfL2lzido", "libkernel", 1, "libkernel", 1, 1, sceKernelQueryMemoryProtection);
    LIB_FUNCTION("BHouLQzh0X0", "libkernel", 1, "libkernel", 1, 1, sceKernelDirectMemoryQuery);
    LIB_FUNCTION("MBuItvba6z8", "libkernel", 1, "libkernel", 1, 1, sceKernelReleaseDirectMemory);
    LIB_FUNCTION("cQke9UuBQOk", "libkernel", 1, "libkernel", 1, 1, sceKernelMunmap);
    LIB_FUNCTION("mL8NDH86iQI", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedFlexibleMemory);

@@ -18,11 +18,6 @@ u64 PS4_SYSV_ABI sceKernelGetDirectMemorySize() {
int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u64 len,
                                               u64 alignment, int memoryType, s64* physAddrOut) {
-   LOG_INFO(Kernel_Vmm,
-            "searchStart = {:#x}, searchEnd = {:#x}, len = {:#x}, alignment = {:#x}, memoryType = "
-            "{:#x}",
-            searchStart, searchEnd, len, alignment, memoryType);

    if (searchStart < 0 || searchEnd <= searchStart) {
        LOG_ERROR(Kernel_Vmm, "Provided address range is invalid!");
        return SCE_KERNEL_ERROR_EINVAL;

@@ -44,7 +39,12 @@ int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u
    auto* memory = Core::Memory::Instance();
    PAddr phys_addr = memory->Allocate(searchStart, searchEnd, len, alignment, memoryType);
    *physAddrOut = static_cast<s64>(phys_addr);
-   LOG_INFO(Kernel_Vmm, "physAddrOut = {:#x}", phys_addr);

+   LOG_INFO(Kernel_Vmm,
+            "searchStart = {:#x}, searchEnd = {:#x}, len = {:#x}, "
+            "alignment = {:#x}, memoryType = {:#x}, physAddrOut = {:#x}",
+            searchStart, searchEnd, len, alignment, memoryType, phys_addr);

    return SCE_OK;
}

@@ -115,8 +115,16 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len,
}

int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) {
    LOG_WARNING(Kernel_Vmm, "called");
    auto* memory = Core::Memory::Instance();
    return memory->QueryProtection(std::bit_cast<VAddr>(addr), start, end, prot);
}

int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info,
                                            size_t infoSize) {
    LOG_WARNING(Kernel_Vmm, "called");
    auto* memory = Core::Memory::Instance();
    return memory->DirectMemoryQuery(offset, flags == 1, query_info);
}

} // namespace Libraries::Kernel

@@ -30,6 +30,12 @@ enum MemoryProtection : u32 {
    SCE_KERNEL_PROT_GPU_RW = 0x30 // Permit reads/writes from the GPU
};

struct OrbisQueryInfo {
    uintptr_t start;
    uintptr_t end;
    int memoryType;
};

u64 PS4_SYSV_ABI sceKernelGetDirectMemorySize();
int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u64 len,
                                               u64 alignment, int memoryType, s64* physAddrOut);

@@ -41,4 +47,7 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len,
                                            int flags);
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot);

int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info,
                                            size_t infoSize);

} // namespace Libraries::Kernel

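For orientation, a minimal sketch of how a guest title could drive the direct-memory calls declared above; the 16 MiB length, 64 KiB alignment and memory type value 3 are made-up illustration values, not anything this commit prescribes:

// Illustrative guest-side usage only; not part of the commit.
s64 phys_addr = 0;
int ret = sceKernelAllocateDirectMemory(0, sceKernelGetDirectMemorySize(),
                                        16 * 1024 * 1024, 64 * 1024, 3, &phys_addr);
if (ret == 0) {
    OrbisQueryInfo info{};
    // flags = 0 asks which existing allocation contains the offset;
    // flags = 1 is forwarded as find_next to MemoryManager::DirectMemoryQuery.
    ret = sceKernelDirectMemoryQuery(static_cast<u64>(phys_addr), 0, &info, sizeof(info));
    // On success, [info.start, info.end) brackets the containing allocation and
    // info.memoryType echoes the type passed at allocation time.
}
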
@@ -6,6 +6,7 @@
#include "common/assert.h"
#include "common/scope_exit.h"
#include "core/libraries/error_codes.h"
#include "core/libraries/kernel/memory_management.h"
#include "core/memory.h"
#include "video_core/renderer_vulkan/vk_instance.h"

@@ -80,7 +81,7 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
    if (True(flags & MemoryMapFlags::Fixed) && True(flags & MemoryMapFlags::NoOverwrite)) {
        // This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen.
        const auto& vma = FindVMA(mapped_addr)->second;
-       const u32 remaining_size = vma.base + vma.size - mapped_addr;
+       const size_t remaining_size = vma.base + vma.size - mapped_addr;
        ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size);
    }

@@ -131,7 +132,22 @@ int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* pr
    *start = reinterpret_cast<void*>(vma.base);
    *end = reinterpret_cast<void*>(vma.base + vma.size);
    *prot = static_cast<u32>(vma.prot);
-   return SCE_OK;
+   return ORBIS_OK;
}

int MemoryManager::DirectMemoryQuery(PAddr addr, bool find_next,
                                     Libraries::Kernel::OrbisQueryInfo* out_info) {
    const auto it = std::ranges::find_if(allocations, [&](const DirectMemoryArea& alloc) {
        return alloc.base <= addr && addr < alloc.base + alloc.size;
    });
    if (it == allocations.end()) {
        return SCE_KERNEL_ERROR_EACCES;
    }

    out_info->start = it->base;
    out_info->end = it->base + it->size;
    out_info->memoryType = it->memory_type;
    return ORBIS_OK;
}

std::pair<vk::Buffer, size_t> MemoryManager::GetVulkanBuffer(VAddr addr) {

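The committed lookup handles only exact containment; the find_next flag forwarded from sceKernelDirectMemoryQuery is accepted but not consulted in the body above. Below is a standalone model of how the flag could behave, under two assumptions this commit does not establish: the allocation list is kept sorted by base address, and find_next means "report the first area at or beyond the queried offset".

// Standalone sketch, illustrative only.
#include <algorithm>
#include <cstdint>
#include <vector>

struct Area {
    uint64_t base;
    uint64_t size;
};

// With find_next == false: the area containing addr, or nullptr.
// With find_next == true: the containing area, or else the next area above addr.
const Area* QueryArea(const std::vector<Area>& areas, uint64_t addr, bool find_next) {
    const auto it = std::ranges::find_if(areas, [&](const Area& a) {
        return find_next ? addr < a.base + a.size
                         : (a.base <= addr && addr < a.base + a.size);
    });
    return it != areas.end() ? &*it : nullptr;
}
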
@@ -146,7 +162,8 @@ VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) {
    ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map");

    const VirtualMemoryArea& vma = vma_handle->second;
-   ASSERT_MSG(vma.type == VMAType::Free, "Adding a mapping to already mapped region");
+   ASSERT_MSG(vma.type == VMAType::Free && vma.base <= virtual_addr,
+              "Adding a mapping to already mapped region");

    const VAddr start_in_vma = virtual_addr - vma.base;
    const VAddr end_in_vma = start_in_vma + size;

@@ -164,7 +181,7 @@ VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) {
    return vma_handle->second;
}

-MemoryManager::VMAHandle MemoryManager::Split(VMAHandle vma_handle, u32 offset_in_vma) {
+MemoryManager::VMAHandle MemoryManager::Split(VMAHandle vma_handle, size_t offset_in_vma) {
    auto& old_vma = vma_handle->second;
    ASSERT(offset_in_vma < old_vma.size && offset_in_vma > 0);

@@ -199,6 +216,7 @@ MemoryManager::VMAHandle MemoryManager::MergeAdjacent(VMAHandle iter) {
}

void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) {
    return;
    const vk::Device device = instance->GetDevice();
    const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties();
    void* host_pointer = reinterpret_cast<void*>(addr);

@@ -270,6 +288,7 @@ void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) {
}

void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) {
    return;
    const auto it = mapped_memories.find(addr);
    ASSERT(it != mapped_memories.end() && it->second.buffer_size == size);
    mapped_memories.erase(it);

@@ -17,6 +17,10 @@ namespace Vulkan {
class Instance;
}

namespace Libraries::Kernel {
struct OrbisQueryInfo;
}

namespace Core {

enum class MemoryProt : u32 {

@@ -77,12 +81,12 @@ struct VirtualMemoryArea {
    }
};

-constexpr VAddr SYSTEM_RESERVED = 0x800000000u;
-constexpr VAddr CODE_BASE_OFFSET = 0x100000000u;
-constexpr VAddr SYSTEM_MANAGED_MIN = 0x0000040000u;
-constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFu;
-constexpr VAddr USER_MIN = 0x1000000000u;
-constexpr VAddr USER_MAX = 0xFBFFFFFFFFu;
+constexpr VAddr SYSTEM_RESERVED = 0x800000000ULL;
+constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL;
+constexpr VAddr SYSTEM_MANAGED_MIN = 0x0000040000ULL;
+constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL;
+constexpr VAddr USER_MIN = 0x1000000000ULL;
+constexpr VAddr USER_MAX = 0xFBFFFFFFFFULL;

class MemoryManager {
    using VMAMap = std::map<VAddr, VirtualMemoryArea>;

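A brief aside on the literal suffixes: with plain u a hexadecimal literal takes the first unsigned type wide enough to hold it, so the large constants were already 64-bit values and nothing changes numerically; the ULL form just pins every literal to 64 bits up front so all six constants have the same type regardless of magnitude. A quick illustrative check, assuming the usual targets where int is 32-bit:

static_assert(0x800000000u == 0x800000000ULL);                        // same value either way
static_assert(sizeof(0x0000040000u) == sizeof(unsigned int));         // u: fits in 32 bits, stays 32-bit
static_assert(sizeof(0x0000040000ULL) == sizeof(unsigned long long)); // ULL: always a 64-bit literal
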
@@ -109,6 +113,8 @@ public:

    int QueryProtection(VAddr addr, void** start, void** end, u32* prot);

    int DirectMemoryQuery(PAddr addr, bool find_next, Libraries::Kernel::OrbisQueryInfo* out_info);

    std::pair<vk::Buffer, size_t> GetVulkanBuffer(VAddr addr);

private:

@@ -123,7 +129,7 @@ private:

    VirtualMemoryArea& AddMapping(VAddr virtual_addr, size_t size);

-   VMAHandle Split(VMAHandle vma_handle, u32 offset_in_vma);
+   VMAHandle Split(VMAHandle vma_handle, size_t offset_in_vma);

    VMAHandle MergeAdjacent(VMAHandle iter);

@@ -32,7 +32,7 @@ private:
    };

    std::mutex m_mutex;
-   bool m_connected = false;
+   bool m_connected = true;
    State m_last_state;
    int m_connected_count = 0;
    u32 m_states_num = 0;

@@ -111,6 +111,9 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
    case OperandField::ConstFloatNeg_1_0:
        value = ir.Imm32(-1.0f);
        break;
    case OperandField::ConstFloatNeg_2_0:
        value = ir.Imm32(-2.0f);
        break;
    case OperandField::VccLo:
        value = ir.GetVccLo();
        break;

@@ -327,9 +330,30 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
    case Opcode::S_ANDN2_B64:
        translator.S_ANDN2_B64(inst);
        break;
    case Opcode::V_SIN_F32:
        translator.V_SIN_F32(inst);
        break;
    case Opcode::V_LOG_F32:
        translator.V_LOG_F32(inst);
        break;
    case Opcode::V_EXP_F32:
        translator.V_EXP_F32(inst);
        break;
    case Opcode::V_SQRT_F32:
        translator.V_SQRT_F32(inst);
        break;
    case Opcode::V_MIN_F32:
        translator.V_MIN_F32(inst);
        break;
    case Opcode::V_MIN3_F32:
        translator.V_MIN3_F32(inst);
        break;
    case Opcode::S_NOP:
    case Opcode::S_AND_B64:
    case Opcode::S_CBRANCH_EXECZ:
    case Opcode::S_CBRANCH_SCC0:
    case Opcode::S_CBRANCH_SCC1:
    case Opcode::S_BRANCH:
    case Opcode::S_MOV_B64:
    case Opcode::S_WQM_B64:
    case Opcode::V_INTERP_P1_F32:

@@ -68,6 +68,12 @@ public:
    void V_CMP_F32(ConditionOp op, const GcnInst& inst);
    void V_MAX_F32(const GcnInst& inst);
    void V_RSQ_F32(const GcnInst& inst);
    void V_SIN_F32(const GcnInst& inst);
    void V_LOG_F32(const GcnInst& inst);
    void V_EXP_F32(const GcnInst& inst);
    void V_SQRT_F32(const GcnInst& inst);
    void V_MIN_F32(const GcnInst& inst);
    void V_MIN3_F32(const GcnInst& inst);

    // Vector Memory
    void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);

@@ -26,7 +26,7 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {

void Translator::V_MUL_F32(const GcnInst& inst) {
    const IR::VectorReg dst_reg{inst.dst[0].code};
-   ir.SetVectorReg(dst_reg, ir.FPMul(GetSrc(inst.src[0]), GetSrc(inst.src[1])));
+   ir.SetVectorReg(dst_reg, ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)));
}

void Translator::V_CMP_EQ_U32(const GcnInst& inst) {

@@ -198,4 +198,37 @@ void Translator::V_RSQ_F32(const GcnInst& inst) {
    SetDst(inst.dst[0], ir.FPRecipSqrt(src0));
}

void Translator::V_SIN_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc(inst.src[0], true)};
    SetDst(inst.dst[0], ir.FPSin(src0));
}

void Translator::V_LOG_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc(inst.src[0], true)};
    SetDst(inst.dst[0], ir.FPLog2(src0));
}

void Translator::V_EXP_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc(inst.src[0], true)};
    SetDst(inst.dst[0], ir.FPExp2(src0));
}

void Translator::V_SQRT_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc(inst.src[0], true)};
    SetDst(inst.dst[0], ir.FPSqrt(src0));
}

void Translator::V_MIN_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc(inst.src[0], true)};
    const IR::F32 src1{GetSrc(inst.src[1], true)};
    SetDst(inst.dst[0], ir.FPMin(src0, src1));
}

void Translator::V_MIN3_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc(inst.src[0], true)};
    const IR::F32 src1{GetSrc(inst.src[1], true)};
    const IR::F32 src2{GetSrc(inst.src[2], true)};
    SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2)));
}

} // namespace Shader::Gcn

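The IR mapping above leans on GCN semantics: v_log_f32 and v_exp_f32 are base-2 operations, hence FPLog2/FPExp2, and v_min3_f32 is simply a three-way minimum expressed as two nested FPMin calls. A scalar reference of what the emitted IR is expected to compute, useful for spot-checking (v_sin_f32 is omitted because the GCN ISA documents its input as already divided by 2*pi, so a plain sine is not an obviously faithful reference):

// Host-side reference model, illustrative only.
#include <algorithm>
#include <cmath>

float ref_log_f32(float x) { return std::log2(x); }  // v_log_f32: base-2 logarithm
float ref_exp_f32(float x) { return std::exp2(x); }  // v_exp_f32: base-2 exponential
float ref_sqrt_f32(float x) { return std::sqrt(x); }
float ref_min_f32(float a, float b) { return std::min(a, b); }
float ref_min3_f32(float a, float b, float c) { return std::min(a, std::min(b, c)); }
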
@@ -42,7 +42,7 @@ struct GraphicsPipelineKey {
    std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;

    bool operator==(const GraphicsPipelineKey& key) const noexcept {
-       return std::memcmp(this, &key, sizeof(GraphicsPipelineKey)) == 0;
+       return std::memcmp(this, &key, sizeof(key)) == 0;
    }
};
static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>);

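sizeof(key) and sizeof(GraphicsPipelineKey) are identical here, since sizeof applied to a reference yields the size of the referenced type, so the change is purely stylistic; it is the static_assert that keeps the memcmp-based operator== honest, because byte-wise comparison is only sound when the type has no padding or other non-value bits. A small illustration of the kind of type that check would reject, with the padding layout assumed for typical ABIs:

#include <type_traits>

struct Padded {
    char tag; // on common ABIs, three padding bytes follow before 'value'
    int value;
};
// Two Padded objects with equal members can still differ in their padding bytes,
// so a memcmp-based operator== would be unreliable. On such ABIs the trait below
// is false, and a static_assert like the one above would refuse to compile:
//   static_assert(std::has_unique_object_representations_v<Padded>);
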
@@ -93,6 +93,7 @@ void PipelineCache::RefreshGraphicsKey() {
    key.stencil_ref_back = regs.stencil_ref_back;
    key.prim_type = regs.primitive_type;
    key.polygon_mode = regs.polygon_control.PolyMode();
    key.cull_mode = regs.polygon_control.CullingMode();

    const auto& db = regs.depth_buffer;
    key.depth_format = key.depth.depth_enable