mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-21 05:51:39 +00:00
Various fixes
This commit is contained in:
parent
511595aca7
commit
ea2e4f7b5c
|
@ -66,7 +66,7 @@ int SDLAudio::AudioOutOpen(int type, u32 samples_num, u32 freq,
|
|||
port.sample_size = 4;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE_MSG("Unknown format");
|
||||
UNREACHABLE_MSG("Unknown format {}", u32(format));
|
||||
}
|
||||
|
||||
for (int i = 0; i < port.channels_num; i++) {
|
||||
|
|
|
@ -234,7 +234,7 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id,
|
|||
"AudioOutOpen id = {} port_type = {} index = {} lenght= {} sample_rate = {} "
|
||||
"param_type = {}",
|
||||
user_id, GetAudioOutPort(port_type), index, length, sample_rate,
|
||||
GetAudioOutParam(param_type));
|
||||
GetAudioOutParam(param_type & 0xFF));
|
||||
if ((port_type < 0 || port_type > 4) && (port_type != 127)) {
|
||||
LOG_ERROR(Lib_AudioOut, "Invalid port type");
|
||||
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT_TYPE;
|
||||
|
@ -243,10 +243,6 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id,
|
|||
LOG_ERROR(Lib_AudioOut, "Invalid sample rate");
|
||||
return ORBIS_AUDIO_OUT_ERROR_INVALID_SAMPLE_FREQ;
|
||||
}
|
||||
if (param_type < 0 || param_type > 7) {
|
||||
LOG_ERROR(Lib_AudioOut, "Invalid format");
|
||||
return ORBIS_AUDIO_OUT_ERROR_INVALID_FORMAT;
|
||||
}
|
||||
if (length != 256 && length != 512 && length != 768 && length != 1024 && length != 1280 &&
|
||||
length != 1536 && length != 1792 && length != 2048) {
|
||||
LOG_ERROR(Lib_AudioOut, "Invalid length");
|
||||
|
@ -255,7 +251,7 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id,
|
|||
if (index != 0) {
|
||||
LOG_ERROR(Lib_AudioOut, "index is not valid !=0 {}", index);
|
||||
}
|
||||
int result = audio->AudioOutOpen(port_type, length, sample_rate, param_type);
|
||||
int result = audio->AudioOutOpen(port_type, length, sample_rate, OrbisAudioOutParam(param_type & 0xFF));
|
||||
if (result == -1) {
|
||||
LOG_ERROR(Lib_AudioOut, "Audio ports are full");
|
||||
return ORBIS_AUDIO_OUT_ERROR_PORT_FULL;
|
||||
|
|
|
@ -28,7 +28,7 @@ int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u
|
|||
LOG_ERROR(Kernel_Vmm, "Provided address range is invalid!");
|
||||
return SCE_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
if ((alignment != 0 || Common::Is16KBAligned(alignment)) && !std::has_single_bit(alignment)) {
|
||||
if (alignment != 0 && !Common::Is16KBAligned(alignment)) {
|
||||
LOG_ERROR(Kernel_Vmm, "Alignment value is invalid!");
|
||||
return SCE_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
|
|
|
@ -10,7 +10,6 @@
|
|||
#include "core/libraries/error_codes.h"
|
||||
#include "core/libraries/kernel/thread_management.h"
|
||||
#include "core/libraries/libs.h"
|
||||
#include "core/tls.h"
|
||||
#include "core/linker.h"
|
||||
#ifdef _WIN64
|
||||
#include <windows.h>
|
||||
|
@ -516,7 +515,7 @@ int PS4_SYSV_ABI scePthreadMutexLock(ScePthreadMutex* mutex) {
|
|||
|
||||
int result = pthread_mutex_lock(&(*mutex)->pth_mutex);
|
||||
if (result != 0) {
|
||||
LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
|
||||
//LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
|
||||
}
|
||||
switch (result) {
|
||||
case 0:
|
||||
|
@ -539,7 +538,7 @@ int PS4_SYSV_ABI scePthreadMutexUnlock(ScePthreadMutex* mutex) {
|
|||
|
||||
int result = pthread_mutex_unlock(&(*mutex)->pth_mutex);
|
||||
if (result != 0) {
|
||||
LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
|
||||
//LOG_INFO(Kernel_Pthread, "name={}, result={}", (*mutex)->name, result);
|
||||
}
|
||||
switch (result) {
|
||||
case 0:
|
||||
|
@ -1122,6 +1121,34 @@ void* PS4_SYSV_ABI __tls_get_addr(TlsIndex* index) {
|
|||
return linker->TlsGetAddr(index->ti_module, index->ti_offset);
|
||||
}
|
||||
|
||||
// POSIX sem_init entry point exposed to the guest: forwards sem, pshared and
// value unchanged to the host sem_init and returns its result as-is.
// NOTE(review): this hands the guest's sem_t storage straight to the host
// implementation; presumably the two layouts are compatible -- confirm.
int PS4_SYSV_ABI posix_sem_init(sem_t *sem, int pshared, unsigned int value) {
|
||||
return sem_init(sem, pshared, value);
|
||||
}
|
||||
|
||||
// POSIX sem_wait entry point exposed to the guest: forwards the semaphore
// pointer to the host sem_wait and returns its result without translation.
int PS4_SYSV_ABI posix_sem_wait(sem_t *sem) {
|
||||
return sem_wait(sem);
|
||||
}
|
||||
|
||||
// POSIX sem_post entry point exposed to the guest: forwards the semaphore
// pointer to the host sem_post and returns its result without translation.
int PS4_SYSV_ABI posix_sem_post(sem_t *sem) {
|
||||
return sem_post(sem);
|
||||
}
|
||||
|
||||
/// POSIX-facing wrapper around scePthreadMutexDestroy.
///
/// Non-negative results are passed through unchanged. A negative result that
/// lies strictly above SCE_KERNEL_ERROR_UNKNOWN and at or below
/// SCE_KERNEL_ERROR_ESTOP is rebased by subtracting SCE_KERNEL_ERROR_UNKNOWN;
/// every other negative result is reported as POSIX_EOTHER.
int PS4_SYSV_ABI posix_pthread_mutex_destroy(ScePthreadMutex* mutex) {
    const int sce_status = scePthreadMutexDestroy(mutex);
    if (sce_status >= 0) {
        return sce_status;
    }

    const bool in_known_range =
        sce_status > SCE_KERNEL_ERROR_UNKNOWN && sce_status <= SCE_KERNEL_ERROR_ESTOP;
    if (!in_known_range) {
        return POSIX_EOTHER;
    }
    return sce_status + -SCE_KERNEL_ERROR_UNKNOWN;
}
|
||||
|
||||
// POSIX pthread_join entry point exposed to the guest: joins the host thread
// handle stored in thread->pth, passing value_ptr through, and returns the
// host pthread_join result directly (no error-code translation is applied).
int PS4_SYSV_ABI posix_pthread_join(ScePthread thread, void** value_ptr) {
|
||||
return pthread_join(thread->pth, value_ptr);
|
||||
}
|
||||
|
||||
void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
|
||||
LIB_FUNCTION("4+h9EzwKF4I", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetschedpolicy);
|
||||
LIB_FUNCTION("-Wreprtu0Qs", "libkernel", 1, "libkernel", 1, 1, scePthreadAttrSetdetachstate);
|
||||
|
@ -1173,9 +1200,14 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
|
|||
LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, pthread_mutexattr_init);
|
||||
LIB_FUNCTION("mDmgMOGVUqg", "libScePosix", 1, "libkernel", 1, 1, pthread_mutexattr_settype);
|
||||
LIB_FUNCTION("ttHNfU+qDBU", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_init);
|
||||
LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy);
|
||||
LIB_FUNCTION("7H0iTOciTLo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_lock);
|
||||
LIB_FUNCTION("2Z+PpY6CaJg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock);
|
||||
LIB_FUNCTION("mkx2fVhNMsg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_broadcast);
|
||||
LIB_FUNCTION("h9CcP3J0oVM", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_join);
|
||||
LIB_FUNCTION("pDuPEf3m4fI", "libScePosix", 1, "libkernel", 1, 1, posix_sem_init);
|
||||
LIB_FUNCTION("YCV5dGGBcCo", "libScePosix", 1, "libkernel", 1, 1, posix_sem_wait);
|
||||
LIB_FUNCTION("IKP8typ0QUk", "libScePosix", 1, "libkernel", 1, 1, posix_sem_post);
|
||||
|
||||
LIB_FUNCTION("QBi7HCK03hw", "libkernel", 1, "libkernel", 1, 1, sceKernelClockGettime);
|
||||
LIB_FUNCTION("lLMT9vJAck0", "libkernel", 1, "libkernel", 1, 1, clock_gettime);
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
#include <pthread.h>
|
||||
#include <semaphore.h>
|
||||
#include <sched.h>
|
||||
#include "common/types.h"
|
||||
|
||||
|
|
|
@ -57,24 +57,24 @@ void Linker::Execute() {
|
|||
}
|
||||
|
||||
// Calculate static TLS size.
|
||||
static constexpr size_t StOff = 0x80; // TODO: What is this offset?
|
||||
static_tls_size = std::ranges::fold_left(m_modules, StOff, [&](u32 size, auto& module) {
|
||||
const size_t new_size = size + module->tls.image_size;
|
||||
module->tls.distance_from_fs = new_size;
|
||||
return new_size;
|
||||
});
|
||||
|
||||
Common::SetCurrentThreadName("GAME_MainThread");
|
||||
Libraries::Kernel::pthreadInitSelfMainThread();
|
||||
|
||||
// Init primary thread TLS.
|
||||
InitTlsForThread(true);
|
||||
for (const auto& module : m_modules) {
|
||||
if (module->tls.image_size != 0) {
|
||||
module->tls.modid = ++max_tls_index;
|
||||
}
|
||||
static_tls_size += module->tls.image_size;
|
||||
module->tls.offset = static_tls_size;
|
||||
}
|
||||
|
||||
// Relocate all modules
|
||||
for (u32 i = 1; const auto& m : m_modules) {
|
||||
Relocate(i, m.get());
|
||||
for (const auto& m : m_modules) {
|
||||
Relocate(m.get());
|
||||
}
|
||||
|
||||
// Init primary thread.
|
||||
Common::SetCurrentThreadName("GAME_MainThread");
|
||||
Libraries::Kernel::pthreadInitSelfMainThread();
|
||||
InitTlsForThread(true);
|
||||
|
||||
// Start shared library modules
|
||||
for (auto& m : m_modules) {
|
||||
if (m->IsSharedLib()) {
|
||||
|
@ -113,7 +113,7 @@ Module* Linker::LoadModule(const std::filesystem::path& elf_name) {
|
|||
return m_modules.emplace_back(std::move(module)).get();
|
||||
}
|
||||
|
||||
void Linker::Relocate(u32 index, Module* module) {
|
||||
void Linker::Relocate(Module* module) {
|
||||
module->ForEachRelocation([&](elf_relocation* rel, bool isJmpRel) {
|
||||
auto type = rel->GetType();
|
||||
auto symbol = rel->GetSymbol();
|
||||
|
@ -134,7 +134,7 @@ void Linker::Relocate(u32 index, Module* module) {
|
|||
rel_is_resolved = true;
|
||||
break;
|
||||
case R_X86_64_DTPMOD64:
|
||||
rel_value = static_cast<uint64_t>(index);
|
||||
rel_value = static_cast<u64>(module->tls.modid);
|
||||
rel_is_resolved = true;
|
||||
rel_sym_type = Loader::SymbolType::Tls;
|
||||
break;
|
||||
|
@ -254,10 +254,11 @@ void Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul
|
|||
}
|
||||
|
||||
/// Resolves the address of a thread-local variable for the calling thread.
///
/// @param module_index TLS module id; the module's block pointer is read from
///                     DTV slot `module_index + 1` of the current thread's TCB.
/// @param offset       Byte offset added to the start of that TLS block.
/// @return Pointer to the requested location inside the module's TLS block.
///
/// Asserts that the DTV generation counter in slot 0 matches
/// dtv_generation_counter and that the module's slot holds a block pointer.
void* Linker::TlsGetAddr(u64 module_index, u64 offset) {
    DtvEntry* dtv_table = GetTcbBase()->tcb_dtv;
    ASSERT_MSG(dtv_table[0].counter == dtv_generation_counter,
               "Reallocation of DTV table is not supported");

    // Check the slot itself before applying the offset: once a non-zero offset
    // has been added, a null slot no longer compares equal to null and the
    // missing allocation would go unnoticed.
    u8* block = static_cast<u8*>(dtv_table[module_index + 1].pointer);
    ASSERT_MSG(block, "DTV allocation is not supported");
    return block + offset;
}
|
||||
|
@ -286,26 +287,29 @@ void Linker::InitTlsForThread(bool is_primary) {
|
|||
}
|
||||
|
||||
// Initialize allocated memory and allocate DTV table.
|
||||
const u32 num_dtvs = m_modules.size() - 1;
|
||||
const u32 num_dtvs = max_tls_index;
|
||||
std::memset(addr_out, 0, total_tls_size);
|
||||
dtv_table.resize(num_dtvs + 2);
|
||||
DtvEntry* dtv_table = new DtvEntry[num_dtvs + 2];
|
||||
|
||||
// Initialize thread control block
|
||||
u8* addr = reinterpret_cast<u8*>(addr_out);
|
||||
Tcb* tcb = reinterpret_cast<Tcb*>(addr + static_tls_size);
|
||||
tcb->tcb_self = tcb;
|
||||
tcb->tcb_dtv = dtv_table.data();
|
||||
tcb->tcb_dtv = dtv_table;
|
||||
|
||||
// Dtv[0] is the generation counter. libkernel puts their number into dtv[1] (why?)
|
||||
dtv_table[0].counter = dtv_generation_counter;
|
||||
dtv_table[1].counter = num_dtvs;
|
||||
|
||||
// Copy init images to TLS thread blocks and map them to DTV slots.
|
||||
for (u32 i = 2; const auto& module : m_modules) {
|
||||
u8* dest = reinterpret_cast<u8*>(addr + static_tls_size - module->tls.distance_from_fs);
|
||||
for (const auto& module : m_modules) {
|
||||
if (module->tls.image_size == 0) {
|
||||
continue;
|
||||
}
|
||||
u8* dest = reinterpret_cast<u8*>(addr + static_tls_size - module->tls.offset);
|
||||
const u8* src = reinterpret_cast<const u8*>(module->tls.image_virtual_addr);
|
||||
std::memcpy(dest, src, module->tls.init_image_size);
|
||||
tcb->tcb_dtv[i++].pointer = dest;
|
||||
tcb->tcb_dtv[module->tls.modid + 1].pointer = dest;
|
||||
}
|
||||
|
||||
// Set pointer to FS base
|
||||
|
|
|
@ -17,19 +17,6 @@ struct EntryParams {
|
|||
const char* argv[3];
|
||||
};
|
||||
|
||||
union DtvEntry {
|
||||
struct {
|
||||
size_t counter;
|
||||
};
|
||||
void* pointer;
|
||||
};
|
||||
|
||||
struct Tcb {
|
||||
Tcb* tcb_self;
|
||||
DtvEntry* tcb_dtv;
|
||||
void* tcb_thread;
|
||||
};
|
||||
|
||||
using HeapApiFunc = PS4_SYSV_ABI void*(*)(size_t);
|
||||
|
||||
class Linker {
|
||||
|
@ -54,7 +41,7 @@ public:
|
|||
|
||||
Module* LoadModule(const std::filesystem::path& elf_name);
|
||||
|
||||
void Relocate(u32 index, Module* module);
|
||||
void Relocate(Module* module);
|
||||
void Resolve(const std::string& name, Loader::SymbolType type,
|
||||
Module* module, Loader::SymbolRecord* return_info);
|
||||
void Execute();
|
||||
|
@ -64,9 +51,9 @@ private:
|
|||
const Module* FindExportedModule(const ModuleInfo& m, const LibraryInfo& l);
|
||||
void InitTls();
|
||||
|
||||
std::vector<DtvEntry> dtv_table;
|
||||
u32 dtv_generation_counter{1};
|
||||
size_t static_tls_size{};
|
||||
size_t max_tls_index{};
|
||||
HeapApiFunc heap_api_func{};
|
||||
std::vector<std::unique_ptr<Module>> m_modules;
|
||||
Loader::SymbolsResolver m_hle_symbols{};
|
||||
|
|
|
@ -35,7 +35,9 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
|
|||
}
|
||||
|
||||
// Align free position
|
||||
free_addr = Common::AlignUp(free_addr, alignment);
|
||||
if (alignment > 0) {
|
||||
free_addr = Common::AlignUp(free_addr, alignment);
|
||||
}
|
||||
ASSERT(free_addr >= search_start && free_addr + size <= search_end);
|
||||
|
||||
// Add the allocated region to the list and commit its pages.
|
||||
|
|
|
@ -47,10 +47,11 @@ struct LibraryInfo {
|
|||
|
||||
struct ThreadLocalImage {
|
||||
u64 align;
|
||||
u64 image_size;
|
||||
u64 offset;
|
||||
u32 modid;
|
||||
VAddr image_virtual_addr;
|
||||
u64 init_image_size;
|
||||
u64 image_size;
|
||||
u64 distance_from_fs;
|
||||
};
|
||||
|
||||
struct DynamicModuleInfo {
|
||||
|
@ -166,7 +167,7 @@ public:
|
|||
std::vector<u8> m_dynamic_data;
|
||||
Loader::SymbolsResolver export_sym;
|
||||
Loader::SymbolsResolver import_sym;
|
||||
ThreadLocalImage tls;
|
||||
ThreadLocalImage tls{};
|
||||
};
|
||||
|
||||
} // namespace Core
|
||||
|
|
|
@ -49,6 +49,10 @@ void SetTcbBase(void* image_address) {
|
|||
ASSERT(result != 0);
|
||||
}
|
||||
|
||||
// Returns the value held in the TLS `slot` (read via TlsGetValue),
// reinterpreted as a pointer to the Tcb.
// NOTE(review): no null check is made here; presumably SetTcbBase has already
// stored a value for the calling thread -- confirm against callers.
Tcb* GetTcbBase() {
|
||||
return reinterpret_cast<Tcb*>(TlsGetValue(slot));
|
||||
}
|
||||
|
||||
void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) {
|
||||
using namespace Xbyak::util;
|
||||
|
||||
|
|
|
@ -11,9 +11,23 @@ class CodeGenerator;
|
|||
|
||||
namespace Core {
|
||||
|
||||
/// One slot of a thread's DTV table. A slot stores either an integer counter
/// or a pointer; the two members share the same storage.
union DtvEntry {
|
||||
// Integer view of the slot.
size_t counter;
|
||||
// Pointer view of the slot.
void* pointer;
|
||||
};
|
||||
|
||||
/// Thread control block reached through GetTcbBase().
struct Tcb {
|
||||
// Pointer to a Tcb; presumably the block's own address -- confirm at the
// site that initializes it.
Tcb* tcb_self;
|
||||
// Pointer to the first DtvEntry of this thread's DTV table.
DtvEntry* tcb_dtv;
|
||||
// NOTE(review): purpose not visible from this definition -- presumably an
// opaque per-thread handle; confirm.
void* tcb_thread;
|
||||
};
|
||||
|
||||
/// Sets the data pointer to the TCB block.
|
||||
void SetTcbBase(void* image_address);
|
||||
|
||||
/// Retrieves Tcb structure for the calling thread.
|
||||
Tcb* GetTcbBase();
|
||||
|
||||
/// Patches any instructions that access guest TLS to use provided storage.
|
||||
void PatchTLS(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c);
|
||||
|
||||
|
|
|
@ -87,6 +87,7 @@ int main(int argc, char* argv[]) {
|
|||
linker->LoadModule(entry.path().string().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Check if there is a libc.prx in sce_module folder
|
||||
bool found = false;
|
||||
if (Config::isLleLibc()) {
|
||||
|
@ -94,7 +95,8 @@ int main(int argc, char* argv[]) {
|
|||
if (std::filesystem::is_directory(sce_module_folder)) {
|
||||
for (const auto& entry : std::filesystem::directory_iterator(sce_module_folder)) {
|
||||
if (entry.path().filename() == "libc.prx" ||
|
||||
entry.path().filename() == "libSceFios2.prx") {
|
||||
entry.path().filename() == "libSceFios2.prx" ||
|
||||
entry.path().filename() == "libSceNpToolkit2.prx") {
|
||||
found = true;
|
||||
LOG_INFO(Loader, "Loading {}", entry.path().string().c_str());
|
||||
linker->LoadModule(entry.path().string().c_str());
|
||||
|
@ -105,6 +107,7 @@ int main(int argc, char* argv[]) {
|
|||
if (!found) {
|
||||
Libraries::LibC::libcSymbolsRegister(&linker->GetHLESymbols());
|
||||
}
|
||||
|
||||
std::thread mainthread([linker]() { linker->Execute(); });
|
||||
Discord::RPC discordRPC;
|
||||
discordRPC.init();
|
||||
|
|
|
@ -55,8 +55,20 @@ void Translator::S_ANDN2_B64(const GcnInst& inst) {
|
|||
const IR::U1 src0{get_src(inst.src[0])};
|
||||
const IR::U1 src1{get_src(inst.src[1])};
|
||||
const IR::U1 result{ir.LogicalAnd(src0, ir.LogicalNot(src1))};
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(result);
|
||||
switch (inst.dst[0].field) {
|
||||
case OperandField::VccLo:
|
||||
ir.SetVcc(result);
|
||||
break;
|
||||
case OperandField::ExecLo:
|
||||
ir.SetExec(result);
|
||||
break;
|
||||
case OperandField::ScalarGPR:
|
||||
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
|
||||
|
@ -124,9 +136,17 @@ void Translator::S_OR_B64(bool negate, const GcnInst& inst) {
|
|||
if (negate) {
|
||||
result = ir.LogicalNot(result);
|
||||
}
|
||||
ASSERT(inst.dst[0].field == OperandField::VccLo);
|
||||
ir.SetVcc(result);
|
||||
ir.SetScc(result);
|
||||
switch (inst.dst[0].field) {
|
||||
case OperandField::VccLo:
|
||||
ir.SetVcc(result);
|
||||
break;
|
||||
case OperandField::ScalarGPR:
|
||||
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::S_AND_B64(const GcnInst& inst) {
|
||||
|
@ -145,9 +165,17 @@ void Translator::S_AND_B64(const GcnInst& inst) {
|
|||
const IR::U1 src0{get_src(inst.src[0])};
|
||||
const IR::U1 src1{get_src(inst.src[1])};
|
||||
const IR::U1 result = ir.LogicalAnd(src0, src1);
|
||||
ASSERT(inst.dst[0].field == OperandField::VccLo);
|
||||
ir.SetVcc(result);
|
||||
ir.SetScc(result);
|
||||
switch (inst.dst[0].field) {
|
||||
case OperandField::VccLo:
|
||||
ir.SetVcc(result);
|
||||
break;
|
||||
case OperandField::ScalarGPR:
|
||||
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::S_ADD_I32(const GcnInst& inst) {
|
||||
|
@ -179,6 +207,36 @@ void Translator::S_CSELECT_B32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], IR::U32{ir.Select(ir.GetScc(), src0, src1)});
|
||||
}
|
||||
|
||||
void Translator::S_CSELECT_B64(const GcnInst& inst) {
|
||||
const auto get_src = [&](const InstOperand& operand) {
|
||||
switch (operand.field) {
|
||||
case OperandField::VccLo:
|
||||
return ir.GetVcc();
|
||||
case OperandField::ExecLo:
|
||||
return ir.GetExec();
|
||||
case OperandField::ScalarGPR:
|
||||
return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
|
||||
case OperandField::ConstZero:
|
||||
return ir.Imm1(false);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
};
|
||||
const IR::U1 src0{get_src(inst.src[0])};
|
||||
const IR::U1 src1{get_src(inst.src[1])};
|
||||
const IR::U1 result{ir.Select(ir.GetScc(), src0, src1)};
|
||||
switch (inst.dst[0].field) {
|
||||
case OperandField::VccLo:
|
||||
ir.SetVcc(result);
|
||||
break;
|
||||
case OperandField::ScalarGPR:
|
||||
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::S_BFE_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
|
|
|
@ -5,30 +5,15 @@
|
|||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
void Load(IR::IREmitter& ir, int num_dwords, const IR::Value& handle, IR::ScalarReg dst_reg,
|
||||
const IR::U32U64& address) {
|
||||
for (u32 i = 0; i < num_dwords; i++) {
|
||||
if (handle.IsEmpty()) {
|
||||
ir.SetScalarReg(dst_reg++, ir.ReadConst(address, ir.Imm32(i)));
|
||||
} else {
|
||||
const IR::U32 index = ir.IAdd(address, ir.Imm32(i));
|
||||
ir.SetScalarReg(dst_reg++, ir.ReadConstBuffer(handle, index));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
||||
const auto& smrd = inst.control.smrd;
|
||||
ASSERT_MSG(smrd.imm, "Bindless texture loads unsupported");
|
||||
const IR::ScalarReg sbase{inst.src[0].code * 2};
|
||||
const IR::U32 offset =
|
||||
smrd.imm ? ir.Imm32(smrd.offset * 4)
|
||||
: IR::U32{ir.ShiftLeftLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)),
|
||||
ir.Imm32(2))};
|
||||
const IR::U64 base =
|
||||
ir.PackUint2x32(ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1)));
|
||||
const IR::U64 address = ir.IAdd(base, offset);
|
||||
const IR::ScalarReg dst_reg{inst.dst[0].code};
|
||||
Load(ir, num_dwords, {}, dst_reg, address);
|
||||
const IR::Value base = ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
|
||||
IR::ScalarReg dst_reg{inst.dst[0].code};
|
||||
for (u32 i = 0; i < num_dwords; i++) {
|
||||
ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(smrd.offset + i)));
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
||||
|
@ -37,8 +22,11 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
|||
const IR::U32 dword_offset =
|
||||
smrd.imm ? ir.Imm32(smrd.offset) : ir.GetScalarReg(IR::ScalarReg(smrd.offset));
|
||||
const IR::Value vsharp = ir.GetScalarReg(sbase);
|
||||
const IR::ScalarReg dst_reg{inst.dst[0].code};
|
||||
Load(ir, num_dwords, vsharp, dst_reg, dword_offset);
|
||||
IR::ScalarReg dst_reg{inst.dst[0].code};
|
||||
for (u32 i = 0; i < num_dwords; i++) {
|
||||
const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));
|
||||
ir.SetScalarReg(dst_reg++, ir.ReadConstBuffer(vsharp, index));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -256,6 +256,12 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
break;
|
||||
case Opcode::S_WAITCNT:
|
||||
break;
|
||||
case Opcode::S_LOAD_DWORDX4:
|
||||
translator.S_LOAD_DWORD(4, inst);
|
||||
break;
|
||||
case Opcode::S_LOAD_DWORDX8:
|
||||
translator.S_LOAD_DWORD(8, inst);
|
||||
break;
|
||||
case Opcode::S_BUFFER_LOAD_DWORD:
|
||||
translator.S_BUFFER_LOAD_DWORD(1, inst);
|
||||
break;
|
||||
|
@ -356,9 +362,15 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::S_CMP_LG_U32:
|
||||
translator.S_CMP(ConditionOp::LG, false, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_LG_I32:
|
||||
translator.S_CMP(ConditionOp::LG, true, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_EQ_I32:
|
||||
translator.S_CMP(ConditionOp::EQ, true, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_EQ_U32:
|
||||
translator.S_CMP(ConditionOp::EQ, false, inst);
|
||||
break;
|
||||
case Opcode::V_CNDMASK_B32:
|
||||
translator.V_CNDMASK_B32(inst);
|
||||
break;
|
||||
|
@ -509,6 +521,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::S_CSELECT_B32:
|
||||
translator.S_CSELECT_B32(inst);
|
||||
break;
|
||||
case Opcode::S_CSELECT_B64:
|
||||
translator.S_CSELECT_B64(inst);
|
||||
break;
|
||||
case Opcode::S_BFE_U32:
|
||||
translator.S_BFE_U32(inst);
|
||||
break;
|
||||
|
@ -516,6 +531,8 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::S_CBRANCH_EXECZ:
|
||||
case Opcode::S_CBRANCH_SCC0:
|
||||
case Opcode::S_CBRANCH_SCC1:
|
||||
case Opcode::S_CBRANCH_VCCNZ:
|
||||
case Opcode::S_CBRANCH_VCCZ:
|
||||
case Opcode::S_BRANCH:
|
||||
case Opcode::S_WQM_B64:
|
||||
case Opcode::V_INTERP_P1_F32:
|
||||
|
@ -523,7 +540,7 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
break;
|
||||
default:
|
||||
const u32 opcode = u32(inst.opcode);
|
||||
UNREACHABLE_MSG("Unknown opcode {}", opcode);
|
||||
throw NotImplementedException("Opcode {}", opcode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,6 +46,7 @@ public:
|
|||
void S_AND_B32(const GcnInst& inst);
|
||||
void S_LSHR_B32(const GcnInst& inst);
|
||||
void S_CSELECT_B32(const GcnInst& inst);
|
||||
void S_CSELECT_B64(const GcnInst& inst);
|
||||
void S_BFE_U32(const GcnInst& inst);
|
||||
|
||||
// Scalar Memory
|
||||
|
|
|
@ -85,21 +85,21 @@ void Translator::V_CVT_F32_U32(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_MAD_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc(inst.src[1])};
|
||||
const IR::F32 src2{GetSrc(inst.src[2])};
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::F32 src2{GetSrc(inst.src[2], true)};
|
||||
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
|
||||
}
|
||||
|
||||
void Translator::V_FRACT_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
ir.SetVectorReg(dst_reg, ir.Fract(src0));
|
||||
}
|
||||
|
||||
void Translator::V_ADD_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc(inst.src[1])};
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
SetDst(inst.dst[0], ir.FPAdd(src0, src1));
|
||||
}
|
||||
|
||||
|
@ -114,14 +114,14 @@ void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) {
|
|||
|
||||
void Translator::V_MED3_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1])};
|
||||
const IR::F32 src2{GetSrc(inst.src[2])};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::F32 src2{GetSrc(inst.src[2], true)};
|
||||
const IR::F32 mmx = ir.FPMin(ir.FPMax(src0, src1), src2);
|
||||
SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx));
|
||||
}
|
||||
|
||||
void Translator::V_FLOOR_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
ir.SetVectorReg(dst_reg, ir.FPFloor(src0));
|
||||
}
|
||||
|
@ -167,7 +167,17 @@ void Translator::V_CMP_F32(ConditionOp op, const GcnInst& inst) {
|
|||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
ir.SetVcc(result);
|
||||
|
||||
switch (inst.dst[1].field) {
|
||||
case OperandField::VccLo:
|
||||
ir.SetVcc(result);
|
||||
break;
|
||||
case OperandField::ScalarGPR:
|
||||
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), result);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::V_MAX_F32(const GcnInst& inst) {
|
||||
|
|
|
@ -273,8 +273,8 @@ void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset)
|
|||
}*/
|
||||
}
|
||||
|
||||
U32 IREmitter::ReadConst(const U64& address, const U32& offset) {
|
||||
return Inst<U32>(Opcode::ReadConst, address, offset);
|
||||
U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
|
||||
return Inst<U32>(Opcode::ReadConst, base, offset);
|
||||
}
|
||||
|
||||
F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) {
|
||||
|
|
|
@ -77,7 +77,7 @@ public:
|
|||
[[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset);
|
||||
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
||||
|
||||
[[nodiscard]] U32 ReadConst(const U64& address, const U32& offset);
|
||||
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
|
||||
[[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index);
|
||||
|
||||
[[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address,
|
||||
|
|
|
@ -15,7 +15,7 @@ OPCODE(Epilogue, Void,
|
|||
OPCODE(Discard, Void, )
|
||||
|
||||
// Constant memory operations
|
||||
OPCODE(ReadConst, U32, U64, U32, )
|
||||
OPCODE(ReadConst, U32, U32x2, U32, )
|
||||
OPCODE(ReadConstBuffer, F32, Opaque, U32, )
|
||||
OPCODE(ReadConstBufferU32, U32, Opaque, U32, )
|
||||
|
||||
|
|
|
@ -157,16 +157,15 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
|
|||
ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst, "Sharp load not from constant memory");
|
||||
|
||||
// Retrieve offset from base.
|
||||
IR::Inst* addr = inst->Arg(0).InstRecursive();
|
||||
u32 dword_offset = addr->Arg(1).U32();
|
||||
addr = addr->Arg(0).InstRecursive();
|
||||
ASSERT_MSG(addr->Arg(1).IsImmediate(), "Bindless not supported");
|
||||
dword_offset += addr->Arg(1).U32() >> 2;
|
||||
const u32 dword_offset = inst->Arg(1).U32();
|
||||
const IR::Inst* spgpr_base = inst->Arg(0).InstRecursive();
|
||||
|
||||
// Retrieve SGPR that holds sbase
|
||||
inst = addr->Arg(0).InstRecursive()->Arg(0).InstRecursive();
|
||||
ASSERT_MSG(inst->GetOpcode() == IR::Opcode::GetUserData, "Nested resource loads not supported");
|
||||
const IR::ScalarReg base = inst->Arg(0).ScalarReg();
|
||||
// Retrieve SGPR pair that holds sbase
|
||||
const IR::Inst* sbase0 = spgpr_base->Arg(0).InstRecursive();
|
||||
const IR::Inst* sbase1 = spgpr_base->Arg(1).InstRecursive();
|
||||
ASSERT_MSG(sbase0->GetOpcode() == IR::Opcode::GetUserData &&
|
||||
sbase1->GetOpcode() == IR::Opcode::GetUserData, "Nested resource loads not supported");
|
||||
const IR::ScalarReg base = sbase0->Arg(0).ScalarReg();
|
||||
|
||||
// Return retrieved location.
|
||||
return SharpLocation{
|
||||
|
|
|
@ -160,8 +160,13 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
|||
inst_pool.ReleaseContents();
|
||||
|
||||
// Recompile shader to IR.
|
||||
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
|
||||
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
|
||||
try {
|
||||
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
|
||||
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
|
||||
} catch (const Shader::Exception& e) {
|
||||
LOG_ERROR(Render_Vulkan, "{}", e.what());
|
||||
std::abort();
|
||||
}
|
||||
|
||||
// Compile IR to SPIR-V
|
||||
auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding);
|
||||
|
|
|
@ -58,7 +58,7 @@ LONG WINAPI GuestFaultSignalHandler(EXCEPTION_POINTERS* pExp) noexcept {
|
|||
}
|
||||
#endif
|
||||
|
||||
static constexpr u64 StreamBufferSize = 128_MB;
|
||||
static constexpr u64 StreamBufferSize = 512_MB;
|
||||
static constexpr u64 PageShift = 12;
|
||||
|
||||
TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_)
|
||||
|
|
Loading…
Reference in a new issue