From f8879779dc0662bf7598bd1fa966f7bb26fbb373 Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis <skmp@nilware.io> Date: Sat, 14 Sep 2024 12:07:52 +0300 Subject: [PATCH] threading; up to 9 integer args --- externals/box64 | 2 +- .../libraries/kernel/thread_management.cpp | 8 +- src/core/libraries/libs.h | 5 +- src/core/linker.cpp | 135 ++++++++++++++---- 4 files changed, 118 insertions(+), 32 deletions(-) diff --git a/externals/box64 b/externals/box64 index ee232489..5810c661 160000 --- a/externals/box64 +++ b/externals/box64 @@ -1 +1 @@ -Subproject commit ee232489491741bc7e9d0306212be4135fd6fbda +Subproject commit 5810c66154452ee2b5c4e6a973fac045be48cf55 diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index 0da51425..10b8d9e4 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -26,6 +26,9 @@ #include <sys/mman.h> #endif +namespace Core { + void* RunThreadEntry(u64 thread, void* arg); +} namespace Libraries::Kernel { thread_local ScePthread g_pthread_self{}; @@ -994,13 +997,12 @@ static void* run_thread(void* arg) { #ifdef ARCH_X86_64 Core::InitializeThreadPatchStack(); #endif - auto* linker = Common::Singleton<Core::Linker>::Instance(); - linker->InitTlsForThread(false); void* ret = nullptr; g_pthread_self = thread; pthread_cleanup_push(cleanup_thread, thread); thread->is_started = true; - ret = thread->entry(thread->arg); + ret = Core::RunThreadEntry((u64)thread->entry, thread->arg); + // ret = thread->entry(thread->arg); pthread_cleanup_pop(1); return ret; } diff --git a/src/core/libraries/libs.h b/src/core/libraries/libs.h index 1b795a41..07ba35e5 100644 --- a/src/core/libraries/libs.h +++ b/src/core/libraries/libs.h @@ -28,7 +28,10 @@ struct wrapper_impl<name, PS4_SYSV_ABI R (*)(Args...), f> { std::string_view(name.value) != "sceUserServiceGetEvent") { // LOG_WARNING(Core_Linker, "Function {} called", name.value); } - LOG_WARNING(Core_Linker, "Function {} called", name.value); + if (sizeof...(Args) > 6) { + LOG_CRITICAL(Core_Linker, "Function {} called with more than 6 args", name.value); + } + // LOG_WARNING(Core_Linker, "Function {} called", name.value); if constexpr (std::is_same_v<R, s32> || std::is_same_v<R, u32>) { const u32 ret = f(args...); if (ret != 0 && std::string_view(name.value) != "scePthreadEqual") { diff --git a/src/core/linker.cpp b/src/core/linker.cpp index 1e600788..7da5dd29 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -107,6 +107,7 @@ box64context_t *my_context = NULL; uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, double, double, double, double, double, double, double, double); + auto return_addr = *(uint64_t*)(emu->regs[_RSP].q[0]); HLEFunc hleFunc; memcpy(&hleFunc, (uint8_t*)addr + 1, 8); asm volatile( @@ -116,6 +117,8 @@ box64context_t *my_context = NULL; "mov x3, %7\n" "mov x4, %8\n" "mov x5, %9\n" + "mov x6, %19\n" + "mov x7, %20\n" "mov v0.16b, %10.16b\n" "mov v1.16b, %11.16b\n" @@ -125,7 +128,10 @@ box64context_t *my_context = NULL; "mov v5.16b, %15.16b\n" "mov v6.16b, %16.16b\n" "mov v7.16b, %17.16b\n" + "sub sp, sp, #16\n" + "str %w21, [sp, #8]\n" "blr %18\n" + "add sp, sp, #16\n" "mov %0, x0\n" "mov %1, x1\n" "mov %2.16b, v0.16b\n" @@ -134,13 +140,15 @@ box64context_t *my_context = NULL; : "r"(emu->regs[_RDI].q[0]), "r"(emu->regs[_RSI].q[0]), "r"(emu->regs[_RDX].q[0]), "r"(emu->regs[_RCX].q[0]), "r"(emu->regs[_R8].q[0]), "r"(emu->regs[_R9].q[0]), "w"(emu->xmm[0].u128), "w"(emu->xmm[1].u128), "w"(emu->xmm[2].u128), "w"(emu->xmm[3].u128), "w"(emu->xmm[4].u128), "w"(emu->xmm[5].u128), "w"(emu->xmm[6].u128), - "w"(emu->xmm[7].u128), "r"(hleFunc) + "w"(emu->xmm[7].u128), "r"(hleFunc), "r"(*(uint64_t*)(emu->regs[_RSP].q[0]+8)), "r"(*(uint64_t*)(emu->regs[_RSP].q[0]+16)), + "r"(*(uint64_t*)(emu->regs[_RSP].q[0]+24)) : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", // Clobber volatile general-purpose registers "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", // Clobber volatile SIMD registers "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "memory", "cc" // Clobber memory and condition codes ); + *(uint64_t*)(emu->regs[_RSP].q[0]) = return_addr; // emu->regs[_RAX].q[0] = hleFunc( // emu->regs[_RDI].q[0], // emu->regs[_RSI].q[0], @@ -161,6 +169,11 @@ box64context_t *my_context = NULL; // ); } else if (num == 20) { emu->quit = 1; + } else if (num == 21) { + u64 next; + memcpy(&next, (uint8_t*)addr, 8); + LOG_INFO(Core_Linker, "Called: {} @{:x} RDI: {}", ((char*)addr + 8), next, emu->regs[_RDI].q[0]); + emu->ip.q[0] = next; } else { for(;;) { printf("Interruption %d at %p\n", num, addr); @@ -222,8 +235,8 @@ box64context_t *my_context = NULL; void* GetSegmentBase(uint32_t desc) { - printf("GetSegmentBase\n"); - return NULL; + printf("GetSegmentBase %d\n", desc); + return Core::GetTcbBase(); } uint64_t RunFunctionWithEmu(x64emu_t *emu, int QuitOnLongJump, uintptr_t fnc, int nargs, ...) { @@ -272,37 +285,70 @@ static void RunMainEntry(VAddr addr, EntryParams* params, ExitFunc exit_func) { : "rax", "rsi", "rdi"); #else -uint64_t rsp = GetRSP(emu); + uint64_t rsp = GetRSP(emu); -rsp = rsp & ~16; -uint8_t* code = (uint8_t*)malloc(2); - code[0] = 0xCD; - code[1] = 0x14; - rsp -= 8; - - for (int i = params->argc; i > 0; i--) { - rsp = rsp - 8; - *(void**)rsp = ¶ms->argv[i - 1]; - } + rsp = rsp & ~16; + static uint8_t code[2]; + code[0] = 0xCD; + code[1] = 0x14; + rsp -= 8; + *(uint8_t**)rsp = code; + for (int i = params->argc; i > 0; i--) { rsp = rsp - 8; - *(u64*)rsp = params->argc; + *(void**)rsp = ¶ms->argv[i - 1]; + } - uint64_t rsi = (u64)params; - uint64_t rdi = (u64)exit_func; + rsp = rsp - 8; + *(u64*)rsp = params->argc; - SetRIP(emu, addr); - SetRSP(emu, rsp); - SetRSI(emu, rsi); - SetRDI(emu, rdi); - emu->quit = 0; + uint64_t rsi = (u64)params; + uint64_t rdi = (u64)exit_func; - Run(emu, 0); + SetRIP(emu, addr); + SetRSP(emu, rsp); + SetRSI(emu, rsi); + SetRDI(emu, rdi); + emu->quit = 0; + + Run(emu, 0); UNIMPLEMENTED_MSG("Missing RunMainEntry() implementation for target CPU architecture."); #endif } +void* RunThreadEntry(u64 thread, void* arg) { + emu = NewX64Emu(my_context, 0, (uintptr_t)0, 0, 0); + + auto* linker = Common::Singleton<Core::Linker>::Instance(); + + thread_set_emu(emu); + + auto rsp = (uint8_t*)malloc(2 * 1024 * 1024) + 2 * 1024 * 1024; + SetRSP(emu, (u64)rsp); + + linker->InitTlsForThread(false); + + uint8_t code[2]; + code[0] = 0xCD; + code[1] = 0x14; + rsp -= 8; + *(uint8_t**)rsp = code; + SetRIP(emu, thread); + SetRSP(emu, (u64)rsp); + SetRDI(emu, (u64)arg); + + emu->quit = 0; + + Run(emu, 0); + + auto rv = emu->regs[_RAX].q[0]; + // no need here, freed automatically by thread_set_emu cleanup + // FreeX64Emu(&emu); + + return (void*)rv; +} + Linker::Linker() : memory{Memory::Instance()} {} Linker::~Linker() = default; @@ -357,7 +403,7 @@ void Linker::Execute() { for (auto& m : m_modules) { if (m->IsSharedLib()) { // m->Start(0, nullptr, nullptr); - LOG_DEBUG(Core_Linker, "Starting shared library module {}", m->name); + LOG_INFO(Core_Linker, "Starting shared library module {}", m->name); const VAddr addr = m->dynamic_info.init_virtual_addr + m->GetBaseAddress(); uint8_t* code = (uint8_t*)malloc(2); code[0] = 0xCD; @@ -385,7 +431,7 @@ void Linker::Execute() { for (auto& m : m_modules) { if (!m->IsSharedLib()) { - LOG_DEBUG(Core_Linker, "Starting main library module {}", m->name); + LOG_INFO(Core_Linker, "Starting main library module {}", m->name); SetRSP(emu, stack_top); RunMainEntry(m->GetEntryAddress(), &p, ProgramExitFunc); } @@ -552,6 +598,12 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul const auto* record = m_hle_symbols.FindSymbol(sr); + if (record) { + *return_info = *record; + + return true; + } + if (!record) { // Check if it an export function const auto* p = FindExportedModule(*module, *library); @@ -562,6 +614,17 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul if (record) { *return_info = *record; +#if 0 // Set to 1 to log all symbol calls + const auto aeronid = AeroLib::FindByNid(sr.name.c_str()); + if (aeronid) { + uint8_t* stub = (uint8_t*)malloc(2 + 8 + strlen(aeronid->name) + 1); + stub[0] = 0xCD; + stub[1] = 0x15; + memcpy(&stub[2], &return_info->virtual_address, 8); + strcpy((char*)&stub[10], aeronid->name); + return_info->virtual_address = (uint64_t)stub; + } +#endif return true; } @@ -578,6 +641,24 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul return false; } +void* heap_api_malloc(Core::HeapAPI* heap_api, size_t size) { + static uint8_t code[2]; + code[0] = 0xCD; + code[1] = 0x14; + + uint64_t rsp = GetRSP(emu); + rsp -= 8; + *(uint8_t**)rsp = code; + + SetRIP(emu, (u64)heap_api->heap_malloc); + SetRSP(emu, rsp); + SetRDI(emu, size); + + Run(emu, 0); + + return (void*)emu->regs[_RAX].q[0]; +} + void* Linker::TlsGetAddr(u64 module_index, u64 offset) { std::scoped_lock lk{mutex}; @@ -604,7 +685,7 @@ void* Linker::TlsGetAddr(u64 module_index, u64 offset) { Module* module = m_modules[module_index - 1].get(); const u32 init_image_size = module->tls.init_image_size; // TODO: Determine if Windows will crash from this - u8* dest = reinterpret_cast<u8*>(heap_api->heap_malloc(module->tls.image_size)); + u8* dest = reinterpret_cast<u8*>(heap_api_malloc(heap_api, module->tls.image_size)); const u8* src = reinterpret_cast<const u8*>(module->tls.image_virtual_addr); std::memcpy(dest, src, init_image_size); std::memset(dest + init_image_size, 0, module->tls.image_size - init_image_size); @@ -636,7 +717,7 @@ void Linker::InitTlsForThread(bool is_primary) { } else { if (heap_api) { #ifndef WIN32 - addr_out = heap_api->heap_malloc(total_tls_size); + addr_out = heap_api_malloc(heap_api, total_tls_size); } else { addr_out = std::malloc(total_tls_size); #else