Merge pull request #685 from squidbus/patch-fixes

core: Fix CPU patch stack issues
This commit is contained in:
georgemoralis 2024-08-31 23:43:26 +03:00 committed by GitHub
commit 0c05a148e6
4 changed files with 14 additions and 25 deletions

View file

@ -636,7 +636,7 @@ target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAlloca
if (APPLE) if (APPLE)
# Reserve system-managed memory space. # Reserve system-managed memory space.
target_link_options(shadps4 PRIVATE -Wl,-no_pie,-no_fixup_chains,-no_huge,-pagezero_size,0x400000,-segaddr,GUEST_SYSTEM,0x400000,-image_base,0x10000000000) target_link_options(shadps4 PRIVATE -Wl,-no_pie,-no_fixup_chains,-no_huge,-pagezero_size,0x400000,-segaddr,GUEST_SYSTEM,0x400000,-image_base,0x20000000000)
# Link MoltenVK for Vulkan support # Link MoltenVK for Vulkan support
find_library(MOLTENVK MoltenVK REQUIRED) find_library(MOLTENVK MoltenVK REQUIRED)

View file

@ -126,39 +126,35 @@ static Xbyak::Reg AllocateScratchRegister(
static pthread_key_t stack_pointer_slot; static pthread_key_t stack_pointer_slot;
static pthread_key_t patch_stack_slot; static pthread_key_t patch_stack_slot;
static std::once_flag patch_context_slots_init_flag; static std::once_flag patch_context_slots_init_flag;
static constexpr u32 patch_stack_size = 0x1000;
static_assert(sizeof(void*) == sizeof(u64), static_assert(sizeof(void*) == sizeof(u64),
"Cannot fit a register inside a thread local storage slot."); "Cannot fit a register inside a thread local storage slot.");
static void FreePatchStack(void* patch_stack) {
// Subtract back to the bottom of the stack for free.
std::free(static_cast<u8*>(patch_stack) - patch_stack_size);
}
static void InitializePatchContextSlots() { static void InitializePatchContextSlots() {
ASSERT_MSG(pthread_key_create(&stack_pointer_slot, nullptr) == 0, ASSERT_MSG(pthread_key_create(&stack_pointer_slot, nullptr) == 0,
"Unable to allocate thread-local register for stack pointer."); "Unable to allocate thread-local register for stack pointer.");
ASSERT_MSG(pthread_key_create(&patch_stack_slot, nullptr) == 0, ASSERT_MSG(pthread_key_create(&patch_stack_slot, FreePatchStack) == 0,
"Unable to allocate thread-local register for patch stack."); "Unable to allocate thread-local register for patch stack.");
} }
void InitializeThreadPatchStack() { void InitializeThreadPatchStack() {
std::call_once(patch_context_slots_init_flag, InitializePatchContextSlots); std::call_once(patch_context_slots_init_flag, InitializePatchContextSlots);
const auto* patch_stack = std::malloc(0x1000); pthread_setspecific(patch_stack_slot,
pthread_setspecific(patch_stack_slot, patch_stack); static_cast<u8*>(std::malloc(patch_stack_size)) + patch_stack_size);
}
void CleanupThreadPatchStack() {
std::call_once(patch_context_slots_init_flag, InitializePatchContextSlots);
auto* patch_stack = pthread_getspecific(patch_stack_slot);
if (patch_stack != nullptr) {
std::free(patch_stack);
pthread_setspecific(patch_stack_slot, nullptr);
}
} }
/// Saves the stack pointer to thread local storage and loads the patch stack. /// Saves the stack pointer to thread local storage and loads the patch stack.
static void SaveStack(Xbyak::CodeGenerator& c) { static void SaveStack(Xbyak::CodeGenerator& c) {
std::call_once(patch_context_slots_init_flag, InitializePatchContextSlots); std::call_once(patch_context_slots_init_flag, InitializePatchContextSlots);
// Save stack pointer and load patch stack. // Save original stack pointer and load patch stack.
c.putSeg(gs); c.putSeg(gs);
c.mov(qword[reinterpret_cast<void*>(stack_pointer_slot * sizeof(void*))], rsp); c.mov(qword[reinterpret_cast<void*>(stack_pointer_slot * sizeof(void*))], rsp);
c.putSeg(gs); c.putSeg(gs);
@ -184,10 +180,6 @@ void InitializeThreadPatchStack() {
// No-op // No-op
} }
void CleanupThreadPatchStack() {
// No-op
}
/// Saves the stack pointer to thread local storage and loads the patch stack. /// Saves the stack pointer to thread local storage and loads the patch stack.
static void SaveStack(Xbyak::CodeGenerator& c) { static void SaveStack(Xbyak::CodeGenerator& c) {
UNIMPLEMENTED(); UNIMPLEMENTED();
@ -244,7 +236,7 @@ static void RestoreContext(Xbyak::CodeGenerator& c, const Xbyak::Operand& dst) {
if (!dst.isREG() || dst.getIdx() != reg) { if (!dst.isREG() || dst.getIdx() != reg) {
c.pop(Xbyak::Reg64(reg)); c.pop(Xbyak::Reg64(reg));
} else { } else {
c.add(rsp, 4); c.add(rsp, 8);
} }
} }
RestoreStack(c); RestoreStack(c);

View file

@ -987,15 +987,14 @@ static void cleanup_thread(void* arg) {
destructor(value); destructor(value);
} }
} }
Core::CleanupThreadPatchStack();
thread->is_almost_done = true; thread->is_almost_done = true;
} }
static void* run_thread(void* arg) { static void* run_thread(void* arg) {
auto* thread = static_cast<ScePthread>(arg); auto* thread = static_cast<ScePthread>(arg);
Common::SetCurrentThreadName(thread->name.c_str()); Common::SetCurrentThreadName(thread->name.c_str());
auto* linker = Common::Singleton<Core::Linker>::Instance();
Core::InitializeThreadPatchStack(); Core::InitializeThreadPatchStack();
auto* linker = Common::Singleton<Core::Linker>::Instance();
linker->InitTlsForThread(false); linker->InitTlsForThread(false);
void* ret = nullptr; void* ret = nullptr;
g_pthread_self = thread; g_pthread_self = thread;

View file

@ -85,8 +85,8 @@ void Linker::Execute() {
// Init primary thread. // Init primary thread.
Common::SetCurrentThreadName("GAME_MainThread"); Common::SetCurrentThreadName("GAME_MainThread");
Libraries::Kernel::pthreadInitSelfMainThread();
InitializeThreadPatchStack(); InitializeThreadPatchStack();
Libraries::Kernel::pthreadInitSelfMainThread();
InitTlsForThread(true); InitTlsForThread(true);
// Start shared library modules // Start shared library modules
@ -106,8 +106,6 @@ void Linker::Execute() {
RunMainEntry(m->GetEntryAddress(), &p, ProgramExitFunc); RunMainEntry(m->GetEntryAddress(), &p, ProgramExitFunc);
} }
} }
CleanupThreadPatchStack();
} }
s32 Linker::LoadModule(const std::filesystem::path& elf_name, bool is_dynamic) { s32 Linker::LoadModule(const std::filesystem::path& elf_name, bool is_dynamic) {