Initial support for compiling on ARM64. (#788)

squidbus 2024-09-09 03:23:16 -07:00 committed by GitHub
parent adfb3af95f
commit 411449cd51
12 changed files with 166 additions and 25 deletions


@@ -31,6 +31,22 @@ endif()
option(ENABLE_QT_GUI "Enable the Qt GUI. If not selected then the emulator uses a minimal SDL-based UI instead" OFF)
# First, determine whether to use CMAKE_OSX_ARCHITECTURES or CMAKE_SYSTEM_PROCESSOR.
if (APPLE AND CMAKE_OSX_ARCHITECTURES)
set(BASE_ARCHITECTURE "${CMAKE_OSX_ARCHITECTURES}")
else()
set(BASE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}")
endif()
# Next, match common architecture strings down to a known common value.
if (BASE_ARCHITECTURE MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
set(ARCHITECTURE "x86_64")
elseif (BASE_ARCHITECTURE MATCHES "(aarch64)|(AARCH64)|(arm64)|(ARM64)")
set(ARCHITECTURE "arm64")
else()
message(FATAL_ERROR "Unsupported CPU architecture: ${BASE_ARCHITECTURE}")
endif()
# This function should be passed a list of all files in a target. It will automatically generate file groups
# following the directory hierarchy, so that the layout of the files in IDEs matches the one in the filesystem.
function(create_target_directory_groups target_name)
@@ -309,6 +325,7 @@ set(COMMON src/common/logging/backend.cpp
src/common/logging/text_formatter.h
src/common/logging/types.h
src/common/alignment.h
src/common/arch.h
src/common/assert.cpp
src/common/assert.h
src/common/bit_field.h
@@ -358,8 +375,6 @@ set(CORE src/core/aerolib/stubs.cpp
src/core/aerolib/aerolib.h
src/core/address_space.cpp
src/core/address_space.h
src/core/cpu_patches.cpp
src/core/cpu_patches.h
src/core/crypto/crypto.cpp
src/core/crypto/crypto.h
src/core/crypto/keys.h
@@ -417,6 +432,12 @@ set(CORE src/core/aerolib/stubs.cpp
src/core/virtual_memory.h
)
if (ARCHITECTURE STREQUAL "x86_64")
set(CORE ${CORE}
src/core/cpu_patches.cpp
src/core/cpu_patches.h)
endif()
set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/profile.h
src/shader_recompiler/recompiler.cpp
@@ -678,8 +699,10 @@ if (APPLE)
target_link_libraries(shadps4 PRIVATE ${MOLTENVK})
endif()
# Reserve system-managed memory space.
target_link_options(shadps4 PRIVATE -Wl,-no_pie,-no_fixup_chains,-no_huge,-pagezero_size,0x4000,-segaddr,TCB_SPACE,0x4000,-segaddr,GUEST_SYSTEM,0x400000,-image_base,0x20000000000)
if (ARCHITECTURE STREQUAL "x86_64")
# Reserve system-managed memory space.
target_link_options(shadps4 PRIVATE -Wl,-no_pie,-no_fixup_chains,-no_huge,-pagezero_size,0x4000,-segaddr,TCB_SPACE,0x4000,-segaddr,GUEST_SYSTEM,0x400000,-image_base,0x20000000000)
endif()
# Replacement for std::chrono::time_zone
target_link_libraries(shadps4 PRIVATE date::date-tz)


@@ -43,7 +43,6 @@ else()
endif()
if (NOT TARGET FFmpeg::ffmpeg)
set(ARCHITECTURE "x86_64")
add_subdirectory(ffmpeg-core)
add_library(FFmpeg::ffmpeg ALIAS ffmpeg)
endif()

src/common/arch.h (new file, 10 additions)

@@ -0,0 +1,10 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#if defined(__x86_64__) || defined(_M_X64)
#define ARCH_X86_64 1
#elif defined(__aarch64__) || defined(_M_ARM64)
#define ARCH_ARM64 1
#endif


@@ -1,10 +1,17 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/arch.h"
#include "common/assert.h"
#include "common/logging/backend.h"
#if defined(ARCH_X86_64)
#define Crash() __asm__ __volatile__("int $3")
#elif defined(ARCH_ARM64)
#define Crash() __asm__ __volatile__("brk 0")
#else
#error "Missing Crash() implementation for target CPU architecture."
#endif
void assert_fail_impl() {
Common::Log::Stop();


@@ -3,6 +3,8 @@
#pragma once
#include "common/arch.h"
#ifdef _MSC_VER
#include <intrin.h>
#endif
@@ -13,15 +15,20 @@ namespace Common {
#ifdef _MSC_VER
__forceinline static u64 FencedRDTSC() {
#ifdef ARCH_X86_64
_mm_lfence();
_ReadWriteBarrier();
const u64 result = __rdtsc();
_mm_lfence();
_ReadWriteBarrier();
return result;
#else
#error "Missing FencedRDTSC() implementation for target CPU architecture."
#endif
}
#else
static inline u64 FencedRDTSC() {
#ifdef ARCH_X86_64
u64 eax;
u64 edx;
asm volatile("lfence\n\t"
@@ -29,6 +36,16 @@ static inline u64 FencedRDTSC() {
"lfence\n\t"
: "=a"(eax), "=d"(edx));
return (edx << 32) | eax;
#elif defined(ARCH_ARM64)
u64 ret;
asm volatile("isb\n\t"
"mrs %0, cntvct_el0\n\t"
"isb\n\t"
: "=r"(ret)::"memory");
return ret;
#else
#error "Missing FencedRDTSC() implementation for target CPU architecture."
#endif
}
#endif
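Note (illustrative, not part of this commit): the AArch64 path above substitutes the generic timer's virtual counter (cntvct_el0) for the x86 TSC. Its tick rate is published in cntfrq_el0, so a caller that needs wall-clock time could convert counter deltas roughly as sketched below; the helper names are hypothetical.

#include <cstdint>

// Read the generic timer frequency in Hz (AArch64 only).
static inline std::uint64_t CounterFrequencyHz() {
    std::uint64_t freq;
    asm volatile("mrs %0, cntfrq_el0" : "=r"(freq));
    return freq;
}

// Convert a delta between two FencedRDTSC()-style reads into seconds.
static inline double TicksToSeconds(std::uint64_t ticks) {
    return static_cast<double>(ticks) / static_cast<double>(CounterFrequencyHz());
}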


@@ -3,11 +3,13 @@
#include <boost/icl/separate_interval_set.hpp>
#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/error.h"
#include "core/address_space.h"
#include "core/libraries/kernel/memory_management.h"
#include "core/memory.h"
#include "libraries/error_codes.h"
#ifdef _WIN32
#include <windows.h>
@@ -15,9 +17,8 @@
#include <fcntl.h>
#include <sys/mman.h>
#endif
#include "libraries/error_codes.h"
#ifdef __APPLE__
#if defined(__APPLE__) && defined(ARCH_X86_64)
// Reserve space for the system address space using a zerofill section.
asm(".zerofill GUEST_SYSTEM,GUEST_SYSTEM,__guest_system,0xFBFC00000");
#endif
@@ -308,12 +309,12 @@ struct AddressSpace::Impl {
constexpr int protection_flags = PROT_READ | PROT_WRITE;
constexpr int base_map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
#ifdef __APPLE__
// On ARM64 Macs, we run into limitations due to the commpage from 0xFC0000000 - 0xFFFFFFFFF
// and the GPU carveout region from 0x1000000000 - 0x6FFFFFFFFF. We can allocate the system
// managed region, as well as system reserved if reduced in size slightly, but we cannot map
// the user region where we want, so we must let the OS put it wherever possible and hope
// the game won't rely on its location.
#if defined(__APPLE__) && defined(ARCH_X86_64)
// On ARM64 Macs under Rosetta 2, we run into limitations due to the commpage from
// 0xFC0000000 - 0xFFFFFFFFF and the GPU carveout region from 0x1000000000 - 0x6FFFFFFFFF.
// We can allocate the system managed region, as well as system reserved if reduced in size
// slightly, but we cannot map the user region where we want, so we must let the OS put it
// wherever possible and hope the game won't rely on its location.
system_managed_base = reinterpret_cast<u8*>(
mmap(reinterpret_cast<void*>(SYSTEM_MANAGED_MIN), system_managed_size, protection_flags,
base_map_flags | MAP_FIXED, -1, 0));
@@ -325,12 +326,22 @@ struct AddressSpace::Impl {
protection_flags, base_map_flags, -1, 0));
#else
const auto virtual_size = system_managed_size + system_reserved_size + user_size;
#if defined(ARCH_X86_64)
const auto virtual_base =
reinterpret_cast<u8*>(mmap(reinterpret_cast<void*>(SYSTEM_MANAGED_MIN), virtual_size,
protection_flags, base_map_flags | MAP_FIXED, -1, 0));
system_managed_base = virtual_base;
system_reserved_base = reinterpret_cast<u8*>(SYSTEM_RESERVED_MIN);
user_base = reinterpret_cast<u8*>(USER_MIN);
#else
// Map memory wherever possible and instruction translation can handle offsetting to the
// base.
const auto virtual_base = reinterpret_cast<u8*>(
mmap(nullptr, virtual_size, protection_flags, base_map_flags, -1, 0));
system_managed_base = virtual_base;
system_reserved_base = virtual_base + SYSTEM_RESERVED_MIN - SYSTEM_MANAGED_MIN;
user_base = virtual_base + USER_MIN - SYSTEM_MANAGED_MIN;
#endif
#endif
if (system_managed_base == MAP_FAILED || system_reserved_base == MAP_FAILED ||
user_base == MAP_FAILED) {
@@ -430,9 +441,11 @@ struct AddressSpace::Impl {
if (write) {
flags |= PROT_WRITE;
}
#ifdef ARCH_X86_64
if (execute) {
flags |= PROT_EXEC;
}
#endif
int ret = mprotect(reinterpret_cast<void*>(virtual_addr), size, flags);
ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno));
}
@@ -463,8 +476,14 @@ AddressSpace::~AddressSpace() = default;
void* AddressSpace::Map(VAddr virtual_addr, size_t size, u64 alignment, PAddr phys_addr,
bool is_exec) {
return impl->Map(virtual_addr, phys_addr, size,
is_exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE);
#if ARCH_X86_64
const auto prot = is_exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE;
#else
// On non-native architectures, we can simplify things by ignoring the execute flag for the
// canonical copy of the memory and rely on the JIT to map translated code as executable.
constexpr auto prot = PAGE_READWRITE;
#endif
return impl->Map(virtual_addr, phys_addr, size, prot);
}
void* AddressSpace::MapFile(VAddr virtual_addr, size_t size, size_t offset, u32 prot,
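Note (illustrative, not part of this commit): on non-x86_64 hosts the guest regions land at an OS-chosen base, so anything consuming guest virtual addresses has to rebase them onto the host mapping, the same way system_reserved_base and user_base are derived above. A minimal sketch, with a hypothetical helper name and the SYSTEM_MANAGED_MIN constant inlined:

#include <cstdint>

constexpr std::uint64_t kSystemManagedMin = 0x00000400000ULL; // SYSTEM_MANAGED_MIN

// virtual_base is the pointer returned by the anonymous mmap in the non-x86_64 branch.
static inline std::uint8_t* GuestToHost(std::uint8_t* virtual_base, std::uint64_t guest_addr) {
    return virtual_base + (guest_addr - kSystemManagedMin);
}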


@@ -4,6 +4,7 @@
#pragma once
#include <memory>
#include "common/arch.h"
#include "common/enum.h"
#include "common/types.h"
@@ -23,7 +24,7 @@ constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL;
constexpr VAddr SYSTEM_MANAGED_MIN = 0x00000400000ULL;
constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL;
constexpr VAddr SYSTEM_RESERVED_MIN = 0x07FFFFC000ULL;
#ifdef __APPLE__
#if defined(__APPLE__) && defined(ARCH_X86_64)
// Can only comfortably reserve the first 0x7C0000000 of system reserved space.
constexpr VAddr SYSTEM_RESERVED_MAX = 0xFBFFFFFFFULL;
#else


@@ -6,6 +6,7 @@
#include <thread>
#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/error.h"
#include "common/logging/log.h"
@@ -989,7 +990,9 @@ static void cleanup_thread(void* arg) {
static void* run_thread(void* arg) {
auto* thread = static_cast<ScePthread>(arg);
Common::SetCurrentThreadName(thread->name.c_str());
#ifdef ARCH_X86_64
Core::InitializeThreadPatchStack();
#endif
auto* linker = Common::Singleton<Core::Linker>::Instance();
linker->InitTlsForThread(false);
void* ret = nullptr;


@@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/config.h"
#include "common/logging/log.h"
@@ -27,6 +28,7 @@ static PS4_SYSV_ABI void ProgramExitFunc() {
}
static void RunMainEntry(VAddr addr, EntryParams* params, ExitFunc exit_func) {
#ifdef ARCH_X86_64
// reinterpret_cast<entry_func_t>(addr)(params, exit_func); // can't be used, stack has to have
// a specific layout
asm volatile("andq $-16, %%rsp\n" // Align to 16 bytes
@@ -46,6 +48,9 @@ static void RunMainEntry(VAddr addr, EntryParams* params, ExitFunc exit_func) {
:
: "r"(addr), "r"(params), "r"(exit_func)
: "rax", "rsi", "rdi");
#else
UNIMPLEMENTED_MSG("Missing RunMainEntry() implementation for target CPU architecture.");
#endif
}
Linker::Linker() : memory{Memory::Instance()} {}
@@ -85,7 +90,9 @@ void Linker::Execute() {
// Init primary thread.
Common::SetCurrentThreadName("GAME_MainThread");
#ifdef ARCH_X86_64
InitializeThreadPatchStack();
#endif
Libraries::Kernel::pthreadInitSelfMainThread();
InitTlsForThread(true);


@@ -3,6 +3,7 @@
#include <xbyak/xbyak.h>
#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/logging/log.h"
#ifdef ENABLE_QT_GUI
@@ -134,9 +135,11 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
LOG_INFO(Core_Linker, "segment_mode ..........: {}", segment_mode);
add_segment(elf_pheader[i]);
#ifdef ARCH_X86_64
if (elf_pheader[i].p_flags & PF_EXEC) {
PatchInstructions(segment_addr, segment_file_size, c);
}
#endif
break;
}
case PT_DYNAMIC:


@@ -2,23 +2,28 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <mutex>
#include "common/arch.h"
#include "common/assert.h"
#include "common/types.h"
#include "core/tls.h"
#ifdef _WIN32
#include <windows.h>
#elif defined(__APPLE__)
#elif defined(__APPLE__) && defined(ARCH_X86_64)
#include <architecture/i386/table.h>
#include <boost/icl/interval_set.hpp>
#include <i386/user_ldt.h>
#include <sys/mman.h>
#elif !defined(ARCH_X86_64)
#include <pthread.h>
#endif
namespace Core {
#ifdef _WIN32
// Windows
static DWORD slot = 0;
static std::once_flag slot_alloc_flag;
@@ -40,7 +45,9 @@ Tcb* GetTcbBase() {
return reinterpret_cast<Tcb*>(TlsGetValue(GetTcbKey()));
}
#elif defined(__APPLE__)
#elif defined(__APPLE__) && defined(ARCH_X86_64)
// Apple x86_64
// Reserve space in the 32-bit address range for allocating TCB pages.
asm(".zerofill TCB_SPACE,TCB_SPACE,__guest_system,0x3FC000");
@@ -132,7 +139,9 @@ Tcb* GetTcbBase() {
return tcb;
}
#else
#elif defined(ARCH_X86_64)
// Other POSIX x86_64
void SetTcbBase(void* image_address) {
asm volatile("wrgsbase %0" ::"r"(image_address) : "memory");
@@ -144,6 +153,32 @@ Tcb* GetTcbBase() {
return tcb;
}
#else
// POSIX non-x86_64
// Just sets up a simple thread-local variable to store it, then instruction translation can point
// code to it.
static pthread_key_t slot = 0;
static std::once_flag slot_alloc_flag;
static void AllocTcbKey() {
ASSERT(pthread_key_create(&slot, nullptr) == 0);
}
pthread_key_t GetTcbKey() {
std::call_once(slot_alloc_flag, &AllocTcbKey);
return slot;
}
void SetTcbBase(void* image_address) {
ASSERT(pthread_setspecific(GetTcbKey(), image_address) == 0);
}
Tcb* GetTcbBase() {
return static_cast<Tcb*>(pthread_getspecific(GetTcbKey()));
}
#endif
} // namespace Core
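A minimal usage sketch of the pthread-key fallback above (illustrative only; the calling function is hypothetical): each guest thread stores its TCB pointer once, and later lookups resolve through ordinary pthread TLS instead of a segment register.

#include "core/tls.h"

// Hypothetical call site, run when a guest thread starts.
void OnGuestThreadStart(void* tcb_image) {
    Core::SetTcbBase(tcb_image);         // stash this thread's TCB
    Core::Tcb* tcb = Core::GetTcbBase(); // resolved via pthread_getspecific()
    (void)tcb;
}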


@@ -3,6 +3,7 @@
#include <thread>
#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/error.h"
#include "video_core/page_manager.h"
@@ -159,6 +160,27 @@
int uffd;
};
#else
#if defined(__APPLE__)
#if defined(ARCH_X86_64)
#define IS_WRITE_ERROR(ctx) ((ctx)->uc_mcontext->__es.__err & 0x2)
#elif defined(ARCH_ARM64)
#define IS_WRITE_ERROR(ctx) ((ctx)->uc_mcontext->__es.__esr & 0x40)
#endif
#else
#if defined(ARCH_X86_64)
#define IS_WRITE_ERROR(ctx) ((ctx)->uc_mcontext.gregs[REG_ERR] & 0x2)
#endif
#endif
#ifndef IS_WRITE_ERROR
#error "Missing IS_WRITE_ERROR() implementation for target OS and CPU architecture.
#endif
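For reference (not part of this commit), the constants tested above are the write bits of the respective fault records: bit 1 of the x86 page-fault error code and bit 6 (WnR, "write, not read") of the AArch64 exception syndrome. A standalone sketch of the same checks:

#include <cstdint>

// x86 page-fault error code: bit 1 set means the faulting access was a write.
constexpr bool IsWriteFaultX86(std::uint64_t err) {
    return (err & 0x2) != 0;
}

// AArch64 data-abort syndrome (ESR): bit 6 (WnR) is set for writes.
constexpr bool IsWriteFaultArm64(std::uint64_t esr) {
    return (esr & 0x40) != 0;
}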
struct PageManager::Impl {
Impl(Vulkan::Rasterizer* rasterizer_) {
rasterizer = rasterizer_;
@@ -194,12 +216,7 @@ struct PageManager::Impl {
static void GuestFaultSignalHandler(int sig, siginfo_t* info, void* raw_context) {
ucontext_t* ctx = reinterpret_cast<ucontext_t*>(raw_context);
const VAddr address = reinterpret_cast<VAddr>(info->si_addr);
#ifdef __APPLE__
const u32 err = ctx->uc_mcontext->__es.__err;
#else
const greg_t err = ctx->uc_mcontext.gregs[REG_ERR];
#endif
if (err & 0x2) {
if (IS_WRITE_ERROR(ctx)) {
const VAddr addr_aligned = Common::AlignDown(address, PAGESIZE);
rasterizer->InvalidateMemory(addr_aligned, PAGESIZE);
} else {