cpu_patches: Patch just-in-time using signal handlers. (#852)

* cpu_patches: Patch just-in-time using illegal instruction handler.

* core: Add common signal dispatch system and use for on-demand TCB patches.

* page_manager: Re-add userfaultfd implementation.
This commit is contained in:
squidbus 2024-09-15 13:48:34 -07:00 committed by GitHub
parent b09b28c7f3
commit 75a4df53a5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 404 additions and 143 deletions

View file

@ -439,6 +439,8 @@ set(CORE src/core/aerolib/stubs.cpp
src/core/module.cpp
src/core/module.h
src/core/platform.h
src/core/signals.cpp
src/core/signals.h
src/core/tls.cpp
src/core/tls.h
src/core/virtual_memory.cpp

View file

@ -1,12 +1,16 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <Zydis/Zydis.h>
#include <xbyak/xbyak.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/types.h"
#include "core/signals.h"
#include "core/tls.h"
#include "cpu_patches.h"
@ -534,7 +538,7 @@ static bool FilterRosetta2Only(const ZydisDecodedOperand*) {
return ret;
}
#endif // __APPLE__
#else // __APPLE__
static bool FilterTcbAccess(const ZydisDecodedOperand* operands) {
const auto& dst_op = operands[0];
@ -580,6 +584,8 @@ static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGe
#endif
}
#endif // __APPLE__
using PatchFilter = bool (*)(const ZydisDecodedOperand*);
using InstructionGenerator = void (*)(const ZydisDecodedOperand*, Xbyak::CodeGenerator&);
struct PatchInfo {
@ -615,39 +621,82 @@ static const std::unordered_map<ZydisMnemonic, PatchInfo> Patches = {
#endif
};
void PatchInstructions(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c) {
if (Patches.empty()) {
// Nothing to patch on this platform.
return;
static std::once_flag init_flag;
static ZydisDecoder instr_decoder;
struct PatchModule {
/// Mutex controlling access to module code regions.
std::mutex mutex{};
/// Start of the module.
u8* start;
/// End of the module.
u8* end;
/// Tracker for patched code locations.
std::set<u8*> patched;
/// Code generator for patching the module.
Xbyak::CodeGenerator patch_gen;
/// Code generator for writing trampoline patches.
Xbyak::CodeGenerator trampoline_gen;
PatchModule(u8* module_ptr, const u64 module_size, u8* trampoline_ptr,
const u64 trampoline_size)
: start(module_ptr), end(module_ptr + module_size), patch_gen(module_size, module_ptr),
trampoline_gen(trampoline_size, trampoline_ptr) {}
};
static std::map<u64, PatchModule> modules;
/// Finds the registered module whose code range contains `ptr`.
///
/// `modules` is keyed by module start address, so the only possible owner is the entry with
/// the greatest start address that is not above `ptr`. That entry is only returned when `ptr`
/// also lies before the module's end; otherwise an address that merely happens to sit above
/// some module would be treated as belonging to it.
///
/// @param ptr Address to look up.
/// @return The owning module, or nullptr if `ptr` is not inside any registered module.
static PatchModule* GetModule(const void* ptr) {
    const auto address = reinterpret_cast<u64>(ptr);
    const auto upper_bound = modules.upper_bound(address);
    if (upper_bound == modules.begin()) {
        // Every registered module starts above the address (or none are registered).
        return nullptr;
    }

    auto& candidate = std::prev(upper_bound)->second;
    if (static_cast<const u8*>(ptr) >= candidate.end) {
        // The address is past the end of the closest preceding module.
        return nullptr;
    }
    return &candidate;
}
static bool TryPatch(void* code_address) {
auto* code = static_cast<u8*>(code_address);
auto* module = GetModule(code);
if (module == nullptr) {
return false;
}
std::unique_lock lock{module->mutex};
// Return early if already patched, in case multiple threads signaled at the same time.
if (std::ranges::find(module->patched, code) != module->patched.end()) {
return true;
}
ZydisDecoder instr_decoder;
ZydisDecodedInstruction instruction;
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
ZydisDecoderInit(&instr_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64);
u8* code = reinterpret_cast<u8*>(segment_addr);
u8* end = code + segment_size;
while (code < end) {
ZyanStatus status =
ZydisDecoderDecodeFull(&instr_decoder, code, end - code, &instruction, operands);
const auto status =
ZydisDecoderDecodeFull(&instr_decoder, code, module->end - code, &instruction, operands);
if (!ZYAN_SUCCESS(status)) {
code++;
continue;
return false;
}
if (Patches.contains(instruction.mnemonic)) {
auto patch_info = Patches.at(instruction.mnemonic);
const auto& patch_info = Patches.at(instruction.mnemonic);
if (patch_info.filter(operands)) {
auto patch_gen = Xbyak::CodeGenerator(instruction.length, code);
auto& patch_gen = module->patch_gen;
// Reset state and move to current code position.
patch_gen.reset();
patch_gen.setSize(code - patch_gen.getCode());
if (patch_info.trampoline) {
const auto trampoline_ptr = c.getCurr();
auto& trampoline_gen = module->trampoline_gen;
const auto trampoline_ptr = trampoline_gen.getCurr();
patch_info.generator(operands, c);
patch_info.generator(operands, trampoline_gen);
// Return to the following instruction at the end of the trampoline.
c.jmp(code + instruction.length);
trampoline_gen.jmp(code + instruction.length);
// Replace instruction with near jump to the trampoline.
patch_gen.jmp(trampoline_ptr, Xbyak::CodeGenerator::LabelType::T_NEAR);
@ -665,14 +714,61 @@ void PatchInstructions(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator&
// Fill remaining space with nops.
patch_gen.nop(instruction.length - patch_size);
module->patched.insert(code);
LOG_DEBUG(Core, "Patched instruction '{}' at: {}",
ZydisMnemonicGetString(instruction.mnemonic), fmt::ptr(code));
return true;
}
}
}
code += instruction.length;
return false;
}
/// Access violation callback for the signal dispatcher. The faulting data address and the
/// access kind are not used; a patch is attempted at the faulting code address only.
///
/// @return Whatever TryPatch reports for `code_address`.
static bool PatchesAccessViolationHandler(void* code_address, [[maybe_unused]] void* fault_address,
                                          [[maybe_unused]] bool is_write) {
    const bool patched = TryPatch(code_address);
    return patched;
}
/// Illegal instruction callback for the signal dispatcher: attempts a patch at the faulting
/// code address.
///
/// @return Whatever TryPatch reports for `code_address`.
static bool PatchesIllegalInstructionHandler(void* code_address) {
    const bool patched = TryPatch(code_address);
    return patched;
}
/// One-time setup for on-demand patching: prepares the shared instruction decoder and, when
/// this platform has any patches, hooks the patcher into the signal dispatcher.
static void PatchesInit() {
    ZydisDecoderInit(&instr_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64);

    if (Patches.empty()) {
        // Nothing to patch on this platform, so no handlers are needed.
        return;
    }

    auto* dispatcher = Signals::Instance();
    // Should be called last: use the largest priority value.
    constexpr auto last_priority = std::numeric_limits<u32>::max();
    dispatcher->RegisterAccessViolationHandler(PatchesAccessViolationHandler, last_priority);
    dispatcher->RegisterIllegalInstructionHandler(PatchesIllegalInstructionHandler,
                                                  last_priority);
}
void RegisterPatchModule(void* module_ptr, u64 module_size, void* trampoline_area_ptr,
u64 trampoline_area_size) {
std::call_once(init_flag, PatchesInit);
const auto module_addr = reinterpret_cast<u64>(module_ptr);
modules.emplace(std::piecewise_construct, std::forward_as_tuple(module_addr),
std::forward_as_tuple(static_cast<u8*>(module_ptr), module_size,
static_cast<u8*>(trampoline_area_ptr),
trampoline_area_size));
}
/// Applies patches that must be in place before the segment starts executing.
///
/// Only does work on Apple platforms; elsewhere this is a no-op and all patching happens on
/// demand from the signal handlers.
///
/// @param segment_addr Start address of the executable segment.
/// @param segment_size Size of the segment in bytes.
void PrePatchInstructions(u64 segment_addr, u64 segment_size) {
#ifdef __APPLE__
    // HACK: For some reason patching in the signal handler at the start of a page does not work
    // under Rosetta 2. Patch any instructions at the start of a page ahead of time.
    if (Patches.empty()) {
        return;
    }

    constexpr u64 page_size = 0x1000;
    // Visit only page starts that lie inside [segment_addr, segment_addr + segment_size).
    // Deriving the end from the rounded-up start plus the rounded-up size can step onto a page
    // beyond the segment when segment_addr is not page-aligned.
    const u64 segment_end = segment_addr + segment_size;
    for (u64 page = Common::AlignUp(segment_addr, page_size); page < segment_end;
         page += page_size) {
        TryPatch(reinterpret_cast<u8*>(page));
    }
#endif
}
} // namespace Core

View file

@ -3,10 +3,6 @@
#pragma once
namespace Xbyak {
class CodeGenerator;
}
namespace Core {
/// Initializes a stack for the current thread for use by patch implementations.
@ -15,7 +11,11 @@ void InitializeThreadPatchStack();
/// Cleans up the patch stack for the current thread.
void CleanupThreadPatchStack();
/// Patches CPU instructions that cannot run as-is on the host.
void PatchInstructions(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c);
/// Registers a module for patching, providing an area to generate trampoline code.
void RegisterPatchModule(void* module_ptr, u64 module_size, void* trampoline_area_ptr,
u64 trampoline_area_size);
/// Applies CPU patches that need to be done before beginning execution.
void PrePatchInstructions(u64 segment_addr, u64 segment_size);
} // namespace Core

View file

@ -1,7 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <xbyak/xbyak.h>
#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
@ -92,9 +91,11 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
LoadOffset += CODE_BASE_INCR * (1 + aligned_base_size / CODE_BASE_INCR);
LOG_INFO(Core_Linker, "Loading module {} to {}", name, fmt::ptr(*out_addr));
#ifdef ARCH_X86_64
// Initialize trampoline generator.
void* trampoline_addr = std::bit_cast<void*>(base_virtual_addr + aligned_base_size);
Xbyak::CodeGenerator c(TrampolineSize, trampoline_addr);
RegisterPatchModule(*out_addr, aligned_base_size, trampoline_addr, TrampolineSize);
#endif
LOG_INFO(Core_Linker, "======== Load Module to Memory ========");
LOG_INFO(Core_Linker, "base_virtual_addr ......: {:#018x}", base_virtual_addr);
@ -135,7 +136,7 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
add_segment(elf_pheader[i]);
#ifdef ARCH_X86_64
if (elf_pheader[i].p_flags & PF_EXEC) {
PatchInstructions(segment_addr, segment_file_size, c);
PrePatchInstructions(segment_addr, segment_file_size);
}
#endif
break;

168
src/core/signals.cpp Normal file
View file

@ -0,0 +1,168 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/arch.h"
#include "common/assert.h"
#include "core/signals.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <csignal>
#ifdef ARCH_X86_64
#include <Zydis/Decoder.h>
#include <Zydis/Formatter.h>
#endif
#endif
namespace Core {
#if defined(_WIN32)
/// Vectored exception handler: forwards access violations and illegal instructions to the
/// registered handlers, and resumes execution only if one of them handled the exception.
static LONG WINAPI SignalHandler(EXCEPTION_POINTERS* pExp) noexcept {
    const auto* dispatcher = Signals::Instance();
    const auto& record = *pExp->ExceptionRecord;
    auto* code_address = reinterpret_cast<void*>(pExp->ContextRecord->Rip);

    bool handled = false;
    if (record.ExceptionCode == EXCEPTION_ACCESS_VIOLATION) {
        // ExceptionInformation[0] == 1 is treated as a write; [1] is the faulting address.
        const bool is_write = record.ExceptionInformation[0] == 1;
        auto* fault_address = reinterpret_cast<void*>(record.ExceptionInformation[1]);
        handled = dispatcher->DispatchAccessViolation(code_address, fault_address, is_write);
    } else if (record.ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION) {
        handled = dispatcher->DispatchIllegalInstruction(code_address);
    }

    // Anything unhandled is passed on to the next handler in the chain.
    return handled ? EXCEPTION_CONTINUE_EXECUTION : EXCEPTION_CONTINUE_SEARCH;
}
#else
#ifdef __APPLE__
#if defined(ARCH_X86_64)
#define CODE_ADDRESS(ctx) reinterpret_cast<void*>((ctx)->uc_mcontext->__ss.__rip)
#define IS_WRITE_ERROR(ctx) ((ctx)->uc_mcontext->__es.__err & 0x2)
#elif defined(ARCH_ARM64)
#define CODE_ADDRESS(ctx) reinterpret_cast<void*>((ctx)->uc_mcontext->__ss.__pc)
#define IS_WRITE_ERROR(ctx) ((ctx)->uc_mcontext->__es.__esr & 0x40)
#endif
#else
#if defined(ARCH_X86_64)
#define CODE_ADDRESS(ctx) reinterpret_cast<void*>((ctx)->uc_mcontext.gregs[REG_RIP])
#define IS_WRITE_ERROR(ctx) ((ctx)->uc_mcontext.gregs[REG_ERR] & 0x2)
#endif
#endif
#ifndef IS_WRITE_ERROR
#error "Missing IS_WRITE_ERROR() implementation for target OS and CPU architecture.
#endif
/// Produces a textual disassembly of the instruction at `code_address` for diagnostics.
///
/// On x86-64 the instruction is decoded from at most 0x20 bytes starting at `code_address`
/// and formatted in Intel syntax. If decoding fails, or on other architectures, the
/// placeholder text "<unable to decode>" is returned.
static std::string DisassembleInstruction(void* code_address) {
    char text[256] = "<unable to decode>";

#ifdef ARCH_X86_64
    static constexpr u64 max_length = 0x20;

    ZydisDecoder decoder;
    ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64);

    ZydisDecodedInstruction instruction;
    ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
    const bool decoded = ZYAN_SUCCESS(
        ZydisDecoderDecodeFull(&decoder, code_address, max_length, &instruction, operands));

    if (decoded) {
        ZydisFormatter formatter;
        ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_INTEL);
        // The runtime address is supplied so the formatter can use it for this instruction.
        const auto runtime_address = reinterpret_cast<u64>(code_address);
        ZydisFormatterFormatInstruction(&formatter, &instruction, operands,
                                        instruction.operand_count_visible, text, sizeof(text),
                                        runtime_address, ZYAN_NULL);
    }
#endif

    return text;
}
/// POSIX signal entry point: forwards SIGSEGV/SIGBUS and SIGILL to the registered handlers
/// and aborts with a diagnostic when no handler accepts the signal. Other signals are ignored.
static void SignalHandler(int sig, siginfo_t* info, void* raw_context) {
    const auto* context = static_cast<ucontext_t*>(raw_context);
    const auto* dispatcher = Signals::Instance();
    auto* code_address = CODE_ADDRESS(context);

    if (sig == SIGSEGV || sig == SIGBUS) {
        const bool is_write = IS_WRITE_ERROR(context);
        const bool handled =
            dispatcher->DispatchAccessViolation(code_address, info->si_addr, is_write);
        if (!handled) {
            UNREACHABLE_MSG("Unhandled access violation at code address {}: {} address {}",
                            fmt::ptr(code_address), is_write ? "Write to" : "Read from",
                            fmt::ptr(info->si_addr));
        }
        return;
    }

    if (sig == SIGILL) {
        const bool handled = dispatcher->DispatchIllegalInstruction(code_address);
        if (!handled) {
            UNREACHABLE_MSG("Unhandled illegal instruction at code address {}: {}",
                            fmt::ptr(code_address), DisassembleInstruction(code_address));
        }
    }
}
#endif
/// Installs the process-wide OS hooks that feed the dispatcher: a vectored exception handler
/// on Windows, or handlers for SIGSEGV, SIGBUS and SIGILL elsewhere.
SignalDispatch::SignalDispatch() {
#if defined(_WIN32)
    // Register first and assert afterwards, so registration never depends on how the
    // assertion macro evaluates its argument.
    handle = AddVectoredExceptionHandler(0, SignalHandler);
    ASSERT_MSG(handle != nullptr, "Failed to register exception handler.");
#else
    // Fill the struct member by member: the layout of struct sigaction and the type of
    // sa_mask differ between platforms, so neither designated-initializer order nor
    // `sa_mask = 0` is portable.
    struct sigaction action {};
    action.sa_sigaction = SignalHandler;
    action.sa_flags = SA_SIGINFO | SA_ONSTACK;
    sigemptyset(&action.sa_mask);

    const bool segv_installed = sigaction(SIGSEGV, &action, nullptr) == 0;
    const bool bus_installed = sigaction(SIGBUS, &action, nullptr) == 0;
    ASSERT_MSG(segv_installed && bus_installed,
               "Failed to register access violation signal handler.");

    const bool ill_installed = sigaction(SIGILL, &action, nullptr) == 0;
    ASSERT_MSG(ill_installed, "Failed to register illegal instruction signal handler.");
#endif
}
/// Removes the OS hooks installed by the constructor: unregisters the vectored exception
/// handler on Windows, or restores the default disposition for SIGSEGV, SIGBUS and SIGILL.
SignalDispatch::~SignalDispatch() {
#if defined(_WIN32)
    // Unregister first and assert afterwards, so removal never depends on how the assertion
    // macro evaluates its argument.
    const auto removed = RemoveVectoredExceptionHandler(handle);
    ASSERT_MSG(removed != 0, "Failed to remove exception handler.");
#else
    // Fill the struct member by member: the layout of struct sigaction and the type of
    // sa_mask differ between platforms, so neither designated-initializer order nor
    // `sa_mask = 0` is portable.
    struct sigaction action {};
    action.sa_handler = SIG_DFL;
    action.sa_flags = 0;
    sigemptyset(&action.sa_mask);

    const bool segv_restored = sigaction(SIGSEGV, &action, nullptr) == 0;
    const bool bus_restored = sigaction(SIGBUS, &action, nullptr) == 0;
    ASSERT_MSG(segv_restored && bus_restored,
               "Failed to remove access violation signal handler.");

    const bool ill_restored = sigaction(SIGILL, &action, nullptr) == 0;
    ASSERT_MSG(ill_restored, "Failed to remove illegal instruction signal handler.");
#endif
}
/// Offers an access violation to each registered handler in container order and stops at
/// the first one that reports it handled.
///
/// @return true if some handler handled the fault, false if none did.
bool SignalDispatch::DispatchAccessViolation(void* code_address, void* fault_address,
                                             bool is_write) const {
    for (auto it = access_violation_handlers.begin(); it != access_violation_handlers.end();
         ++it) {
        const bool handled = it->handler(code_address, fault_address, is_write);
        if (handled) {
            return true;
        }
    }
    return false;
}
/// Offers an illegal instruction fault to each registered handler in container order and
/// stops at the first one that reports it handled.
///
/// @return true if some handler handled the fault, false if none did.
bool SignalDispatch::DispatchIllegalInstruction(void* code_address) const {
    for (auto it = illegal_instruction_handlers.begin();
         it != illegal_instruction_handlers.end(); ++it) {
        const bool handled = it->handler(code_address);
        if (handled) {
            return true;
        }
    }
    return false;
}
} // namespace Core

56
src/core/signals.h Normal file
View file

@ -0,0 +1,56 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <set>
#include "common/singleton.h"
namespace Core {
using AccessViolationHandler = bool (*)(void* code_address, void* fault_address, bool is_write);
using IllegalInstructionHandler = bool (*)(void* code_address);
/// Receives OS signals and dispatches to the appropriate handlers.
///
/// Handlers are stored ordered by priority, lowest value first. Handlers registered with the
/// same priority are all kept, in registration order.
class SignalDispatch {
public:
    SignalDispatch();
    ~SignalDispatch();

    /// Registers a handler for memory access violation signals.
    /// @param handler  Callback returning true when it has handled the fault.
    /// @param priority Ordering key; lower values come earlier in the handler list.
    void RegisterAccessViolationHandler(const AccessViolationHandler& handler, u32 priority) {
        access_violation_handlers.emplace(handler, priority);
    }

    /// Registers a handler for illegal instruction signals.
    /// @param handler  Callback returning true when it has handled the fault.
    /// @param priority Ordering key; lower values come earlier in the handler list.
    void RegisterIllegalInstructionHandler(const IllegalInstructionHandler& handler, u32 priority) {
        illegal_instruction_handlers.emplace(handler, priority);
    }

    /// Dispatches an access violation signal, returning whether it was successfully handled.
    bool DispatchAccessViolation(void* code_address, void* fault_address, bool is_write) const;

    /// Dispatches an illegal instruction signal, returning whether it was successfully handled.
    bool DispatchIllegalInstruction(void* code_address) const;

private:
    /// A registered callback together with its ordering key.
    template <typename T>
    struct HandlerEntry {
        T handler;
        u32 priority;

        /// Entries are ordered by priority only; the handler itself takes no part.
        std::strong_ordering operator<=>(const HandlerEntry& right) const {
            return priority <=> right.priority;
        }
    };

    // Multisets, not sets: entries compare by priority alone, so a plain set would treat a
    // second handler with an already-used priority as a duplicate and silently drop it.
    std::multiset<HandlerEntry<AccessViolationHandler>> access_violation_handlers;
    std::multiset<HandlerEntry<IllegalInstructionHandler>> illegal_instruction_handlers;

#ifdef _WIN32
    /// Handle of the vectored exception handler installed by the constructor.
    void* handle{};
#endif
};
using Signals = Common::Singleton<SignalDispatch>;
} // namespace Core

View file

@ -2,21 +2,21 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <thread>
#include <boost/icl/interval_set.hpp>
#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/error.h"
#include "core/signals.h"
#include "video_core/page_manager.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#ifndef _WIN64
#include <fcntl.h>
#include <poll.h>
#include <signal.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#ifdef ENABLE_USERFAULTFD
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <poll.h>
#include <sys/ioctl.h>
#endif
#else
#include <windows.h>
@ -27,45 +27,7 @@ namespace VideoCore {
constexpr size_t PAGESIZE = 4_KB;
constexpr size_t PAGEBITS = 12;
#ifdef _WIN64
struct PageManager::Impl {
Impl(Vulkan::Rasterizer* rasterizer_) {
rasterizer = rasterizer_;
veh_handle = AddVectoredExceptionHandler(0, GuestFaultSignalHandler);
ASSERT_MSG(veh_handle, "Failed to register an exception handler");
}
void OnMap(VAddr address, size_t size) {}
void OnUnmap(VAddr address, size_t size) {}
void Protect(VAddr address, size_t size, bool allow_write) {
DWORD prot = allow_write ? PAGE_READWRITE : PAGE_READONLY;
DWORD old_prot{};
BOOL result = VirtualProtect(std::bit_cast<LPVOID>(address), size, prot, &old_prot);
ASSERT_MSG(result != 0, "Region protection failed");
}
static LONG WINAPI GuestFaultSignalHandler(EXCEPTION_POINTERS* pExp) noexcept {
const u32 ec = pExp->ExceptionRecord->ExceptionCode;
if (ec == EXCEPTION_ACCESS_VIOLATION) {
const auto info = pExp->ExceptionRecord->ExceptionInformation;
if (info[0] == 1) { // Write violation
const VAddr addr_aligned = Common::AlignDown(info[1], PAGESIZE);
rasterizer->InvalidateMemory(addr_aligned, PAGESIZE);
return EXCEPTION_CONTINUE_EXECUTION;
} /* else {
UNREACHABLE();
}*/
}
return EXCEPTION_CONTINUE_SEARCH; // pass further
}
inline static Vulkan::Rasterizer* rasterizer;
void* veh_handle{};
};
#elif ENABLE_USERFAULTFD
#if ENABLE_USERFAULTFD
struct PageManager::Impl {
Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} {
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
@ -160,72 +122,48 @@ struct PageManager::Impl {
int uffd;
};
#else
#if defined(__APPLE__)
#if defined(ARCH_X86_64)
#define IS_WRITE_ERROR(ctx) ((ctx)->uc_mcontext->__es.__err & 0x2)
#elif defined(ARCH_ARM64)
#define IS_WRITE_ERROR(ctx) ((ctx)->uc_mcontext->__es.__esr & 0x40)
#endif
#else
#if defined(ARCH_X86_64)
#define IS_WRITE_ERROR(ctx) ((ctx)->uc_mcontext.gregs[REG_ERR] & 0x2)
#endif
#endif
#ifndef IS_WRITE_ERROR
#error "Missing IS_WRITE_ERROR() implementation for target OS and CPU architecture.
#endif
struct PageManager::Impl {
Impl(Vulkan::Rasterizer* rasterizer_) {
rasterizer = rasterizer_;
#ifdef __APPLE__
// Read-only memory write results in SIGBUS on Apple.
static constexpr int SignalType = SIGBUS;
#else
static constexpr int SignalType = SIGSEGV;
#endif
sigset_t signal_mask;
sigemptyset(&signal_mask);
sigaddset(&signal_mask, SignalType);
using HandlerType = decltype(sigaction::sa_sigaction);
struct sigaction guest_access_fault {};
guest_access_fault.sa_flags = SA_SIGINFO | SA_ONSTACK;
guest_access_fault.sa_sigaction = &GuestFaultSignalHandler;
guest_access_fault.sa_mask = signal_mask;
sigaction(SignalType, &guest_access_fault, nullptr);
// Should be called first.
constexpr auto priority = std::numeric_limits<u32>::min();
Core::Signals::Instance()->RegisterAccessViolationHandler(GuestFaultSignalHandler,
priority);
}
void OnMap(VAddr address, size_t size) {}
void OnMap(VAddr address, size_t size) {
owned_ranges += boost::icl::interval<VAddr>::right_open(address, address + size);
}
void OnUnmap(VAddr address, size_t size) {}
void OnUnmap(VAddr address, size_t size) {
owned_ranges -= boost::icl::interval<VAddr>::right_open(address, address + size);
}
void Protect(VAddr address, size_t size, bool allow_write) {
#ifdef _WIN32
DWORD prot = allow_write ? PAGE_READWRITE : PAGE_READONLY;
DWORD old_prot{};
BOOL result = VirtualProtect(std::bit_cast<LPVOID>(address), size, prot, &old_prot);
ASSERT_MSG(result != 0, "Region protection failed");
#else
mprotect(reinterpret_cast<void*>(address), size,
PROT_READ | (allow_write ? PROT_WRITE : 0));
#endif
}
static void GuestFaultSignalHandler(int sig, siginfo_t* info, void* raw_context) {
ucontext_t* ctx = reinterpret_cast<ucontext_t*>(raw_context);
const VAddr address = reinterpret_cast<VAddr>(info->si_addr);
if (IS_WRITE_ERROR(ctx)) {
const VAddr addr_aligned = Common::AlignDown(address, PAGESIZE);
static bool GuestFaultSignalHandler(void* code_address, void* fault_address, bool is_write) {
const auto addr = reinterpret_cast<VAddr>(fault_address);
if (is_write && owned_ranges.find(addr) != owned_ranges.end()) {
const VAddr addr_aligned = Common::AlignDown(addr, PAGESIZE);
rasterizer->InvalidateMemory(addr_aligned, PAGESIZE);
} else {
// Read not supported!
UNREACHABLE();
return true;
}
return false;
}
inline static Vulkan::Rasterizer* rasterizer;
inline static boost::icl::interval_set<VAddr> owned_ranges;
};
#endif