diff --git a/src/citra_qt/debugger/graphics/graphics_tracing.cpp b/src/citra_qt/debugger/graphics/graphics_tracing.cpp
index a38d15bc5..1fb2e1810 100644
--- a/src/citra_qt/debugger/graphics/graphics_tracing.cpp
+++ b/src/citra_qt/debugger/graphics/graphics_tracing.cpp
@@ -11,7 +11,6 @@
 #include <QFileDialog>
 #include <QMessageBox>
 #include <QPushButton>
-#include <boost/range/algorithm/copy.hpp>
 #include <nihstro/float24.h>
 #include "citra_qt/debugger/graphics/graphics_tracing.h"
 #include "common/common_types.h"
@@ -88,10 +87,13 @@ void GraphicsTracingWidget::StartRecording() {
                 std::back_inserter(state.lcd_registers));
     std::copy_n((u32*)&Pica::g_state.regs, sizeof(Pica::g_state.regs) / sizeof(u32),
                 std::back_inserter(state.pica_registers));
-    boost::copy(default_attributes, std::back_inserter(state.default_attributes));
-    boost::copy(shader_binary, std::back_inserter(state.vs_program_binary));
-    boost::copy(swizzle_data, std::back_inserter(state.vs_swizzle_data));
-    boost::copy(vs_float_uniforms, std::back_inserter(state.vs_float_uniforms));
+    std::copy(default_attributes.begin(), default_attributes.end(),
+              std::back_inserter(state.default_attributes));
+    std::copy(shader_binary.begin(), shader_binary.end(),
+              std::back_inserter(state.vs_program_binary));
+    std::copy(swizzle_data.begin(), swizzle_data.end(), std::back_inserter(state.vs_swizzle_data));
+    std::copy(vs_float_uniforms.begin(), vs_float_uniforms.end(),
+              std::back_inserter(state.vs_float_uniforms));
     // boost::copy(TODO: Not implemented, std::back_inserter(state.gs_program_binary));
     // boost::copy(TODO: Not implemented, std::back_inserter(state.gs_swizzle_data));
     // boost::copy(TODO: Not implemented, std::back_inserter(state.gs_float_uniforms));
diff --git a/src/citra_qt/debugger/registers.cpp b/src/citra_qt/debugger/registers.cpp
index df065ca5c..872146cc8 100644
--- a/src/citra_qt/debugger/registers.cpp
+++ b/src/citra_qt/debugger/registers.cpp
@@ -67,7 +67,7 @@ void RegistersWidget::OnDebugModeEntered() {
     }
 
     // TODO: Handle all cores
-    const ARM_Interface& core = system.GetCore(0);
+    const auto& core = system.GetCore(0);
     for (int i = 0; i < core_registers->childCount(); ++i) {
         core_registers->child(i)->setText(
             1, QStringLiteral("0x%1").arg(core.GetReg(i), 8, 16, QLatin1Char('0')));
@@ -203,7 +203,7 @@ void RegistersWidget::CreateVFPSystemRegisterChildren() {
 
 void RegistersWidget::UpdateVFPSystemRegisterValues() {
     // TODO: handle all cores
-    const ARM_Interface& core = system.GetCore(0);
+    const auto& core = system.GetCore(0);
     const u32 fpscr_val = core.GetVFPSystemReg(VFP_FPSCR);
     const u32 fpexc_val = core.GetVFPSystemReg(VFP_FPEXC);
 
diff --git a/src/citra_qt/debugger/wait_tree.cpp b/src/citra_qt/debugger/wait_tree.cpp
index a277dc81a..1a2acece3 100644
--- a/src/citra_qt/debugger/wait_tree.cpp
+++ b/src/citra_qt/debugger/wait_tree.cpp
@@ -206,8 +206,8 @@ QString WaitTreeThread::GetText() const {
         break;
     }
     QString pc_info = tr(" PC = 0x%1 LR = 0x%2")
-                          .arg(thread.context->GetProgramCounter(), 8, 16, QLatin1Char('0'))
-                          .arg(thread.context->GetLinkRegister(), 8, 16, QLatin1Char('0'));
+                          .arg(thread.context.GetProgramCounter(), 8, 16, QLatin1Char('0'))
+                          .arg(thread.context.GetLinkRegister(), 8, 16, QLatin1Char('0'));
     return QStringLiteral("%1%2 (%3) ").arg(WaitTreeWaitObject::GetText(), pc_info, status);
 }
 
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 97c487aba..48ad88218 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -19,6 +19,8 @@ namespace Memory {
 struct PageTable;
 };
 
+namespace Core {
+
 /// Generic ARM11 CPU interface
 class ARM_Interface : NonCopyable {
 public:
@@ -26,81 +28,44 @@ public:
         : timer(timer), id(id){};
     virtual ~ARM_Interface() {}
 
-    class ThreadContext {
-        friend class boost::serialization::access;
-
-        template <class Archive>
-        void save(Archive& ar, const unsigned int file_version) const {
-            for (std::size_t i = 0; i < 16; i++) {
-                const auto r = GetCpuRegister(i);
-                ar << r;
-            }
-            for (std::size_t i = 0; i < 64; i++) {
-                const auto r = GetFpuRegister(i);
-                ar << r;
-            }
-            const auto r1 = GetCpsr();
-            ar << r1;
-            const auto r2 = GetFpscr();
-            ar << r2;
-            const auto r3 = GetFpexc();
-            ar << r3;
-        }
-
-        template <class Archive>
-        void load(Archive& ar, const unsigned int file_version) {
-            u32 r;
-            for (std::size_t i = 0; i < 16; i++) {
-                ar >> r;
-                SetCpuRegister(i, r);
-            }
-            for (std::size_t i = 0; i < 64; i++) {
-                ar >> r;
-                SetFpuRegister(i, r);
-            }
-            ar >> r;
-            SetCpsr(r);
-            ar >> r;
-            SetFpscr(r);
-            ar >> r;
-            SetFpexc(r);
-        }
-
-        BOOST_SERIALIZATION_SPLIT_MEMBER()
-    public:
-        virtual ~ThreadContext() = default;
-
-        virtual void Reset() = 0;
-        virtual u32 GetCpuRegister(std::size_t index) const = 0;
-        virtual void SetCpuRegister(std::size_t index, u32 value) = 0;
-        virtual u32 GetCpsr() const = 0;
-        virtual void SetCpsr(u32 value) = 0;
-        virtual u32 GetFpuRegister(std::size_t index) const = 0;
-        virtual void SetFpuRegister(std::size_t index, u32 value) = 0;
-        virtual u32 GetFpscr() const = 0;
-        virtual void SetFpscr(u32 value) = 0;
-        virtual u32 GetFpexc() const = 0;
-        virtual void SetFpexc(u32 value) = 0;
-
+    struct ThreadContext {
         u32 GetStackPointer() const {
-            return GetCpuRegister(13);
+            return cpu_registers[13];
         }
         void SetStackPointer(u32 value) {
-            return SetCpuRegister(13, value);
+            cpu_registers[13] = value;
         }
 
         u32 GetLinkRegister() const {
-            return GetCpuRegister(14);
+            return cpu_registers[14];
         }
         void SetLinkRegister(u32 value) {
-            return SetCpuRegister(14, value);
+            cpu_registers[14] = value;
         }
 
         u32 GetProgramCounter() const {
-            return GetCpuRegister(15);
+            return cpu_registers[15];
         }
         void SetProgramCounter(u32 value) {
-            return SetCpuRegister(15, value);
+            cpu_registers[15] = value;
+        }
+
+        std::array<u32, 16> cpu_registers{};
+        u32 cpsr{};
+        std::array<u32, 64> fpu_registers{};
+        u32 fpscr{};
+        u32 fpexc{};
+
+    private:
+        friend class boost::serialization::access;
+
+        template <class Archive>
+        void serialize(Archive& ar, const unsigned int file_version) {
+            ar& cpu_registers;
+            ar& fpu_registers;
+            ar& cpsr;
+            ar& fpscr;
+            ar& fpexc;
         }
     };
 
@@ -132,7 +97,7 @@ public:
      */
     virtual void SetPC(u32 addr) = 0;
 
-    /*
+    /**
      * Get the current Program Counter
      * @return Returns current PC
      */
@@ -206,29 +171,21 @@ public:
      */
     virtual void SetCP15Register(CP15Register reg, u32 value) = 0;
 
-    /**
-     * Creates a CPU context
-     * @note The created context may only be used with this instance.
-     */
-    virtual std::unique_ptr<ThreadContext> NewContext() const = 0;
-
     /**
      * Saves the current CPU context
      * @param ctx Thread context to save
      */
-    virtual void SaveContext(const std::unique_ptr<ThreadContext>& ctx) = 0;
+    virtual void SaveContext(ThreadContext& ctx) = 0;
 
     /**
      * Loads a CPU context
      * @param ctx Thread context to load
      */
-    virtual void LoadContext(const std::unique_ptr<ThreadContext>& ctx) = 0;
+    virtual void LoadContext(const ThreadContext& ctx) = 0;
 
     /// Prepare core for thread reschedule (if needed to correctly handle state)
     virtual void PrepareReschedule() = 0;
 
-    virtual void PurgeState() = 0;
-
     Core::Timing::Timer& GetTimer() {
         return *timer;
     }
@@ -298,7 +255,7 @@ private:
 
     template <class Archive>
     void load(Archive& ar, const unsigned int file_version) {
-        PurgeState();
+        ClearInstructionCache();
         ar >> timer;
         ar >> id;
         std::shared_ptr<Memory::PageTable> page_table{};
@@ -344,5 +301,7 @@ private:
     BOOST_SERIALIZATION_SPLIT_MEMBER()
 };
 
-BOOST_CLASS_VERSION(ARM_Interface, 1)
-BOOST_CLASS_VERSION(ARM_Interface::ThreadContext, 1)
+} // namespace Core
+
+BOOST_CLASS_VERSION(Core::ARM_Interface, 1)
+BOOST_CLASS_VERSION(Core::ARM_Interface::ThreadContext, 1)
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 4ec675251..dbf6ea71c 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -17,61 +17,7 @@
 #include "core/hle/kernel/svc.h"
 #include "core/memory.h"
 
-class DynarmicThreadContext final : public ARM_Interface::ThreadContext {
-public:
-    DynarmicThreadContext() {
-        Reset();
-    }
-    ~DynarmicThreadContext() override = default;
-
-    void Reset() override {
-        regs = {};
-        ext_regs = {};
-        cpsr = 0;
-        fpscr = 0;
-        fpexc = 0;
-    }
-
-    u32 GetCpuRegister(std::size_t index) const override {
-        return regs[index];
-    }
-    void SetCpuRegister(std::size_t index, u32 value) override {
-        regs[index] = value;
-    }
-    u32 GetCpsr() const override {
-        return cpsr;
-    }
-    void SetCpsr(u32 value) override {
-        cpsr = value;
-    }
-    u32 GetFpuRegister(std::size_t index) const override {
-        return ext_regs[index];
-    }
-    void SetFpuRegister(std::size_t index, u32 value) override {
-        ext_regs[index] = value;
-    }
-    u32 GetFpscr() const override {
-        return fpscr;
-    }
-    void SetFpscr(u32 value) override {
-        fpscr = value;
-    }
-    u32 GetFpexc() const override {
-        return fpexc;
-    }
-    void SetFpexc(u32 value) override {
-        fpexc = value;
-    }
-
-private:
-    friend class ARM_Dynarmic;
-
-    std::array<u32, 16> regs;
-    std::array<u32, 64> ext_regs;
-    u32 cpsr;
-    u32 fpscr;
-    u32 fpexc;
-};
+namespace Core {
 
 class DynarmicUserCallbacks final : public Dynarmic::A32::UserCallbacks {
 public:
@@ -173,10 +119,10 @@ public:
     Memory::MemorySystem& memory;
 };
 
-ARM_Dynarmic::ARM_Dynarmic(Core::System* system_, Memory::MemorySystem& memory_, u32 core_id_,
+ARM_Dynarmic::ARM_Dynarmic(Core::System& system_, Memory::MemorySystem& memory_, u32 core_id_,
                            std::shared_ptr<Core::Timing::Timer> timer_,
                            Core::ExclusiveMonitor& exclusive_monitor_)
-    : ARM_Interface(core_id_, timer_), system(*system_), memory(memory_),
+    : ARM_Interface(core_id_, timer_), system(system_), memory(memory_),
       cb(std::make_unique<DynarmicUserCallbacks>(*this)),
       exclusive_monitor{dynamic_cast<Core::DynarmicExclusiveMonitor&>(exclusive_monitor_)} {
     SetPageTable(memory.GetCurrentPageTable());
@@ -285,30 +231,20 @@ void ARM_Dynarmic::SetCP15Register(CP15Register reg, u32 value) {
     }
 }
 
-std::unique_ptr<ARM_Interface::ThreadContext> ARM_Dynarmic::NewContext() const {
-    return std::make_unique<DynarmicThreadContext>();
+void ARM_Dynarmic::SaveContext(ThreadContext& ctx) {
+    ctx.cpu_registers = jit->Regs();
+    ctx.cpsr = jit->Cpsr();
+    ctx.fpu_registers = jit->ExtRegs();
+    ctx.fpscr = jit->Fpscr();
+    ctx.fpexc = fpexc;
 }
 
-void ARM_Dynarmic::SaveContext(const std::unique_ptr<ThreadContext>& arg) {
-    DynarmicThreadContext* ctx = dynamic_cast<DynarmicThreadContext*>(arg.get());
-    ASSERT(ctx);
-
-    ctx->regs = jit->Regs();
-    ctx->ext_regs = jit->ExtRegs();
-    ctx->cpsr = jit->Cpsr();
-    ctx->fpscr = jit->Fpscr();
-    ctx->fpexc = fpexc;
-}
-
-void ARM_Dynarmic::LoadContext(const std::unique_ptr<ThreadContext>& arg) {
-    const DynarmicThreadContext* ctx = dynamic_cast<DynarmicThreadContext*>(arg.get());
-    ASSERT(ctx);
-
-    jit->Regs() = ctx->regs;
-    jit->ExtRegs() = ctx->ext_regs;
-    jit->SetCpsr(ctx->cpsr);
-    jit->SetFpscr(ctx->fpscr);
-    fpexc = ctx->fpexc;
+void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) {
+    jit->Regs() = ctx.cpu_registers;
+    jit->SetCpsr(ctx.cpsr);
+    jit->ExtRegs() = ctx.fpu_registers;
+    jit->SetFpscr(ctx.fpscr);
+    fpexc = ctx.fpexc;
 }
 
 void ARM_Dynarmic::PrepareReschedule() {
@@ -337,7 +273,7 @@ std::shared_ptr<Memory::PageTable> ARM_Dynarmic::GetPageTable() const {
 
 void ARM_Dynarmic::SetPageTable(const std::shared_ptr<Memory::PageTable>& page_table) {
     current_page_table = page_table;
-    auto ctx{NewContext()};
+    ThreadContext ctx{};
     if (jit) {
         SaveContext(ctx);
     }
@@ -378,6 +314,4 @@ std::unique_ptr<Dynarmic::A32::Jit> ARM_Dynarmic::MakeJit() {
     return std::make_unique<Dynarmic::A32::Jit>(config);
 }
 
-void ARM_Dynarmic::PurgeState() {
-    ClearInstructionCache();
-}
+} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 45d7a4ac6..ec0c13390 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -17,16 +17,15 @@ class MemorySystem;
 } // namespace Memory
 
 namespace Core {
+
+class DynarmicUserCallbacks;
 class DynarmicExclusiveMonitor;
 class ExclusiveMonitor;
 class System;
-} // namespace Core
-
-class DynarmicUserCallbacks;
 
 class ARM_Dynarmic final : public ARM_Interface {
 public:
-    explicit ARM_Dynarmic(Core::System* system_, Memory::MemorySystem& memory_, u32 core_id_,
+    explicit ARM_Dynarmic(Core::System& system_, Memory::MemorySystem& memory_, u32 core_id_,
                           std::shared_ptr<Core::Timing::Timer> timer,
                           Core::ExclusiveMonitor& exclusive_monitor_);
     ~ARM_Dynarmic() override;
@@ -47,9 +46,8 @@ public:
     u32 GetCP15Register(CP15Register reg) const override;
     void SetCP15Register(CP15Register reg, u32 value) override;
 
-    std::unique_ptr<ThreadContext> NewContext() const override;
-    void SaveContext(const std::unique_ptr<ThreadContext>& arg) override;
-    void LoadContext(const std::unique_ptr<ThreadContext>& arg) override;
+    void SaveContext(ThreadContext& ctx) override;
+    void LoadContext(const ThreadContext& ctx) override;
 
     void PrepareReschedule() override;
 
@@ -57,7 +55,6 @@ public:
     void InvalidateCacheRange(u32 start_address, std::size_t length) override;
     void ClearExclusiveState() override;
     void SetPageTable(const std::shared_ptr<Memory::PageTable>& page_table) override;
-    void PurgeState() override;
 
 protected:
     std::shared_ptr<Memory::PageTable> GetPageTable() const override;
@@ -79,3 +76,5 @@ private:
     std::shared_ptr<Memory::PageTable> current_page_table = nullptr;
     std::map<std::shared_ptr<Memory::PageTable>, std::unique_ptr<Dynarmic::A32::Jit>> jits;
 };
+
+} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.h b/src/core/arm/dynarmic/arm_exclusive_monitor.h
index 05f9f391f..c6d40c301 100644
--- a/src/core/arm/dynarmic/arm_exclusive_monitor.h
+++ b/src/core/arm/dynarmic/arm_exclusive_monitor.h
@@ -32,7 +32,7 @@ public:
     bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) override;
 
 private:
-    friend class ::ARM_Dynarmic;
+    friend class Core::ARM_Dynarmic;
     Dynarmic::ExclusiveMonitor monitor;
     Memory::MemorySystem& memory;
 };
diff --git a/src/core/arm/dyncom/arm_dyncom.cpp b/src/core/arm/dyncom/arm_dyncom.cpp
index 099a5c06a..c9bb66e38 100644
--- a/src/core/arm/dyncom/arm_dyncom.cpp
+++ b/src/core/arm/dyncom/arm_dyncom.cpp
@@ -12,73 +12,18 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 
-class DynComThreadContext final : public ARM_Interface::ThreadContext {
-public:
-    DynComThreadContext() {
-        Reset();
-    }
-    ~DynComThreadContext() override = default;
+namespace Core {
 
-    void Reset() override {
-        cpu_registers = {};
-        cpsr = 0;
-        fpu_registers = {};
-        fpscr = 0;
-        fpexc = 0;
-    }
-
-    u32 GetCpuRegister(std::size_t index) const override {
-        return cpu_registers[index];
-    }
-    void SetCpuRegister(std::size_t index, u32 value) override {
-        cpu_registers[index] = value;
-    }
-    u32 GetCpsr() const override {
-        return cpsr;
-    }
-    void SetCpsr(u32 value) override {
-        cpsr = value;
-    }
-    u32 GetFpuRegister(std::size_t index) const override {
-        return fpu_registers[index];
-    }
-    void SetFpuRegister(std::size_t index, u32 value) override {
-        fpu_registers[index] = value;
-    }
-    u32 GetFpscr() const override {
-        return fpscr;
-    }
-    void SetFpscr(u32 value) override {
-        fpscr = value;
-    }
-    u32 GetFpexc() const override {
-        return fpexc;
-    }
-    void SetFpexc(u32 value) override {
-        fpexc = value;
-    }
-
-private:
-    friend class ARM_DynCom;
-
-    std::array<u32, 16> cpu_registers;
-    u32 cpsr;
-    std::array<u32, 64> fpu_registers;
-    u32 fpscr;
-    u32 fpexc;
-};
-
-ARM_DynCom::ARM_DynCom(Core::System* system, Memory::MemorySystem& memory,
+ARM_DynCom::ARM_DynCom(Core::System& system_, Memory::MemorySystem& memory,
                        PrivilegeMode initial_mode, u32 id,
                        std::shared_ptr<Core::Timing::Timer> timer)
-    : ARM_Interface(id, timer), system(system) {
+    : ARM_Interface(id, timer), system(system_) {
     state = std::make_unique<ARMul_State>(system, memory, initial_mode);
 }
 
 ARM_DynCom::~ARM_DynCom() {}
 
 void ARM_DynCom::Run() {
-    DEBUG_ASSERT(system != nullptr);
     ExecuteInstructions(std::max<s64>(timer->GetDowncount(), 0));
 }
 
@@ -103,8 +48,6 @@ std::shared_ptr<Memory::PageTable> ARM_DynCom::GetPageTable() const {
     return nullptr;
 }
 
-void ARM_DynCom::PurgeState() {}
-
 void ARM_DynCom::SetPC(u32 pc) {
     state->Reg[15] = pc;
 }
@@ -155,39 +98,31 @@ void ARM_DynCom::SetCP15Register(CP15Register reg, u32 value) {
 
 void ARM_DynCom::ExecuteInstructions(u64 num_instructions) {
     state->NumInstrsToExecute = num_instructions;
-    unsigned ticks_executed = InterpreterMainLoop(state.get());
-    if (system != nullptr) {
+    const u32 ticks_executed = InterpreterMainLoop(state.get());
+    if (timer) {
         timer->AddTicks(ticks_executed);
     }
     state->ServeBreak();
 }
 
-std::unique_ptr<ARM_Interface::ThreadContext> ARM_DynCom::NewContext() const {
-    return std::make_unique<DynComThreadContext>();
+void ARM_DynCom::SaveContext(ThreadContext& ctx) {
+    ctx.cpu_registers = state->Reg;
+    ctx.cpsr = state->Cpsr;
+    ctx.fpu_registers = state->ExtReg;
+    ctx.fpscr = state->VFP[VFP_FPSCR];
+    ctx.fpexc = state->VFP[VFP_FPEXC];
 }
 
-void ARM_DynCom::SaveContext(const std::unique_ptr<ThreadContext>& arg) {
-    DynComThreadContext* ctx = dynamic_cast<DynComThreadContext*>(arg.get());
-    ASSERT(ctx);
-
-    ctx->cpu_registers = state->Reg;
-    ctx->cpsr = state->Cpsr;
-    ctx->fpu_registers = state->ExtReg;
-    ctx->fpscr = state->VFP[VFP_FPSCR];
-    ctx->fpexc = state->VFP[VFP_FPEXC];
-}
-
-void ARM_DynCom::LoadContext(const std::unique_ptr<ThreadContext>& arg) {
-    DynComThreadContext* ctx = dynamic_cast<DynComThreadContext*>(arg.get());
-    ASSERT(ctx);
-
-    state->Reg = ctx->cpu_registers;
-    state->Cpsr = ctx->cpsr;
-    state->ExtReg = ctx->fpu_registers;
-    state->VFP[VFP_FPSCR] = ctx->fpscr;
-    state->VFP[VFP_FPEXC] = ctx->fpexc;
+void ARM_DynCom::LoadContext(const ThreadContext& ctx) {
+    state->Reg = ctx.cpu_registers;
+    state->Cpsr = ctx.cpsr;
+    state->ExtReg = ctx.fpu_registers;
+    state->VFP[VFP_FPSCR] = ctx.fpscr;
+    state->VFP[VFP_FPEXC] = ctx.fpexc;
 }
 
 void ARM_DynCom::PrepareReschedule() {
     state->NumInstrsToExecute = 0;
 }
+
+} // namespace Core
diff --git a/src/core/arm/dyncom/arm_dyncom.h b/src/core/arm/dyncom/arm_dyncom.h
index 75f25bebf..5b367ebc7 100644
--- a/src/core/arm/dyncom/arm_dyncom.h
+++ b/src/core/arm/dyncom/arm_dyncom.h
@@ -10,17 +10,17 @@
 #include "core/arm/skyeye_common/arm_regformat.h"
 #include "core/arm/skyeye_common/armstate.h"
 
-namespace Core {
-class System;
-}
-
 namespace Memory {
 class MemorySystem;
 }
 
+namespace Core {
+
+class System;
+
 class ARM_DynCom final : public ARM_Interface {
 public:
-    explicit ARM_DynCom(Core::System* system, Memory::MemorySystem& memory,
+    explicit ARM_DynCom(Core::System& system, Memory::MemorySystem& memory,
                         PrivilegeMode initial_mode, u32 id,
                         std::shared_ptr<Core::Timing::Timer> timer);
     ~ARM_DynCom() override;
@@ -45,13 +45,11 @@ public:
     u32 GetCP15Register(CP15Register reg) const override;
     void SetCP15Register(CP15Register reg, u32 value) override;
 
-    std::unique_ptr<ThreadContext> NewContext() const override;
-    void SaveContext(const std::unique_ptr<ThreadContext>& arg) override;
-    void LoadContext(const std::unique_ptr<ThreadContext>& arg) override;
+    void SaveContext(ThreadContext& ctx) override;
+    void LoadContext(const ThreadContext& ctx) override;
 
     void SetPageTable(const std::shared_ptr<Memory::PageTable>& page_table) override;
     void PrepareReschedule() override;
-    void PurgeState() override;
 
 protected:
     std::shared_ptr<Memory::PageTable> GetPageTable() const override;
@@ -59,6 +57,8 @@ protected:
 private:
     void ExecuteInstructions(u64 num_instructions);
 
-    Core::System* system;
+    Core::System& system;
     std::unique_ptr<ARMul_State> state;
 };
+
+} // namespace Core
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index 396560eaf..42c40232d 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -3866,11 +3866,10 @@ SWI_INST : {
     if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
-        DEBUG_ASSERT(cpu->system != nullptr);
         swi_inst* const inst_cream = (swi_inst*)inst_base->component;
-        cpu->system->GetRunningCore().GetTimer().AddTicks(num_instrs);
+        cpu->system.GetRunningCore().GetTimer().AddTicks(num_instrs);
         cpu->NumInstrsToExecute =
             num_instrs >= cpu->NumInstrsToExecute ? 0 : cpu->NumInstrsToExecute - num_instrs;
         num_instrs = 0;
-        Kernel::SVCContext{*cpu->system}.CallSVC(inst_cream->num & 0xFFFF);
+        Kernel::SVCContext{cpu->system}.CallSVC(inst_cream->num & 0xFFFF);
         // The kernel would call ERET to get here, which clears exclusive memory state.
         cpu->UnsetExclusiveMemoryAddress();
     }
diff --git a/src/core/arm/skyeye_common/armstate.cpp b/src/core/arm/skyeye_common/armstate.cpp
index 4971d8c2e..9de68dc11 100644
--- a/src/core/arm/skyeye_common/armstate.cpp
+++ b/src/core/arm/skyeye_common/armstate.cpp
@@ -10,9 +10,9 @@
 #include "core/core.h"
 #include "core/memory.h"
 
-ARMul_State::ARMul_State(Core::System* system, Memory::MemorySystem& memory,
+ARMul_State::ARMul_State(Core::System& system_, Memory::MemorySystem& memory_,
                          PrivilegeMode initial_mode)
-    : system(system), memory(memory) {
+    : system{system_}, memory{memory_} {
     Reset();
     ChangePrivilegeMode(initial_mode);
 }
@@ -609,9 +609,8 @@ void ARMul_State::ServeBreak() {
         DEBUG_ASSERT(Reg[15] == last_bkpt.address);
     }
 
-    DEBUG_ASSERT(system != nullptr);
-    Kernel::Thread* thread = system->Kernel().GetCurrentThreadManager().GetCurrentThread();
-    system->GetRunningCore().SaveContext(thread->context);
+    Kernel::Thread* thread = system.Kernel().GetCurrentThreadManager().GetCurrentThread();
+    system.GetRunningCore().SaveContext(thread->context);
 
     if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) {
         last_bkpt_hit = false;
diff --git a/src/core/arm/skyeye_common/armstate.h b/src/core/arm/skyeye_common/armstate.h
index 7a04e929a..f36c46c07 100644
--- a/src/core/arm/skyeye_common/armstate.h
+++ b/src/core/arm/skyeye_common/armstate.h
@@ -147,7 +147,7 @@ enum {
 
 struct ARMul_State final {
 public:
-    explicit ARMul_State(Core::System* system, Memory::MemorySystem& memory,
+    explicit ARMul_State(Core::System& system, Memory::MemorySystem& memory,
                          PrivilegeMode initial_mode);
 
     void ChangePrivilegeMode(u32 new_mode);
@@ -206,7 +206,7 @@ public:
 
     void ServeBreak();
 
-    Core::System* system;
+    Core::System& system;
     Memory::MemorySystem& memory;
 
     std::array<u32, 16> Reg{}; // The current register file
diff --git a/src/core/core.cpp b/src/core/core.cpp
index b39c81223..ddd3f4c5b 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -384,7 +384,7 @@ System::ResultStatus System::Init(Frontend::EmuWindow& emu_window,
 #if CITRA_ARCH(x86_64) || CITRA_ARCH(arm64)
         for (u32 i = 0; i < num_cores; ++i) {
             cpu_cores.push_back(std::make_shared<ARM_Dynarmic>(
-                this, *memory, i, timing->GetTimer(i), *exclusive_monitor));
+                *this, *memory, i, timing->GetTimer(i), *exclusive_monitor));
         }
 #else
         for (u32 i = 0; i < num_cores; ++i) {
@@ -396,7 +396,7 @@ System::ResultStatus System::Init(Frontend::EmuWindow& emu_window,
     } else {
         for (u32 i = 0; i < num_cores; ++i) {
             cpu_cores.push_back(
-                std::make_shared<ARM_DynCom>(this, *memory, USER32MODE, i, timing->GetTimer(i)));
+                std::make_shared<ARM_DynCom>(*this, *memory, USER32MODE, i, timing->GetTimer(i)));
         }
     }
     running_core = cpu_cores[0].get();
diff --git a/src/core/core.h b/src/core/core.h
index ae3310ca7..06f865962 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -14,8 +14,6 @@
 #include "core/movie.h"
 #include "core/perf_stats.h"
 
-class ARM_Interface;
-
 namespace Frontend {
 class EmuWindow;
 class ImageInterface;
@@ -69,6 +67,7 @@ class AppLoader;
 
 namespace Core {
 
+class ARM_Interface;
 class TelemetrySession;
 class ExclusiveMonitor;
 class Timing;
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index b1d958f8e..660168bc1 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -179,9 +179,9 @@ static u32 RegRead(std::size_t id, Kernel::Thread* thread = nullptr) {
     }
 
     if (id <= PC_REGISTER) {
-        return thread->context->GetCpuRegister(id);
+        return thread->context.cpu_registers[id];
     } else if (id == CPSR_REGISTER) {
-        return thread->context->GetCpsr();
+        return thread->context.cpsr;
     } else {
         return 0;
     }
@@ -193,9 +193,9 @@ static void RegWrite(std::size_t id, u32 val, Kernel::Thread* thread = nullptr)
     }
 
     if (id <= PC_REGISTER) {
-        return thread->context->SetCpuRegister(id, val);
+        thread->context.cpu_registers[id] = val;
     } else if (id == CPSR_REGISTER) {
-        return thread->context->SetCpsr(val);
+        thread->context.cpsr = val;
     }
 }
 
@@ -205,11 +205,11 @@ static u64 FpuRead(std::size_t id, Kernel::Thread* thread = nullptr) {
     }
 
     if (id >= D0_REGISTER && id < FPSCR_REGISTER) {
-        u64 ret = thread->context->GetFpuRegister(2 * (id - D0_REGISTER));
-        ret |= static_cast<u64>(thread->context->GetFpuRegister(2 * (id - D0_REGISTER) + 1)) << 32;
+        u64 ret = thread->context.fpu_registers[2 * (id - D0_REGISTER)];
+        ret |= static_cast<u64>(thread->context.fpu_registers[2 * (id - D0_REGISTER) + 1]) << 32;
         return ret;
     } else if (id == FPSCR_REGISTER) {
-        return thread->context->GetFpscr();
+        return thread->context.fpscr;
     } else {
         return 0;
     }
@@ -221,10 +221,10 @@ static void FpuWrite(std::size_t id, u64 val, Kernel::Thread* thread = nullptr)
     }
 
     if (id >= D0_REGISTER && id < FPSCR_REGISTER) {
-        thread->context->SetFpuRegister(2 * (id - D0_REGISTER), static_cast<u32>(val));
-        thread->context->SetFpuRegister(2 * (id - D0_REGISTER) + 1, static_cast<u32>(val >> 32));
+        thread->context.fpu_registers[2 * (id - D0_REGISTER)] = static_cast<u32>(val);
+        thread->context.fpu_registers[2 * (id - D0_REGISTER) + 1] = static_cast<u32>(val >> 32);
     } else if (id == FPSCR_REGISTER) {
-        return thread->context->SetFpscr(static_cast<u32>(val));
+        thread->context.fpscr = static_cast<u32>(val);
     }
 }
 
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index d5cbf0fd6..5bd40d20a 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -87,15 +87,14 @@ void KernelSystem::SetCurrentMemoryPageTable(std::shared_ptr<Memory::PageTable>
     }
 }
 
-void KernelSystem::SetCPUs(std::vector<std::shared_ptr<ARM_Interface>> cpus) {
+void KernelSystem::SetCPUs(std::vector<std::shared_ptr<Core::ARM_Interface>> cpus) {
     ASSERT(cpus.size() == thread_managers.size());
-    u32 i = 0;
-    for (const auto& cpu : cpus) {
-        thread_managers[i++]->SetCPU(*cpu);
+    for (u32 i = 0; i < cpus.size(); i++) {
+        thread_managers[i]->SetCPU(*cpus[i]);
     }
 }
 
-void KernelSystem::SetRunningCPU(ARM_Interface* cpu) {
+void KernelSystem::SetRunningCPU(Core::ARM_Interface* cpu) {
     if (current_process) {
         stored_processes[current_cpu->GetID()] = current_process;
     }
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index c381e97cb..aa1aa3afd 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -30,8 +30,9 @@ class MemorySystem;
 }
 
 namespace Core {
+class ARM_Interface;
 class Timing;
-}
+} // namespace Core
 
 namespace IPCDebugger {
 class Recorder;
@@ -275,9 +276,9 @@ public:
 
     void SetCurrentMemoryPageTable(std::shared_ptr<Memory::PageTable> page_table);
 
-    void SetCPUs(std::vector<std::shared_ptr<ARM_Interface>> cpu);
+    void SetCPUs(std::vector<std::shared_ptr<Core::ARM_Interface>> cpu);
 
-    void SetRunningCPU(ARM_Interface* cpu);
+    void SetRunningCPU(Core::ARM_Interface* cpu);
 
     ThreadManager& GetThreadManager(u32 core_id);
     const ThreadManager& GetThreadManager(u32 core_id) const;
@@ -324,7 +325,7 @@ public:
     /// Map of named ports managed by the kernel, which can be retrieved using the ConnectToPort
     std::unordered_map<std::string, std::shared_ptr<ClientPort>> named_ports;
 
-    ARM_Interface* current_cpu = nullptr;
+    Core::ARM_Interface* current_cpu = nullptr;
 
     Memory::MemorySystem& memory;
 
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 1c55cce82..a7d493f19 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -255,6 +255,7 @@ private:
     template <class Archive>
     void serialize(Archive& ar, const unsigned int file_version);
 };
+
 } // namespace Kernel
 
 BOOST_CLASS_EXPORT_KEY(Kernel::CodeSet)
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index c66e5cb03..c3fafddfe 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1245,8 +1245,8 @@ ResultCode SVC::CreateThread(Handle* out_handle, u32 entry_point, u32 arg, VAddr
                    kernel.CreateThread(name, entry_point, priority, arg, processor_id, stack_top,
                                        current_process));
 
-    thread->context->SetFpscr(FPSCR_DEFAULT_NAN | FPSCR_FLUSH_TO_ZERO |
-                              FPSCR_ROUND_TOZERO); // 0x03C00000
+    thread->context.fpscr =
+        FPSCR_DEFAULT_NAN | FPSCR_FLUSH_TO_ZERO | FPSCR_ROUND_TOZERO; // 0x03C00000
 
     CASCADE_RESULT(*out_handle, current_process->handle_table.Create(std::move(thread)));
 
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index fa3f88784..7059e2085 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -4,22 +4,16 @@
 
 #include <algorithm>
 #include <climits>
-#include <list>
-#include <vector>
 #include <boost/serialization/string.hpp>
 #include "common/archives.h"
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
-#include "common/math_util.h"
 #include "common/serialization/boost_flat_set.h"
 #include "core/arm/arm_interface.h"
 #include "core/arm/skyeye_common/armstate.h"
-#include "core/core.h"
 #include "core/hle/kernel/errors.h"
-#include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/memory.h"
 #include "core/hle/kernel/mutex.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/thread.h"
@@ -33,7 +27,7 @@ namespace Kernel {
 template <class Archive>
 void Thread::serialize(Archive& ar, const unsigned int file_version) {
     ar& boost::serialization::base_object<WaitObject>(*this);
-    ar&* context.get();
+    ar& context;
     ar& thread_id;
     ar& status;
     ar& entry_point;
@@ -63,9 +57,9 @@ void Thread::Acquire(Thread* thread) {
 }
 
 Thread::Thread(KernelSystem& kernel, u32 core_id)
-    : WaitObject(kernel), context(kernel.GetThreadManager(core_id).NewContext()),
-      can_schedule(true), core_id(core_id), thread_manager(kernel.GetThreadManager(core_id)) {}
-Thread::~Thread() {}
+    : WaitObject(kernel), core_id(core_id), thread_manager(kernel.GetThreadManager(core_id)) {}
+
+Thread::~Thread() = default;
 
 Thread* ThreadManager::GetCurrentThread() const {
     return current_thread.get();
@@ -318,13 +312,12 @@ void ThreadManager::DebugThreadQueue() {
  * @param entry_point Address of entry point for execution
  * @param arg User argument for thread
  */
-static void ResetThreadContext(const std::unique_ptr<ARM_Interface::ThreadContext>& context,
-                               u32 stack_top, u32 entry_point, u32 arg) {
-    context->Reset();
-    context->SetCpuRegister(0, arg);
-    context->SetProgramCounter(entry_point);
-    context->SetStackPointer(stack_top);
-    context->SetCpsr(USER32MODE | ((entry_point & 1) << 5)); // Usermode and THUMB mode
+static void ResetThreadContext(Core::ARM_Interface::ThreadContext& context, u32 stack_top,
+                               u32 entry_point, u32 arg) {
+    context.cpu_registers[0] = arg;
+    context.SetProgramCounter(entry_point);
+    context.SetStackPointer(stack_top);
+    context.cpsr = USER32MODE | ((entry_point & 1) << 5); // Usermode and THUMB mode
 }
 
 ResultVal<std::shared_ptr<Thread>> KernelSystem::CreateThread(
@@ -417,8 +410,8 @@ std::shared_ptr<Thread> SetupMainThread(KernelSystem& kernel, u32 entry_point, u
 
     std::shared_ptr<Thread> thread = std::move(thread_res).Unwrap();
 
-    thread->context->SetFpscr(FPSCR_DEFAULT_NAN | FPSCR_FLUSH_TO_ZERO | FPSCR_ROUND_TOZERO |
-                              FPSCR_IXC); // 0x03C00010
+    thread->context.fpscr =
+        FPSCR_DEFAULT_NAN | FPSCR_FLUSH_TO_ZERO | FPSCR_ROUND_TOZERO | FPSCR_IXC; // 0x03C00010
 
     // Note: The newly created thread will be run when the scheduler fires.
     return thread;
@@ -447,11 +440,11 @@ void ThreadManager::Reschedule() {
 }
 
 void Thread::SetWaitSynchronizationResult(ResultCode result) {
-    context->SetCpuRegister(0, result.raw);
+    context.cpu_registers[0] = result.raw;
 }
 
 void Thread::SetWaitSynchronizationOutput(s32 output) {
-    context->SetCpuRegister(1, output);
+    context.cpu_registers[1] = output;
 }
 
 s32 Thread::GetWaitObjectIndex(const WaitObject* object) const {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 955962695..86c25a424 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -121,14 +121,10 @@ public:
      */
     std::span<const std::shared_ptr<Thread>> GetThreadList();
 
-    void SetCPU(ARM_Interface& cpu_) {
+    void SetCPU(Core::ARM_Interface& cpu_) {
         cpu = &cpu_;
     }
 
-    std::unique_ptr<ARM_Interface::ThreadContext> NewContext() {
-        return cpu->NewContext();
-    }
-
 private:
     /**
      * Switches the CPU's active thread context to that of the specified thread
@@ -150,7 +146,7 @@ private:
     void ThreadWakeupCallback(u64 thread_id, s64 cycles_late);
 
     Kernel::KernelSystem& kernel;
-    ARM_Interface* cpu;
+    Core::ARM_Interface* cpu;
 
     std::shared_ptr<Thread> current_thread;
     Common::ThreadQueueList<Thread*, ThreadPrioLowest + 1> ready_queue;
@@ -271,7 +267,7 @@ public:
      */
     void Stop();
 
-    /*
+    /**
      * Returns the Thread Local Storage address of the current thread
      * @returns VAddr of the thread's TLS
      */
@@ -279,7 +275,7 @@ public:
         return tls_address;
     }
 
-    /*
+    /**
      * Returns the address of the current thread's command buffer, located in the TLS.
      * @returns VAddr of the thread's command buffer.
      */
@@ -294,11 +290,11 @@ public:
         return status == ThreadStatus::WaitSynchAll;
     }
 
-    std::unique_ptr<ARM_Interface::ThreadContext> context;
+    Core::ARM_Interface::ThreadContext context{};
 
     u32 thread_id;
 
-    bool can_schedule;
+    bool can_schedule{true};
     ThreadStatus status;
     VAddr entry_point;
     VAddr stack_top;
@@ -321,16 +317,16 @@ public:
     std::weak_ptr<Process> owner_process{}; ///< Process that owns this thread
 
     /// Objects that the thread is waiting on, in the same order as they were
-    // passed to WaitSynchronization1/N.
+    /// passed to WaitSynchronization1/N.
     std::vector<std::shared_ptr<WaitObject>> wait_objects{};
 
     VAddr wait_address; ///< If waiting on an AddressArbiter, this is the arbitration address
 
     std::string name{};
 
-    // Callback that will be invoked when the thread is resumed from a waiting state. If the thread
-    // was waiting via WaitSynchronizationN then the object will be the last object that became
-    // available. In case of a timeout, the object will be nullptr.
+    /// Callback that will be invoked when the thread is resumed from a waiting state. If the thread
+    /// was waiting via WaitSynchronizationN then the object will be the last object that became
+    /// available. In case of a timeout, the object will be nullptr.
     std::shared_ptr<WakeupCallback> wakeup_callback{};
 
     const u32 core_id;
diff --git a/src/core/hle/service/ldr_ro/cro_helper.h b/src/core/hle/service/ldr_ro/cro_helper.h
index 265b6971e..7731153ab 100644
--- a/src/core/hle/service/ldr_ro/cro_helper.h
+++ b/src/core/hle/service/ldr_ro/cro_helper.h
@@ -15,8 +15,6 @@ namespace Kernel {
 class Process;
 }
 
-class ARM_Interface;
-
 namespace Service::LDR {
 
 #define ASSERT_CRO_STRUCT(name, size)                                                              \
diff --git a/src/core/memory.h b/src/core/memory.h
index 69a499434..79e1b523a 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -12,8 +12,6 @@
 #include "common/memory_ref.h"
 #include "core/mmio.h"
 
-class ARM_Interface;
-
 namespace Kernel {
 class Process;
 }
diff --git a/src/tests/core/arm/dyncom/arm_dyncom_vfp_tests.cpp b/src/tests/core/arm/dyncom/arm_dyncom_vfp_tests.cpp
index 8b045021e..933d9b775 100644
--- a/src/tests/core/arm/dyncom/arm_dyncom_vfp_tests.cpp
+++ b/src/tests/core/arm/dyncom/arm_dyncom_vfp_tests.cpp
@@ -23,7 +23,8 @@ TEST_CASE("ARM_DynCom (vfp): vadd", "[arm_dyncom]") {
     test_env.SetMemory32(0, 0xEE321A03); // vadd.f32 s2, s4, s6
     test_env.SetMemory32(4, 0xEAFFFFFE); // b +#0
 
-    ARM_DynCom dyncom(nullptr, test_env.GetMemory(), USER32MODE, 0, nullptr);
+    Core::System system;
+    Core::ARM_DynCom dyncom(system, test_env.GetMemory(), USER32MODE, 0, nullptr);
 
     std::vector<VfpTestCase> test_cases{{
 #include "vfp_vadd_f32.inc"