diff --git a/externals/dynarmic b/externals/dynarmic
index dfbd3912a..f343c5626 160000
--- a/externals/dynarmic
+++ b/externals/dynarmic
@@ -1 +1 @@
-Subproject commit dfbd3912a4b8e0d28e1a4045893a750f0107fbaa
+Subproject commit f343c56268ef3f8fbed5bbc513fbc56430a47255
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index ba528403c..436425c83 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -24,19 +24,11 @@ public:
         u32 fpexc;
     };
 
-    /**
-     * Runs the CPU for the given number of instructions
-     * @param num_instructions Number of instructions to run
-     */
-    void Run(int num_instructions) {
-        ExecuteInstructions(num_instructions);
-        this->num_instructions += num_instructions;
-    }
+    /// Runs the CPU until an event happens
+    virtual void Run() = 0;
 
     /// Step CPU by one instruction
-    void Step() {
-        Run(1);
-    }
+    virtual void Step() = 0;
 
     /// Clear all instruction cache
     virtual void ClearInstructionCache() = 0;
@@ -138,19 +130,4 @@ public:
 
     /// Prepare core for thread reschedule (if needed to correctly handle state)
     virtual void PrepareReschedule() = 0;
-
-    /// Getter for num_instructions
-    u64 GetNumInstructions() const {
-        return num_instructions;
-    }
-
-protected:
-    /**
-     * Executes the given number of instructions
-     * @param num_instructions Number of instructions to executes
-     */
-    virtual void ExecuteInstructions(int num_instructions) = 0;
-
-private:
-    u64 num_instructions = 0; ///< Number of instructions executed
 };
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 0a6d5c260..1c99fc9b8 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -40,11 +40,20 @@ static bool IsReadOnlyMemory(u32 vaddr) {
     return false;
 }
 
+static void AddTicks(u64 ticks) {
+    CoreTiming::AddTicks(ticks);
+}
+
+static u64 GetTicksRemaining() {
+    int ticks = CoreTiming::GetDowncount();
+    return static_cast<u64>(ticks <= 0 ? 0 : ticks);
+}
+
 static Dynarmic::UserCallbacks GetUserCallbacks(
-    const std::shared_ptr<ARMul_State>& interpeter_state, Memory::PageTable* current_page_table) {
+    const std::shared_ptr<ARMul_State>& interpreter_state, Memory::PageTable* current_page_table) {
     Dynarmic::UserCallbacks user_callbacks{};
     user_callbacks.InterpreterFallback = &InterpreterFallback;
-    user_callbacks.user_arg = static_cast<void*>(interpeter_state.get());
+    user_callbacks.user_arg = static_cast<void*>(interpreter_state.get());
     user_callbacks.CallSVC = &SVC::CallSVC;
     user_callbacks.memory.IsReadOnlyMemory = &IsReadOnlyMemory;
     user_callbacks.memory.ReadCode = &Memory::Read32;
@@ -56,8 +65,10 @@ static Dynarmic::UserCallbacks GetUserCallbacks(
     user_callbacks.memory.Write16 = &Memory::Write16;
     user_callbacks.memory.Write32 = &Memory::Write32;
     user_callbacks.memory.Write64 = &Memory::Write64;
+    user_callbacks.AddTicks = &AddTicks;
+    user_callbacks.GetTicksRemaining = &GetTicksRemaining;
     user_callbacks.page_table = &current_page_table->pointers;
-    user_callbacks.coprocessors[15] = std::make_shared<DynarmicCP15>(interpeter_state);
+    user_callbacks.coprocessors[15] = std::make_shared<DynarmicCP15>(interpreter_state);
     return user_callbacks;
 }
 
@@ -66,6 +77,19 @@ ARM_Dynarmic::ARM_Dynarmic(PrivilegeMode initial_mode) {
     PageTableChanged();
 }
 
+MICROPROFILE_DEFINE(ARM_Jit, "ARM JIT", "ARM JIT", MP_RGB(255, 64, 64));
+
+void ARM_Dynarmic::Run() {
+    ASSERT(Memory::GetCurrentPageTable() == current_page_table);
+    MICROPROFILE_SCOPE(ARM_Jit);
+
+    jit->Run(GetTicksRemaining());
+}
+
+void ARM_Dynarmic::Step() {
+    InterpreterFallback(jit->Regs()[15], jit, static_cast<void*>(interpreter_state.get()));
+}
+
 void ARM_Dynarmic::SetPC(u32 pc) {
     jit->Regs()[15] = pc;
 }
@@ -124,17 +148,6 @@ void ARM_Dynarmic::SetCP15Register(CP15Register reg, u32 value) {
     interpreter_state->CP15[reg] = value;
 }
 
-MICROPROFILE_DEFINE(ARM_Jit, "ARM JIT", "ARM JIT", MP_RGB(255, 64, 64));
-
-void ARM_Dynarmic::ExecuteInstructions(int num_instructions) {
-    ASSERT(Memory::GetCurrentPageTable() == current_page_table);
-    MICROPROFILE_SCOPE(ARM_Jit);
-
-    std::size_t ticks_executed = jit->Run(static_cast<unsigned>(num_instructions));
-
-    CoreTiming::AddTicks(ticks_executed);
-}
-
 void ARM_Dynarmic::SaveContext(ARM_Interface::ThreadContext& ctx) {
     memcpy(ctx.cpu_registers, jit->Regs().data(), sizeof(ctx.cpu_registers));
     memcpy(ctx.fpu_registers, jit->ExtRegs().data(), sizeof(ctx.fpu_registers));
@@ -168,6 +181,7 @@ void ARM_Dynarmic::PrepareReschedule() {
 }
 
 void ARM_Dynarmic::ClearInstructionCache() {
+    // TODO: Clear interpreter cache when appropriate.
     for (const auto& j : jits) {
         j.second->ClearCache();
     }
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 0b00158a5..ffedfbc91 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -19,6 +19,9 @@ class ARM_Dynarmic final : public ARM_Interface {
 public:
     ARM_Dynarmic(PrivilegeMode initial_mode);
 
+    void Run() override;
+    void Step() override;
+
     void SetPC(u32 pc) override;
     u32 GetPC() const override;
     u32 GetReg(int index) const override;
@@ -36,7 +39,6 @@ public:
     void LoadContext(const ThreadContext& ctx) override;
 
     void PrepareReschedule() override;
-    void ExecuteInstructions(int num_instructions) override;
 
     void ClearInstructionCache() override;
     void PageTableChanged() override;
diff --git a/src/core/arm/dyncom/arm_dyncom.cpp b/src/core/arm/dyncom/arm_dyncom.cpp
index 4d72aef77..7e0624e16 100644
--- a/src/core/arm/dyncom/arm_dyncom.cpp
+++ b/src/core/arm/dyncom/arm_dyncom.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <cstring>
 #include <memory>
 #include "core/arm/dyncom/arm_dyncom.h"
@@ -20,6 +21,14 @@ ARM_DynCom::ARM_DynCom(PrivilegeMode initial_mode) {
 
 ARM_DynCom::~ARM_DynCom() {}
 
+void ARM_DynCom::Run() {
+    ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0));
+}
+
+void ARM_DynCom::Step() {
+    ExecuteInstructions(1);
+}
+
 void ARM_DynCom::ClearInstructionCache() {
     state->instruction_cache.clear();
     trans_cache_buf_top = 0;
@@ -79,10 +88,6 @@ void ARM_DynCom::SetCP15Register(CP15Register reg, u32 value) {
 
 void ARM_DynCom::ExecuteInstructions(int num_instructions) {
     state->NumInstrsToExecute = num_instructions;
-
-    // Dyncom only breaks on instruction dispatch. This only happens on every instruction when
-    // executing one instruction at a time. Otherwise, if a block is being executed, more
-    // instructions may actually be executed than specified.
     unsigned ticks_executed = InterpreterMainLoop(state.get());
     CoreTiming::AddTicks(ticks_executed);
 }
diff --git a/src/core/arm/dyncom/arm_dyncom.h b/src/core/arm/dyncom/arm_dyncom.h
index fc1ffed6a..3c6cc3bda 100644
--- a/src/core/arm/dyncom/arm_dyncom.h
+++ b/src/core/arm/dyncom/arm_dyncom.h
@@ -15,6 +15,9 @@ public:
     ARM_DynCom(PrivilegeMode initial_mode);
     ~ARM_DynCom();
 
+    void Run() override;
+    void Step() override;
+
     void ClearInstructionCache() override;
     void PageTableChanged() override;
 
@@ -35,8 +38,9 @@ public:
     void LoadContext(const ThreadContext& ctx) override;
 
     void PrepareReschedule() override;
-    void ExecuteInstructions(int num_instructions) override;
 
 private:
+    void ExecuteInstructions(int num_instructions);
+
     std::unique_ptr<ARMul_State> state;
 };
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index 6b20ffa87..9ef52477c 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -18,6 +18,7 @@
 #include "core/arm/skyeye_common/armstate.h"
 #include "core/arm/skyeye_common/armsupp.h"
 #include "core/arm/skyeye_common/vfp/vfp.h"
+#include "core/core_timing.h"
 #include "core/gdbstub/gdbstub.h"
 #include "core/hle/svc.h"
 #include "core/memory.h"
@@ -3858,6 +3859,10 @@ SUB_INST : {
 SWI_INST : {
     if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) {
         swi_inst* const inst_cream = (swi_inst*)inst_base->component;
+        CoreTiming::AddTicks(num_instrs);
+        cpu->NumInstrsToExecute =
+            num_instrs >= cpu->NumInstrsToExecute ? 0 : cpu->NumInstrsToExecute - num_instrs;
+        num_instrs = 0;
         SVC::CallSVC(inst_cream->num & 0xFFFF);
     }
 
diff --git a/src/core/core.cpp b/src/core/core.cpp
index d18532190..44f304b15 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -27,7 +27,7 @@ namespace Core {
 
 /*static*/ System System::s_instance;
 
-System::ResultStatus System::RunLoop(int tight_loop) {
+System::ResultStatus System::RunLoop(bool tight_loop) {
     status = ResultStatus::Success;
     if (!cpu_core) {
         return ResultStatus::ErrorNotInitialized;
@@ -57,7 +57,11 @@ System::ResultStatus System::RunLoop(int tight_loop) {
         PrepareReschedule();
     } else {
         CoreTiming::Advance();
-        cpu_core->Run(tight_loop);
+        if (tight_loop) {
+            cpu_core->Run();
+        } else {
+            cpu_core->Step();
+        }
     }
 
     HW::Update();
@@ -67,7 +71,7 @@ System::ResultStatus System::RunLoop(int tight_loop) {
 }
 
 System::ResultStatus System::SingleStep() {
-    return RunLoop(1);
+    return RunLoop(false);
 }
 
 System::ResultStatus System::Load(EmuWindow* emu_window, const std::string& filepath) {
diff --git a/src/core/core.h b/src/core/core.h
index 9805cc694..3bf0b2402 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -50,10 +50,10 @@ public:
      * is not required to do a full dispatch with each instruction. NOTE: the number of instructions
      * requested is not guaranteed to run, as this will be interrupted preemptively if a hardware
      * update is requested (e.g. on a thread switch).
-     * @param tight_loop Number of instructions to execute.
+     * @param tight_loop If false, the CPU single-steps.
      * @return Result status, indicating whethor or not the operation succeeded.
      */
-    ResultStatus RunLoop(int tight_loop = 1000);
+    ResultStatus RunLoop(bool tight_loop = true);
 
     /**
      * Step the CPU one instruction
diff --git a/src/tests/core/arm/dyncom/arm_dyncom_vfp_tests.cpp b/src/tests/core/arm/dyncom/arm_dyncom_vfp_tests.cpp
index 83719a58e..04ac83d2b 100644
--- a/src/tests/core/arm/dyncom/arm_dyncom_vfp_tests.cpp
+++ b/src/tests/core/arm/dyncom/arm_dyncom_vfp_tests.cpp
@@ -34,7 +34,7 @@ TEST_CASE("ARM_DynCom (vfp): vadd", "[arm_dyncom]") {
         dyncom.SetVFPSystemReg(VFP_FPSCR, test_case.initial_fpscr);
         dyncom.SetVFPReg(4, test_case.a);
         dyncom.SetVFPReg(6, test_case.b);
-        dyncom.ExecuteInstructions(1);
+        dyncom.Step();
         if (dyncom.GetVFPReg(2) != test_case.result ||
             dyncom.GetVFPSystemReg(VFP_FPSCR) != test_case.final_fpscr) {
             printf("f: %x\n", test_case.initial_fpscr);