From 6eb7d5bd3ee7f0b79f3fd7adbe931dccbacf7e1b Mon Sep 17 00:00:00 2001
From: Gilles Siberlin <gilles.siberlin@outlook.com>
Date: Sat, 1 Jun 2024 05:31:50 +0200
Subject: [PATCH] Implement hook insertion (#73)

* Implement function hook insertion

* Fix recompiled code indentation

* Add _matherr to renamed_funcs

* Replace after_vram by before_vram

* Emit dummy value if relocatable_sections_ordered is empty
---
 RSPRecomp/src/rsp_recomp.cpp |  26 +-
 include/recomp_port.h        |   8 +
 src/analysis.cpp             | 476 +++++++++++++++++------------------
 src/config.cpp               | 151 +++++++----
 src/main.cpp                 |  62 ++++-
 src/recompilation.cpp        |  47 ++--
 6 files changed, 440 insertions(+), 330 deletions(-)
diff --git a/RSPRecomp/src/rsp_recomp.cpp b/RSPRecomp/src/rsp_recomp.cpp
index dd8bf74..38b914a 100644
--- a/RSPRecomp/src/rsp_recomp.cpp
+++ b/RSPRecomp/src/rsp_recomp.cpp
@@ -564,25 +564,25 @@ struct RSPRecompilerConfig {
 };
 
 std::filesystem::path concat_if_not_empty(const std::filesystem::path& parent, const std::filesystem::path& child) {
-	if (!child.empty()) {
-		return parent / child;
-	}
-	return child;
+    if (!child.empty()) {
+        return parent / child;
+    }
+    return child;
 }
 
 template <typename T>
 std::vector<T> toml_to_vec(const toml::array* array) {
-	std::vector<T> ret;
+    std::vector<T> ret;
 
-	// Reserve room for all the funcs in the map.
-	ret.reserve(array->size());
+    // Reserve room for every element of the array.
+    ret.reserve(array->size());
     array->for_each([&ret](auto&& el) {
         if constexpr (toml::is_integer<decltype(el)>) {
             ret.push_back(*el);
         }
     });
 
-	return ret;
+    return ret;
 }
 
 template <typename T>
@@ -601,9 +601,9 @@ std::unordered_set<T> toml_to_set(const toml::array* array) {
 bool read_config(const std::filesystem::path& config_path, RSPRecompilerConfig& out) {
     RSPRecompilerConfig ret{};
 
-	try {
+    try {
         const toml::table config_data = toml::parse_file(config_path.u8string());
-		std::filesystem::path basedir = std::filesystem::path{ config_path }.parent_path();
+        std::filesystem::path basedir = std::filesystem::path{ config_path }.parent_path();
 
         std::optional<uint32_t> text_offset = config_data["text_offset"].value<uint32_t>();
         if (text_offset.has_value()) {
@@ -653,20 +653,20 @@ bool read_config(const std::filesystem::path& config_path, RSPRecompilerConfig&
             throw toml::parse_error("Missing output_function_name in config file", config_data.source());
         }
 
-		// Extra indirect branch targets (optional)
+        // Extra indirect branch targets (optional)
         const toml::node_view branch_targets_data = config_data["extra_indirect_branch_targets"];
         if (branch_targets_data.is_array()) {
             const toml::array* branch_targets_array = branch_targets_data.as_array();
             ret.extra_indirect_branch_targets = toml_to_vec<uint32_t>(branch_targets_array);
         }
 
-		// Unsupported_instructions (optional)
+        // Unsupported_instructions (optional)
         const toml::node_view unsupported_instructions_data = config_data["unsupported_instructions"];
         if (unsupported_instructions_data.is_array()) {
             const toml::array* unsupported_instructions_array = unsupported_instructions_data.as_array();
             ret.unsupported_instructions = toml_to_set<uint32_t>(unsupported_instructions_array);
         }
-	}
+    }
     catch (const toml::parse_error& err) {
         std::cerr << "Syntax error parsing toml: " << *err.source().path << " (" << err.source().begin <<  "):\n" << err.description() << std::endl;
         return false;
diff --git a/include/recomp_port.h b/include/recomp_port.h
index a74a8b5..3828f01 100644
--- a/include/recomp_port.h
+++ b/include/recomp_port.h
@@ -40,6 +40,12 @@ namespace RecompPort {
         uint32_t value;
     };
 
+    struct FunctionHook {
+        std::string func_name; // Value of the hook entry's "func" key (required by the config parser).
+        int32_t before_vram; // Value of the optional "before_vram" key (must be word-aligned); 0 when the key is absent.
+        std::string text; // Value of the hook entry's "text" key (required by the config parser).
+    };
+
     struct FunctionSize {
         std::string func_name;
         uint32_t size_bytes;
@@ -71,6 +77,7 @@ namespace RecompPort {
         std::vector<std::string> ignored_funcs;
         DeclaredFunctionMap declared_funcs;
         std::vector<InstructionPatch> instruction_patches;
+        std::vector<FunctionHook> function_hooks;
         std::vector<FunctionSize> manual_func_sizes;
         std::vector<ManualFunction> manual_functions;
         std::string bss_section_suffix;
@@ -110,6 +117,7 @@ namespace RecompPort {
         bool ignored;
         bool reimplemented;
         bool stubbed;
+        std::unordered_map<int32_t, std::string> function_hooks;
 
         Function(uint32_t vram, uint32_t rom, std::vector<uint32_t> words, std::string name, ELFIO::Elf_Half section_index, bool ignored = false, bool reimplemented = false, bool stubbed = false)
                 : vram(vram), rom(rom), words(std::move(words)), name(std::move(name)), section_index(section_index), ignored(ignored), reimplemented(reimplemented), stubbed(stubbed) {}
diff --git a/src/analysis.cpp b/src/analysis.cpp
index f98b737..5a689a0 100644
--- a/src/analysis.cpp
+++ b/src/analysis.cpp
@@ -10,271 +10,271 @@ extern "C" const char* RabbitizerRegister_getNameGpr(uint8_t regValue);
 
 // If 64-bit addressing is ever implemented, these will need to be changed to 64-bit values
 struct RegState {
-	// For tracking a register that will be used to load from RAM
-	uint32_t prev_lui;
-	uint32_t prev_addiu_vram;
-	uint32_t prev_addu_vram;
-	uint8_t prev_addend_reg;
-	bool valid_lui;
-	bool valid_addiu;
-	bool valid_addend;
-	// For tracking a register that has been loaded from RAM
-	uint32_t loaded_lw_vram;
-	uint32_t loaded_addu_vram;
-	uint32_t loaded_address;
-	uint8_t loaded_addend_reg;
-	bool valid_loaded;
+    // For tracking a register that will be used to load from RAM
+    uint32_t prev_lui;
+    uint32_t prev_addiu_vram;
+    uint32_t prev_addu_vram;
+    uint8_t prev_addend_reg;
+    bool valid_lui;
+    bool valid_addiu;
+    bool valid_addend;
+    // For tracking a register that has been loaded from RAM
+    uint32_t loaded_lw_vram;
+    uint32_t loaded_addu_vram;
+    uint32_t loaded_address;
+    uint8_t loaded_addend_reg;
+    bool valid_loaded;
 
-	RegState() = default;
+    RegState() = default;
 
-	void invalidate() {
-		prev_lui = 0;
-		prev_addiu_vram = 0;
-		prev_addu_vram = 0;
-		prev_addend_reg = 0;
+    void invalidate() {
+        prev_lui = 0;
+        prev_addiu_vram = 0;
+        prev_addu_vram = 0;
+        prev_addend_reg = 0;
 
-		valid_lui = false;
-		valid_addiu = false;
-		valid_addend = false;
+        valid_lui = false;
+        valid_addiu = false;
+        valid_addend = false;
 
-		loaded_lw_vram = 0;
-		loaded_addu_vram = 0;
-		loaded_address = 0;
-		loaded_addend_reg = 0;
+        loaded_lw_vram = 0;
+        loaded_addu_vram = 0;
+        loaded_address = 0;
+        loaded_addend_reg = 0;
 
-		valid_loaded = false;
-	}
+        valid_loaded = false;
+    }
 };
 
 using InstrId = rabbitizer::InstrId::UniqueId;
 using RegId = rabbitizer::Registers::Cpu::GprO32;
 
 bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPort::Function& func, RecompPort::FunctionStats& stats,
-	RegState reg_states[32], std::vector<RegState>& stack_states) {
-	// Temporary register state for tracking the register being operated on
-	RegState temp{};
+    RegState reg_states[32], std::vector<RegState>& stack_states) {
+    // Temporary register state for tracking the register being operated on
+    RegState temp{};
 
-	int rd = (int)instr.GetO32_rd();
-	int rs = (int)instr.GetO32_rs();
-	int base = rs;
-	int rt = (int)instr.GetO32_rt();
-	int sa = (int)instr.Get_sa();
+    int rd = (int)instr.GetO32_rd();
+    int rs = (int)instr.GetO32_rs();
+    int base = rs;
+    int rt = (int)instr.GetO32_rt();
+    int sa = (int)instr.Get_sa();
 
-	uint16_t imm = instr.Get_immediate();
+    uint16_t imm = instr.Get_immediate();
 
-	auto check_move = [&]() {
-		if (rs == 0) {
-			// rs is zero so copy rt to rd
-			reg_states[rd] = reg_states[rt];
-		} else if (rt == 0) {
-			// rt is zero so copy rs to rd
-			reg_states[rd] = reg_states[rs];
-		} else {
-			// Not a move, invalidate rd
-			reg_states[rd].invalidate();
-		}
-	};
+    auto check_move = [&]() {
+        if (rs == 0) {
+            // rs is zero so copy rt to rd
+            reg_states[rd] = reg_states[rt];
+        } else if (rt == 0) {
+            // rt is zero so copy rs to rd
+            reg_states[rd] = reg_states[rs];
+        } else {
+            // Not a move, invalidate rd
+            reg_states[rd].invalidate();
+        }
+    };
 
-	switch (instr.getUniqueId()) {
-	case InstrId::cpu_lui:
-		// rt has been completely overwritten, so invalidate it
-		reg_states[rt].invalidate();
-		reg_states[rt].prev_lui = (int16_t)imm << 16;
-		reg_states[rt].valid_lui = true;
-		break;
-	case InstrId::cpu_addiu:
-		// The target reg is a copy of the source reg plus an immediate, so copy the source reg's state
-		reg_states[rt] = reg_states[rs];
-		// Set the addiu state if and only if there hasn't been an addiu already
-		if (!reg_states[rt].valid_addiu) {
-			reg_states[rt].prev_addiu_vram = (int16_t)imm;
-			reg_states[rt].valid_addiu = true;
-		} else {
-			// Otherwise, there have been 2 or more consecutive addius so invalidate the whole register
-			reg_states[rt].invalidate();
-		}
-		break;
-	case InstrId::cpu_addu:
-		// rd has been completely overwritten, so invalidate it
-		temp.invalidate();
-		// Exactly one of the two addend register states should have a valid lui at this time
-		if (reg_states[rs].valid_lui != reg_states[rt].valid_lui) {
-			// Track which of the two registers has the valid lui state and which is the addend
-			int valid_lui_reg = reg_states[rs].valid_lui ? rs : rt;
-			int addend_reg = reg_states[rs].valid_lui ? rt : rs;
+    switch (instr.getUniqueId()) {
+    case InstrId::cpu_lui:
+        // rt has been completely overwritten, so invalidate it
+        reg_states[rt].invalidate();
+        reg_states[rt].prev_lui = (int16_t)imm << 16;
+        reg_states[rt].valid_lui = true;
+        break;
+    case InstrId::cpu_addiu:
+        // The target reg is a copy of the source reg plus an immediate, so copy the source reg's state
+        reg_states[rt] = reg_states[rs];
+        // Set the addiu state if and only if there hasn't been an addiu already
+        if (!reg_states[rt].valid_addiu) {
+            reg_states[rt].prev_addiu_vram = (int16_t)imm;
+            reg_states[rt].valid_addiu = true;
+        } else {
+            // Otherwise, there have been 2 or more consecutive addius so invalidate the whole register
+            reg_states[rt].invalidate();
+        }
+        break;
+    case InstrId::cpu_addu:
+        // rd has been completely overwritten, so invalidate it
+        temp.invalidate();
+        // Exactly one of the two addend register states should have a valid lui at this time
+        if (reg_states[rs].valid_lui != reg_states[rt].valid_lui) {
+            // Track which of the two registers has the valid lui state and which is the addend
+            int valid_lui_reg = reg_states[rs].valid_lui ? rs : rt;
+            int addend_reg = reg_states[rs].valid_lui ? rt : rs;
 
-			// Copy the lui reg's state into the destination reg, then set the destination reg's addend to the other operand
-			temp = reg_states[valid_lui_reg];
-			temp.valid_addend = true;
-			temp.prev_addend_reg = addend_reg;
-			temp.prev_addu_vram = instr.getVram();
-		} else {
-			// Check if this is a move
-			check_move();
-		}
-		reg_states[rd] = temp;
-		break;
-	case InstrId::cpu_daddu:
-	case InstrId::cpu_or:
-		check_move();
-		break;
-	case InstrId::cpu_sw:
-		// If this is a store to the stack, copy the state of rt into the stack at the given offset
-		if (base == (int)RegId::GPR_O32_sp) {
-			if ((imm & 0b11) != 0) {
-				fmt::print(stderr, "Invalid alignment on offset for sw to stack: {}\n", (int16_t)imm);
-				return false;
-			}
-			if (((int16_t)imm) < 0) {
-				fmt::print(stderr, "Negative offset for sw to stack: {}\n", (int16_t)imm);
-				return false;
-			}
-			size_t stack_offset = imm / 4;
-			if (stack_offset >= stack_states.size()) {
-				stack_states.resize(stack_offset + 1);
-			}
-			stack_states[stack_offset] = reg_states[rt];
-		}
-		break;
-	case InstrId::cpu_lw:
-		// rt has been completely overwritten, so invalidate it
-		temp.invalidate();
-		// If this is a load from the stack, copy the state of the stack at the given offset to rt
-		if (base == (int)RegId::GPR_O32_sp) {
-			if ((imm & 0b11) != 0) {
-				fmt::print(stderr, "Invalid alignment on offset for lw from stack: {}\n", (int16_t)imm);
-				return false;
-			}
-			if (((int16_t)imm) < 0) {
-				fmt::print(stderr, "Negative offset for lw from stack: {}\n", (int16_t)imm);
-				return false;
-			}
-			size_t stack_offset = imm / 4;
-			if (stack_offset >= stack_states.size()) {
-				stack_states.resize(stack_offset + 1);
-			}
-			temp = stack_states[stack_offset];
-		}
-		// If the base register has a valid lui state and a valid addend before this, then this may be a load from a jump table
-		else if (reg_states[base].valid_lui && reg_states[base].valid_addend) {
-			// Exactly one of the lw and the base reg should have a valid lo16 value
-			bool nonzero_immediate = imm != 0;
-			if (nonzero_immediate != reg_states[base].valid_addiu) {
-				uint32_t lo16;
-				if (nonzero_immediate) {
-					lo16 = (int16_t)imm;
-				} else {
-					lo16 = reg_states[base].prev_addiu_vram;
-				}
+            // Copy the lui reg's state into the destination reg, then set the destination reg's addend to the other operand
+            temp = reg_states[valid_lui_reg];
+            temp.valid_addend = true;
+            temp.prev_addend_reg = addend_reg;
+            temp.prev_addu_vram = instr.getVram();
+        } else {
+            // Check if this is a move
+            check_move();
+        }
+        reg_states[rd] = temp;
+        break;
+    case InstrId::cpu_daddu:
+    case InstrId::cpu_or:
+        check_move();
+        break;
+    case InstrId::cpu_sw:
+        // If this is a store to the stack, copy the state of rt into the stack at the given offset
+        if (base == (int)RegId::GPR_O32_sp) {
+            if ((imm & 0b11) != 0) {
+                fmt::print(stderr, "Invalid alignment on offset for sw to stack: {}\n", (int16_t)imm);
+                return false;
+            }
+            if (((int16_t)imm) < 0) {
+                fmt::print(stderr, "Negative offset for sw to stack: {}\n", (int16_t)imm);
+                return false;
+            }
+            size_t stack_offset = imm / 4;
+            if (stack_offset >= stack_states.size()) {
+                stack_states.resize(stack_offset + 1);
+            }
+            stack_states[stack_offset] = reg_states[rt];
+        }
+        break;
+    case InstrId::cpu_lw:
+        // rt has been completely overwritten, so invalidate it
+        temp.invalidate();
+        // If this is a load from the stack, copy the state of the stack at the given offset to rt
+        if (base == (int)RegId::GPR_O32_sp) {
+            if ((imm & 0b11) != 0) {
+                fmt::print(stderr, "Invalid alignment on offset for lw from stack: {}\n", (int16_t)imm);
+                return false;
+            }
+            if (((int16_t)imm) < 0) {
+                fmt::print(stderr, "Negative offset for lw from stack: {}\n", (int16_t)imm);
+                return false;
+            }
+            size_t stack_offset = imm / 4;
+            if (stack_offset >= stack_states.size()) {
+                stack_states.resize(stack_offset + 1);
+            }
+            temp = stack_states[stack_offset];
+        }
+        // If the base register has a valid lui state and a valid addend before this, then this may be a load from a jump table
+        else if (reg_states[base].valid_lui && reg_states[base].valid_addend) {
+            // Exactly one of the lw and the base reg should have a valid lo16 value
+            bool nonzero_immediate = imm != 0;
+            if (nonzero_immediate != reg_states[base].valid_addiu) {
+                uint32_t lo16;
+                if (nonzero_immediate) {
+                    lo16 = (int16_t)imm;
+                } else {
+                    lo16 = reg_states[base].prev_addiu_vram;
+                }
 
-				uint32_t address = reg_states[base].prev_lui + lo16;
-				temp.valid_loaded = true;
-				temp.loaded_lw_vram = instr.getVram();
-				temp.loaded_address = address;
-				temp.loaded_addend_reg = reg_states[base].prev_addend_reg;
-				temp.loaded_addu_vram = reg_states[base].prev_addu_vram;
-			}
-		}
-		reg_states[rt] = temp;
-		break;
-	case InstrId::cpu_jr:
-		// Ignore jr $ra
-		if (rs == (int)rabbitizer::Registers::Cpu::GprO32::GPR_O32_ra) {
-			break;
-		}
-		// Check if the source reg has a valid loaded state and if so record that as a jump table
-		if (reg_states[rs].valid_loaded) {
-			stats.jump_tables.emplace_back(
-				reg_states[rs].loaded_address,
-				reg_states[rs].loaded_addend_reg,
-				0,
-				reg_states[rs].loaded_lw_vram,
-				reg_states[rs].loaded_addu_vram,
-				instr.getVram(),
-				std::vector<uint32_t>{}
-			);
-		} else if (reg_states[rs].valid_lui && reg_states[rs].valid_addiu && !reg_states[rs].valid_addend && !reg_states[rs].valid_loaded) {
-			uint32_t address = reg_states[rs].prev_addiu_vram + reg_states[rs].prev_lui;
-			stats.absolute_jumps.emplace_back(
-				address,
-				instr.getVram()
-			);
-		}
-		// Allow tail calls (TODO account for trailing nops due to bad function splits)
-		else if (instr.getVram() != func.vram + (func.words.size() - 2) * sizeof(func.words[0])) {
-			// Inconclusive analysis
-			fmt::print(stderr, "Failed to to find jump table for `jr {}` at 0x{:08X} in {}\n", RabbitizerRegister_getNameGpr(rs), instr.getVram(), func.name);
-			return false;
-		}
-		break;
-	default:
-		if (instr.modifiesRd()) {
-			reg_states[rd].invalidate();
-		}
-		if (instr.modifiesRt()) {
-			reg_states[rt].invalidate();
-		}
-		break;
-	}
-	return true;
+                uint32_t address = reg_states[base].prev_lui + lo16;
+                temp.valid_loaded = true;
+                temp.loaded_lw_vram = instr.getVram();
+                temp.loaded_address = address;
+                temp.loaded_addend_reg = reg_states[base].prev_addend_reg;
+                temp.loaded_addu_vram = reg_states[base].prev_addu_vram;
+            }
+        }
+        reg_states[rt] = temp;
+        break;
+    case InstrId::cpu_jr:
+        // Ignore jr $ra
+        if (rs == (int)rabbitizer::Registers::Cpu::GprO32::GPR_O32_ra) {
+            break;
+        }
+        // Check if the source reg has a valid loaded state and if so record that as a jump table
+        if (reg_states[rs].valid_loaded) {
+            stats.jump_tables.emplace_back(
+                reg_states[rs].loaded_address,
+                reg_states[rs].loaded_addend_reg,
+                0,
+                reg_states[rs].loaded_lw_vram,
+                reg_states[rs].loaded_addu_vram,
+                instr.getVram(),
+                std::vector<uint32_t>{}
+            );
+        } else if (reg_states[rs].valid_lui && reg_states[rs].valid_addiu && !reg_states[rs].valid_addend && !reg_states[rs].valid_loaded) {
+            uint32_t address = reg_states[rs].prev_addiu_vram + reg_states[rs].prev_lui;
+            stats.absolute_jumps.emplace_back(
+                address,
+                instr.getVram()
+            );
+        }
+        // Allow tail calls (TODO account for trailing nops due to bad function splits)
+        else if (instr.getVram() != func.vram + (func.words.size() - 2) * sizeof(func.words[0])) {
+            // Inconclusive analysis
+            fmt::print(stderr, "Failed to to find jump table for `jr {}` at 0x{:08X} in {}\n", RabbitizerRegister_getNameGpr(rs), instr.getVram(), func.name);
+            return false;
+        }
+        break;
+    default:
+        if (instr.modifiesRd()) {
+            reg_states[rd].invalidate();
+        }
+        if (instr.modifiesRt()) {
+            reg_states[rt].invalidate();
+        }
+        break;
+    }
+    return true;
 }
 
 bool RecompPort::analyze_function(const RecompPort::Context& context, const RecompPort::Function& func,
-	const std::vector<rabbitizer::InstructionCpu>& instructions, RecompPort::FunctionStats& stats) {
-	// Create a state to track each register (r0 won't be used)
-	RegState reg_states[32] {};
-	std::vector<RegState> stack_states{};
+    const std::vector<rabbitizer::InstructionCpu>& instructions, RecompPort::FunctionStats& stats) {
+    // Create a state to track each register (r0 won't be used)
+    RegState reg_states[32] {};
+    std::vector<RegState> stack_states{};
 
-	// Look for jump tables
-	// A linear search through the func won't be accurate due to not taking control flow into account, but it'll work for finding jtables
-	for (const auto& instr : instructions) {
-		if (!analyze_instruction(instr, func, stats, reg_states, stack_states)) {
-			return false;
-		}
-	}
+    // Look for jump tables
+    // A linear search through the func won't be accurate due to not taking control flow into account, but it'll work for finding jtables
+    for (const auto& instr : instructions) {
+        if (!analyze_instruction(instr, func, stats, reg_states, stack_states)) {
+            return false;
+        }
+    }
 
-	// Sort jump tables by their address
-	std::sort(stats.jump_tables.begin(), stats.jump_tables.end(),
-		[](const JumpTable& a, const JumpTable& b)
-	{
-		return a.vram < b.vram;
-	});
+    // Sort jump tables by their address
+    std::sort(stats.jump_tables.begin(), stats.jump_tables.end(),
+        [](const JumpTable& a, const JumpTable& b)
+    {
+        return a.vram < b.vram;
+    });
 
-	// Determine jump table sizes
-	for (size_t i = 0; i < stats.jump_tables.size(); i++) {
-		JumpTable& cur_jtbl = stats.jump_tables[i];
-		uint32_t end_address = (uint32_t)-1;
-		uint32_t entry_count = 0;
-		uint32_t vram = cur_jtbl.vram;
+    // Determine jump table sizes
+    for (size_t i = 0; i < stats.jump_tables.size(); i++) {
+        JumpTable& cur_jtbl = stats.jump_tables[i];
+        uint32_t end_address = (uint32_t)-1;
+        uint32_t entry_count = 0;
+        uint32_t vram = cur_jtbl.vram;
 
-		if (i < stats.jump_tables.size() - 1) {
-			end_address = stats.jump_tables[i + 1].vram;
-		}
+        if (i < stats.jump_tables.size() - 1) {
+            end_address = stats.jump_tables[i + 1].vram;
+        }
 
-		// TODO this assumes that the jump table is in the same section as the function itself
-		cur_jtbl.rom = cur_jtbl.vram + func.rom - func.vram;
+        // TODO this assumes that the jump table is in the same section as the function itself
+        cur_jtbl.rom = cur_jtbl.vram + func.rom - func.vram;
 
-		while (vram < end_address) {
-			// Retrieve the current entry of the jump table
-			// TODO same as above
-			uint32_t rom_addr = vram + func.rom - func.vram;
-			uint32_t jtbl_word = byteswap(*reinterpret_cast<const uint32_t*>(&context.rom[rom_addr]));
-			// Check if the entry is a valid address in the current function
-			if (jtbl_word < func.vram || jtbl_word > func.vram + func.words.size() * sizeof(func.words[0])) {
-				// If it's not then this is the end of the jump table
-				break;
-			}
-			cur_jtbl.entries.push_back(jtbl_word);
-			vram += 4;
-		}
+        while (vram < end_address) {
+            // Retrieve the current entry of the jump table
+            // TODO same as above
+            uint32_t rom_addr = vram + func.rom - func.vram;
+            uint32_t jtbl_word = byteswap(*reinterpret_cast<const uint32_t*>(&context.rom[rom_addr]));
+            // Check if the entry is a valid address in the current function
+            if (jtbl_word < func.vram || jtbl_word > func.vram + func.words.size() * sizeof(func.words[0])) {
+                // If it's not then this is the end of the jump table
+                break;
+            }
+            cur_jtbl.entries.push_back(jtbl_word);
+            vram += 4;
+        }
 
-		if (cur_jtbl.entries.size() == 0) {
-			fmt::print("Failed to determine size of jump table at 0x{:08X} for instruction at 0x{:08X}\n", cur_jtbl.vram, cur_jtbl.jr_vram);
-			return false;
-		}
+        if (cur_jtbl.entries.size() == 0) {
+            fmt::print("Failed to determine size of jump table at 0x{:08X} for instruction at 0x{:08X}\n", cur_jtbl.vram, cur_jtbl.jr_vram);
+            return false;
+        }
 
-		//fmt::print("Jtbl at 0x{:08X} (rom 0x{:08X}) with {} entries used by instr at 0x{:08X}\n", cur_jtbl.vram, cur_jtbl.rom, cur_jtbl.entries.size(), cur_jtbl.jr_vram);
-	}
+        //fmt::print("Jtbl at 0x{:08X} (rom 0x{:08X}) with {} entries used by instr at 0x{:08X}\n", cur_jtbl.vram, cur_jtbl.rom, cur_jtbl.entries.size(), cur_jtbl.jr_vram);
+    }
 
-	return true;
+    return true;
 }
diff --git a/src/config.cpp b/src/config.cpp
index b22f9bf..e106ae6 100644
--- a/src/config.cpp
+++ b/src/config.cpp
@@ -5,10 +5,10 @@
 #include "recomp_port.h"
 
 std::vector<RecompPort::ManualFunction> get_manual_funcs(const toml::array* manual_funcs_array) {
-	std::vector<RecompPort::ManualFunction> ret;
+    std::vector<RecompPort::ManualFunction> ret;
 
-	// Reserve room for all the funcs in the map.
-	ret.reserve(manual_funcs_array->size());
+    // Reserve room for all the funcs in the map.
+    ret.reserve(manual_funcs_array->size());
     manual_funcs_array->for_each([&ret](auto&& el) {
         if constexpr (toml::is_table<decltype(el)>) {
             std::optional<std::string> func_name = el["name"].template value<std::string>();
@@ -27,13 +27,13 @@ std::vector<RecompPort::ManualFunction> get_manual_funcs(const toml::array* manu
         }
     });
 
-	return ret;
+    return ret;
 }
 
 std::vector<std::string> get_stubbed_funcs(const toml::table* patches_data) {
-	std::vector<std::string> stubbed_funcs{};
+    std::vector<std::string> stubbed_funcs{};
 
-	// Check if the stubs array exists.
+    // Check if the stubs array exists.
     const toml::node_view stubs_data = (*patches_data)["stubs"];
 
     if (stubs_data.is_array()) {
@@ -53,13 +53,13 @@ std::vector<std::string> get_stubbed_funcs(const toml::table* patches_data) {
         });
     }
 
-	return stubbed_funcs;
+    return stubbed_funcs;
 }
 
 std::vector<std::string> get_ignored_funcs(const toml::table* patches_data) {
-	std::vector<std::string> ignored_funcs{};
+    std::vector<std::string> ignored_funcs{};
 
-	// Check if the ignored funcs array exists.
+    // Check if the ignored funcs array exists.
     const toml::node_view ignored_funcs_data = (*patches_data)["ignored"];
 
     if (ignored_funcs_data.is_array()) {
@@ -76,16 +76,16 @@ std::vector<std::string> get_ignored_funcs(const toml::table* patches_data) {
         });
     }
 
-	return ignored_funcs;
+    return ignored_funcs;
 }
 
 std::unordered_map<std::string, RecompPort::FunctionArgType> arg_type_map{
-	{"u32", RecompPort::FunctionArgType::u32},
-	{"s32", RecompPort::FunctionArgType::s32},
+    {"u32", RecompPort::FunctionArgType::u32},
+    {"s32", RecompPort::FunctionArgType::s32},
 };
 
 std::vector<RecompPort::FunctionArgType> parse_args(const toml::array* args_in) {
-	std::vector<RecompPort::FunctionArgType> ret(args_in->size());
+    std::vector<RecompPort::FunctionArgType> ret(args_in->size());
 
     args_in->for_each([&ret](auto&& el) {
         if constexpr (toml::is_string<decltype(el)>) {
@@ -104,13 +104,13 @@ std::vector<RecompPort::FunctionArgType> parse_args(const toml::array* args_in)
         }
     });
 
-	return ret;
+    return ret;
 }
 
 RecompPort::DeclaredFunctionMap get_declared_funcs(const toml::table* patches_data) {
-	RecompPort::DeclaredFunctionMap declared_funcs{};
+    RecompPort::DeclaredFunctionMap declared_funcs{};
 
-	// Check if the func array exists.
+    // Check if the func array exists.
     const toml::node_view funcs_data = (*patches_data)["func"];
 
     if (funcs_data.is_array()) {
@@ -138,13 +138,13 @@ RecompPort::DeclaredFunctionMap get_declared_funcs(const toml::table* patches_da
         });
     }
 
-	return declared_funcs;
+    return declared_funcs;
 }
 
 std::vector<RecompPort::FunctionSize> get_func_sizes(const toml::table* patches_data) {
-	std::vector<RecompPort::FunctionSize> func_sizes{};
+    std::vector<RecompPort::FunctionSize> func_sizes{};
 
-	// Check if the func size array exists.
+    // Check if the func size array exists.
     const toml::node_view funcs_data = (*patches_data)["function_sizes"];
     if (funcs_data.is_array()) {
         const toml::array* sizes_array = funcs_data.as_array();
@@ -177,13 +177,13 @@ std::vector<RecompPort::FunctionSize> get_func_sizes(const toml::table* patches_
         });
     }
 
-	return func_sizes;
+    return func_sizes;
 }
 
 std::vector<RecompPort::InstructionPatch> get_instruction_patches(const toml::table* patches_data) {
-	std::vector<RecompPort::InstructionPatch> ret;
+    std::vector<RecompPort::InstructionPatch> ret;
 
-	// Check if the instruction patch array exists.
+    // Check if the instruction patch array exists.
     const toml::node_view insn_patch_data = (*patches_data)["instruction"];
 
     if (insn_patch_data.is_array()) {
@@ -221,20 +221,64 @@ std::vector<RecompPort::InstructionPatch> get_instruction_patches(const toml::ta
         });
     }
 
-	return ret;
+    return ret;
+}
+
+std::vector<RecompPort::FunctionHook> get_function_hooks(const toml::table* patches_data) {
+    std::vector<RecompPort::FunctionHook> ret;
+
+    // Check if the function hook array exists.
+    const toml::node_view func_hook_data = (*patches_data)["hook"];
+
+    if (func_hook_data.is_array()) {
+        const toml::array* func_hook_array = func_hook_data.as_array();
+        ret.reserve(func_hook_array->size());
+
+        // Copy all the hooks into the output vector.
+        func_hook_array->for_each([&ret](auto&& el) {
+            if constexpr (toml::is_table<decltype(el)>) {
+                const toml::table& cur_hook = *el.as_table();
+
+                // Read the hook's fields; before_vram is optional and its alignment is checked below.
+                std::optional<uint32_t> before_vram = cur_hook["before_vram"].value<uint32_t>();
+                std::optional<std::string> func_name = cur_hook["func"].value<std::string>();
+                std::optional<std::string> text = cur_hook["text"].value<std::string>();
+
+                if (!func_name.has_value() || !text.has_value()) {
+                    throw toml::parse_error("Function hook is missing required value(s)", el.source());
+                }
+
+                if (before_vram.has_value() && before_vram.value() & 0b11) {
+                    // Not properly aligned, so throw an error (and make it look like a normal toml one).
+                    throw toml::parse_error("before_vram is not word-aligned", el.source());
+                }
+
+                ret.push_back(RecompPort::FunctionHook{
+                    .func_name = func_name.value(),
+                    .before_vram = before_vram.has_value() ? (int32_t)before_vram.value() : 0,
+                    .text = text.value(),
+                });
+            }
+            else {
+                throw toml::parse_error("Invalid function hook entry", el.source());
+            }
+        });
+    }
+
+    return ret;
 }
 
 std::filesystem::path concat_if_not_empty(const std::filesystem::path& parent, const std::filesystem::path& child) {
-	if (!child.empty()) {
-		return parent / child;
-	}
-	return child;
+    if (!child.empty()) {
+        return parent / child;
+    }
+    return child;
 }
 
 RecompPort::Config::Config(const char* path) {
-	// Start this config out as bad so that it has to finish parsing without errors to be good.
-	entrypoint = 0;
-	bad = true;
+    // Start this config out as bad so that it has to finish parsing without errors to be good.
+    entrypoint = 0;
+    bad = true;
     toml::table config_data{};
 
     try {
@@ -348,6 +392,9 @@ RecompPort::Config::Config(const char* path) {
 
             // Manual function sizes (optional)
             manual_func_sizes = get_func_sizes(table);
+
+            // Function hooks (optional)
+            function_hooks = get_function_hooks(table);
         }
     }
     catch (const toml::parse_error& err) {
@@ -355,34 +402,34 @@ RecompPort::Config::Config(const char* path) {
         return;
     }
 
-	// No errors occured, so mark this config file as good.
-	bad = false;
+    // No errors occurred, so mark this config file as good.
+    bad = false;
 }
 
 const std::unordered_map<std::string, RecompPort::RelocType> reloc_type_name_map {
-	{ "R_MIPS_NONE", RecompPort::RelocType::R_MIPS_NONE },
-	{ "R_MIPS_16", RecompPort::RelocType::R_MIPS_16 },
-	{ "R_MIPS_32", RecompPort::RelocType::R_MIPS_32 },
-	{ "R_MIPS_REL32", RecompPort::RelocType::R_MIPS_REL32 },
-	{ "R_MIPS_26", RecompPort::RelocType::R_MIPS_26 },
-	{ "R_MIPS_HI16", RecompPort::RelocType::R_MIPS_HI16 },
-	{ "R_MIPS_LO16", RecompPort::RelocType::R_MIPS_LO16 },
-	{ "R_MIPS_GPREL16", RecompPort::RelocType::R_MIPS_GPREL16 },
+    { "R_MIPS_NONE", RecompPort::RelocType::R_MIPS_NONE },
+    { "R_MIPS_16", RecompPort::RelocType::R_MIPS_16 },
+    { "R_MIPS_32", RecompPort::RelocType::R_MIPS_32 },
+    { "R_MIPS_REL32", RecompPort::RelocType::R_MIPS_REL32 },
+    { "R_MIPS_26", RecompPort::RelocType::R_MIPS_26 },
+    { "R_MIPS_HI16", RecompPort::RelocType::R_MIPS_HI16 },
+    { "R_MIPS_LO16", RecompPort::RelocType::R_MIPS_LO16 },
+    { "R_MIPS_GPREL16", RecompPort::RelocType::R_MIPS_GPREL16 },
 };
 
 RecompPort::RelocType reloc_type_from_name(const std::string& reloc_type_name) {
-	auto find_it = reloc_type_name_map.find(reloc_type_name);
-	if (find_it != reloc_type_name_map.end()) {
-		return find_it->second;
-	}
-	return RecompPort::RelocType::R_MIPS_NONE;
+    auto find_it = reloc_type_name_map.find(reloc_type_name);
+    if (find_it != reloc_type_name_map.end()) {
+        return find_it->second;
+    }
+    return RecompPort::RelocType::R_MIPS_NONE;
 }
 
 bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector<uint8_t>&& rom, RecompPort::Context& out) {
-	RecompPort::Context ret{};
+    RecompPort::Context ret{};
 
-	try {
-		const toml::table config_data = toml::parse_file(symbol_file_path.u8string());
+    try {
+        const toml::table config_data = toml::parse_file(symbol_file_path.u8string());
         const toml::node_view config_sections_value = config_data["section"];
 
         if (!config_sections_value.is_array()) {
@@ -518,13 +565,13 @@ bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_f
                 throw toml::parse_error("Invalid section entry", el.source());
             }
         });
-	}
+    }
     catch (const toml::parse_error& err) {
         std::cerr << "Syntax error parsing toml: " << *err.source().path << " (" << err.source().begin <<  "):\n" << err.description() << std::endl;
         return false;
     }
 
-	ret.rom = std::move(rom);
-	out = std::move(ret);
-	return true;
+    ret.rom = std::move(rom);
+    out = std::move(ret);
+    return true;
 }
diff --git a/src/main.cpp b/src/main.cpp
index 00f6d9e..8faae53 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -628,6 +628,7 @@ std::unordered_set<std::string> renamed_funcs{
     "div64_64",
     "div64_32",
     "__moddi3",
+    "_matherr",
 };
 
 bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, uint32_t entrypoint, bool has_entrypoint, bool use_absolute_symbols) {
@@ -1499,6 +1500,41 @@ int main(int argc, char** argv) {
         func.words[instruction_index] = byteswap(patch.value);
     }
 
+    // Apply any function hooks.
+    for (const RecompPort::FunctionHook& patch : config.function_hooks) {
+        // Check if the specified function exists.
+        auto func_find = context.functions_by_name.find(patch.func_name);
+        if (func_find == context.functions_by_name.end()) {
+            // Function doesn't exist, present an error to the user instead of silently failing to apply the hook.
+            // This helps prevent typos in the config file or functions renamed between versions from causing issues.
+            exit_failure(fmt::format("Function {} has a function hook but does not exist!", patch.func_name));
+        }
+
+        RecompPort::Function& func = context.functions[func_find->second];
+        int32_t func_vram = func.vram;
+
+        // Check that the function actually contains this vram address.
+        if (patch.before_vram < func_vram || patch.before_vram >= func_vram + func.words.size() * sizeof(func.words[0])) {
+            exit_failure(fmt::format("Function {} has a function hook for vram 0x{:08X} but doesn't contain that vram address!", patch.func_name, (uint32_t)patch.before_vram));
+        }
+
+        // No before_vram means this will be placed at the start of the function
+        size_t instruction_index = -1;
+
+        // Calculate the instruction index.
+        if (patch.before_vram != 0) {
+          instruction_index = (static_cast<size_t>(patch.before_vram) - func_vram) / sizeof(uint32_t);
+        }
+
+        // Check if a function hook already exists for that instruction index.
+        auto hook_find = func.function_hooks.find(instruction_index);
+        if (hook_find != func.function_hooks.end()) {
+            exit_failure(fmt::format("Function {} already has a function hook for vram 0x{:08X}!", patch.func_name, (uint32_t)patch.before_vram));
+        }
+
+        func.function_hooks[instruction_index] = patch.text;
+    }
+
     std::ofstream single_output_file;
 
     if (config.single_file_output) {
@@ -1700,18 +1736,22 @@ int main(int argc, char** argv) {
 
 
         fmt::print(overlay_file, "static int overlay_sections_by_index[] = {{\n");
-        for (const std::string& section : relocatable_sections_ordered) {
-            // Check if this is an empty overlay
-            if (section == "*") {
-                fmt::print(overlay_file, "    -1,\n");
-            }
-            else {
-                auto find_it = relocatable_section_indices.find(section);
-                if (find_it == relocatable_section_indices.end()) {
-                    fmt::print(stderr, "Failed to find written section index of relocatable section: {}\n", section);
-                    std::exit(EXIT_FAILURE);
+        if (relocatable_sections_ordered.empty()) {
+            fmt::print(overlay_file, "    -1,\n");
+        } else {
+            for (const std::string& section : relocatable_sections_ordered) {
+                // Check if this is an empty overlay
+                if (section == "*") {
+                    fmt::print(overlay_file, "    -1,\n");
+                }
+                else {
+                    auto find_it = relocatable_section_indices.find(section);
+                    if (find_it == relocatable_section_indices.end()) {
+                        fmt::print(stderr, "Failed to find written section index of relocatable section: {}\n", section);
+                        std::exit(EXIT_FAILURE);
+                    }
+                    fmt::print(overlay_file, "    {},\n", relocatable_section_indices[section]);
                 }
-                fmt::print(overlay_file, "    {},\n", relocatable_section_indices[section]);
             }
         }
         fmt::print(overlay_file, "}};\n");
diff --git a/src/recompilation.cpp b/src/recompilation.cpp
index 4b29725..e34ed3c 100644
--- a/src/recompilation.cpp
+++ b/src/recompilation.cpp
@@ -24,23 +24,33 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C
     const auto& instr = instructions[instr_index];
     needs_link_branch = false;
     is_branch_likely = false;
+    uint32_t instr_vram = instr.getVram();
+
+    auto print_indent = [&]() {
+        fmt::print(output_file, "    ");
+    };
+
+    auto hook_find = func.function_hooks.find(instr_index);
+    if (hook_find != func.function_hooks.end()) {
+        fmt::print(output_file, "    {}\n", hook_find->second);
+        if (indent) {
+            print_indent();
+        }
+    }
 
     // Output a comment with the original instruction
     if (instr.isBranch() || instr.getUniqueId() == InstrId::cpu_j) {
-        fmt::print(output_file, "    // {}\n", instr.disassemble(0, fmt::format("L_{:08X}", (uint32_t)instr.getBranchVramGeneric())));
+        fmt::print(output_file, "    // 0x{:08X}: {}\n", instr_vram, instr.disassemble(0, fmt::format("L_{:08X}", (uint32_t)instr.getBranchVramGeneric())));
     } else if (instr.getUniqueId() == InstrId::cpu_jal) {
-        fmt::print(output_file, "    // {}\n", instr.disassemble(0, fmt::format("0x{:08X}", (uint32_t)instr.getBranchVramGeneric())));
+        fmt::print(output_file, "    // 0x{:08X}: {}\n", instr_vram, instr.disassemble(0, fmt::format("0x{:08X}", (uint32_t)instr.getBranchVramGeneric())));
     } else {
-        fmt::print(output_file, "    // {}\n", instr.disassemble(0));
+        fmt::print(output_file, "    // 0x{:08X}: {}\n", instr_vram, instr.disassemble(0));
     }
 
-    uint32_t instr_vram = instr.getVram();
-
     if (skipped_insns.contains(instr_vram)) {
         return true;
     }
 
-
     bool at_reloc = false;
     bool reloc_handled = false;
     RecompPort::RelocType reloc_type = RecompPort::RelocType::R_MIPS_NONE;
@@ -71,10 +81,6 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C
         }
     }
 
-    auto print_indent = [&]() {
-        fmt::print(output_file, "    ");
-    };
-
     auto print_line = [&]<typename... Ts>(fmt::format_string<Ts...> fmt_str, Ts ...args) {
         print_indent();
         fmt::vprint(output_file, fmt_str, fmt::make_format_args(args...));
@@ -106,7 +112,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C
         }
     };
 
-    auto print_func_call = [&](uint32_t target_func_vram, bool link_branch = true) {
+    auto print_func_call = [&](uint32_t target_func_vram, bool link_branch = true, bool indent = false) {
         const auto matching_funcs_find = context.functions_by_vram.find(target_func_vram);
         std::string jal_target_name;
         uint32_t section_vram_start = section.ram_addr;
@@ -173,7 +179,11 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C
             }
         }
         needs_link_branch = link_branch;
-        print_unconditional_branch("{}(rdram, ctx)", jal_target_name);
+        if (indent) {
+            print_unconditional_branch("    {}(rdram, ctx)", jal_target_name);
+        } else {
+            print_unconditional_branch("{}(rdram, ctx)", jal_target_name);
+        }
         return true;
     };
 
@@ -183,9 +193,9 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C
             if (context.functions_by_vram.find(branch_target) != context.functions_by_vram.end()) {
                 fmt::print(output_file, "{{\n    ");
                 fmt::print("Tail call in {} to 0x{:08X}\n", func.name, branch_target);
-                print_func_call(branch_target, false);
-                print_line("return");
-                fmt::print(output_file, ";\n    }}\n");
+                print_func_call(branch_target, false, true);
+                print_line("    return");
+                fmt::print(output_file, "    }}\n");
                 return;
             }
 
@@ -1103,7 +1113,7 @@ bool RecompPort::recompile_function(const RecompPort::Context& context, const Re
         // these variables shouldn't need to be preserved across function boundaries, so make them local for more efficient output
         "    uint64_t hi = 0, lo = 0, result = 0;\n"
         "    unsigned int rounding_mode = DEFAULT_ROUNDING_MODE;\n"
-        "    int c1cs = 0; \n", // cop1 conditional signal
+        "    int c1cs = 0;\n", // cop1 conditional signal
         func.name);
 
     // Skip analysis and recompilation of this function is stubbed.
@@ -1112,6 +1122,11 @@ bool RecompPort::recompile_function(const RecompPort::Context& context, const Re
         std::set<uint32_t> branch_labels;
         instructions.reserve(func.words.size());
 
+        auto hook_find = func.function_hooks.find(-1);
+        if (hook_find != func.function_hooks.end()) {
+            fmt::print(output_file, "    {}\n", hook_find->second);
+        }
+
         // First pass, disassemble each instruction and collect branch labels
         uint32_t vram = func.vram;
         for (uint32_t word : func.words) {