From 2300a4b6c95b34d46c48965e859d6f8afd0bbc13 Mon Sep 17 00:00:00 2001 From: Mr-Wiseguy <mrwiseguyromhacking@gmail.com> Date: Tue, 15 Nov 2022 19:55:48 -0500 Subject: [PATCH] Implemented register state tracking to identify jump tables for jr instructions --- RecompPort.vcxproj | 1 + RecompPort.vcxproj.filters | 3 + include/recomp_port.h | 15 +++ recomp.h | 2 + src/analysis.cpp | 229 +++++++++++++++++++++++++++++++++++++ src/main.cpp | 60 ++++++---- src/recompilation.cpp | 66 ++++++++++- 7 files changed, 346 insertions(+), 30 deletions(-) create mode 100644 src/analysis.cpp diff --git a/RecompPort.vcxproj b/RecompPort.vcxproj index 6978eca..132452b 100644 --- a/RecompPort.vcxproj +++ b/RecompPort.vcxproj @@ -137,6 +137,7 @@ </ProjectReference> </ItemGroup> <ItemGroup> + <ClCompile Include="src\analysis.cpp" /> <ClCompile Include="src\main.cpp" /> <ClCompile Include="src\recompilation.cpp" /> </ItemGroup> diff --git a/RecompPort.vcxproj.filters b/RecompPort.vcxproj.filters index c3edfb2..700a836 100644 --- a/RecompPort.vcxproj.filters +++ b/RecompPort.vcxproj.filters @@ -21,6 +21,9 @@ <ClCompile Include="src\recompilation.cpp"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="src\analysis.cpp"> + <Filter>Source Files</Filter> + </ClCompile> </ItemGroup> <ItemGroup> <ClInclude Include="lib\ELFIO\elfio\elfio.hpp"> diff --git a/include/recomp_port.h b/include/recomp_port.h index 834115d..7852650 100644 --- a/include/recomp_port.h +++ b/include/recomp_port.h @@ -19,6 +19,15 @@ constexpr uint32_t byteswap(uint32_t val) { namespace RecompPort { + struct JumpTable { + uint32_t vram; + uint32_t addend_reg; + uint32_t rom; + uint32_t lw_vram; + uint32_t jr_vram; + std::vector<uint32_t> entries; + }; + struct Function { uint32_t vram; uint32_t rom; @@ -26,11 +35,17 @@ namespace RecompPort { std::string name; }; + struct FunctionStats { + std::vector<JumpTable> jump_tables; + }; + struct Context { std::vector<RecompPort::Function> functions; std::unordered_map<uint32_t, std::vector<size_t>> functions_by_vram; + std::vector<uint8_t> rom; }; + bool analyze_function(const Context& context, const Function& function, const std::vector<rabbitizer::InstructionCpu>& instructions, FunctionStats& stats); bool recompile_function(const Context& context, const Function& func, std::string_view output_path); } diff --git a/recomp.h b/recomp.h index ef01f81..6cc002c 100644 --- a/recomp.h +++ b/recomp.h @@ -104,4 +104,6 @@ typedef struct { uint64_t hi, lo; } recomp_context; +void switch_error(const char* func, uint32_t vram, uint32_t jtbl); + #endif diff --git a/src/analysis.cpp b/src/analysis.cpp new file mode 100644 index 0000000..c3e1e47 --- /dev/null +++ b/src/analysis.cpp @@ -0,0 +1,229 @@ +#include <set> +#include <algorithm> + +#include "rabbitizer.hpp" +#include "fmt/format.h" + +#include "recomp_port.h" + +extern "C" const char* RabbitizerRegister_getNameGpr(uint8_t regValue); + +// If 64-bit addressing is ever implemented, these will need to be changed to 64-bit values +struct RegState { + // For tracking a register that will be used to load from RAM + uint32_t prev_lui; + uint32_t prev_addiu; + uint8_t prev_addend_reg; + bool valid_lui; + bool valid_addiu; + bool valid_addend; + // For tracking a register that has been loaded from RAM + uint32_t loaded_lw_addr; + uint32_t loaded_addr; + uint8_t loaded_addend_reg; + bool valid_loaded; + + RegState() = default; + + void invalidate() { + prev_lui = 0; + prev_addiu = 0; + prev_addend_reg = 0; + + valid_lui = false; + valid_addiu = false; + valid_addend = false; + + loaded_lw_addr = 0; + loaded_addr = 0; + loaded_addend_reg = 0; + + valid_loaded = false; + } +}; + +using InstrId = rabbitizer::InstrId::UniqueId; + +bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPort::Function& func, RecompPort::FunctionStats& stats, + RegState reg_states[32]) { + // Temporary register state for tracking the register being operated on + RegState temp{}; + + int rd = (int)instr.GetO32_rd(); + int rs = (int)instr.GetO32_rs(); + int base = rs; + int rt = (int)instr.GetO32_rt(); + int sa = (int)instr.Get_sa(); + + uint16_t imm = instr.Get_immediate(); + + auto check_move = [&]() { + if (rs == 0) { + // rs is zero so copy rt to rd + reg_states[rd] = reg_states[rt]; + } else if (rt == 0) { + // rt is zero so copy rs to rd + reg_states[rd] = reg_states[rs]; + } else { + // Not a move, invalidate rd + reg_states[rd].invalidate(); + } + }; + + switch (instr.getUniqueId()) { + case InstrId::cpu_lui: + // rt has been completely overwritten, so invalidate it + reg_states[rt].invalidate(); + reg_states[rt].prev_lui = (int16_t)imm << 16; + reg_states[rt].valid_lui = true; + break; + case InstrId::cpu_addiu: + // The target reg is a copy of the source reg plus an immediate, so copy the source reg's state + reg_states[rt] = reg_states[rs]; + // Set the addiu state if and only if there hasn't been an addiu already + if (!reg_states[rt].valid_addiu) { + reg_states[rt].prev_addiu = (int16_t)imm; + reg_states[rt].valid_addiu = true; + } else { + // Otherwise, there have been 2 or more consecutive addius so invalidate the whole register + reg_states[rt].invalidate(); + } + break; + case InstrId::cpu_addu: + // rd has been completely overwritten, so invalidate it + temp.invalidate(); + // Exactly one of the two addend register states should have a valid lui at this time + if (reg_states[rs].valid_lui != reg_states[rt].valid_lui) { + // Track which of the two registers has the valid lui state and which is the addend + int valid_lui_reg = reg_states[rs].valid_lui ? rs : rt; + int addend_reg = reg_states[rs].valid_lui ? rt : rs; + + // Copy the lui reg's state into the destination reg, then set the destination reg's addend to the other operand + temp = reg_states[valid_lui_reg]; + temp.valid_addend = true; + temp.prev_addend_reg = addend_reg; + } else { + // Check if this is a move + check_move(); + } + reg_states[rd] = temp; + break; + case InstrId::cpu_daddu: + case InstrId::cpu_or: + check_move(); + break; + case InstrId::cpu_lw: + // rt has been completely overwritten, so invalidate it + temp.invalidate(); + // If the base register has a valid lui state and a valid addend before this, then this may be a load from a jump table + if (reg_states[base].valid_lui && reg_states[base].valid_addend) { + // Exactly one of the lw and the base reg should have a valid lo16 value + bool nonzero_immediate = imm != 0; + if (nonzero_immediate != reg_states[base].valid_addiu) { + uint32_t lo16; + if (nonzero_immediate) { + lo16 = (int16_t)imm; + } else { + lo16 = reg_states[base].prev_addiu; + } + + uint32_t address = reg_states[base].prev_lui + lo16; + temp.valid_loaded = true; + temp.loaded_lw_addr = instr.getVram(); + temp.loaded_addr = address; + temp.loaded_addend_reg = reg_states[base].prev_addend_reg; + } + } + reg_states[rt] = temp; + break; + case InstrId::cpu_jr: + // Ignore jr $ra + if (rs == (int)rabbitizer::Registers::Cpu::GprO32::GPR_O32_ra) { + break; + } + // Check if the source reg has a valid loaded state and if so record that as a jump table + if (reg_states[rs].valid_loaded) { + stats.jump_tables.emplace_back( + reg_states[rs].loaded_addr, + reg_states[rs].loaded_addend_reg, + 0, + reg_states[rs].loaded_lw_addr, + instr.getVram(), + std::vector<uint32_t>{} + ); + } else { + // Inconclusive analysis + fmt::print(stderr, "Failed to to find jump table for `jr {}` at 0x{:08X} in {}\n", RabbitizerRegister_getNameGpr(rs), instr.getVram(), func.name); + return false; + } + break; + default: + if (instr.modifiesRd()) { + reg_states[rd].invalidate(); + } + if (instr.modifiesRt()) { + reg_states[rt].invalidate(); + } + break; + } + return true; +} + +bool RecompPort::analyze_function(const RecompPort::Context& context, const RecompPort::Function& func, + const std::vector<rabbitizer::InstructionCpu>& instructions, RecompPort::FunctionStats& stats) { + // Create a state to track each register (r0 won't be used) + RegState reg_states[32] {}; + + // Look for jump tables + // A linear search through the func won't be accurate due to not taking control flow into account, but it'll work for finding jtables + for (const auto& instr : instructions) { + if (!analyze_instruction(instr, func, stats, reg_states)) { + return false; + } + } + + // Sort jump tables by their address + std::sort(stats.jump_tables.begin(), stats.jump_tables.end(), + [](const JumpTable& a, const JumpTable& b) + { + return a.vram < b.vram; + }); + + // Determine jump table sizes + for (size_t i = 0; i < stats.jump_tables.size(); i++) { + JumpTable& cur_jtbl = stats.jump_tables[i]; + uint32_t end_address = (uint32_t)-1; + uint32_t entry_count = 0; + uint32_t vram = cur_jtbl.vram; + + if (i < stats.jump_tables.size() - 1) { + end_address = stats.jump_tables[i + 1].vram; + } + + // TODO this assumes that the jump table is in the same section as the function itself + cur_jtbl.rom = cur_jtbl.vram + func.rom - func.vram; + + while (vram < end_address) { + // Retrieve the current entry of the jump table + // TODO same as above + uint32_t rom_addr = vram + func.rom - func.vram; + uint32_t jtbl_word = byteswap(*reinterpret_cast<const uint32_t*>(&context.rom[rom_addr])); + // Check if the entry is a valid address in the current function + if (jtbl_word < func.vram || jtbl_word > func.vram + func.words.size_bytes()) { + // If it's not then this is the end of the jump table + break; + } + cur_jtbl.entries.push_back(jtbl_word); + vram += 4; + } + + if (cur_jtbl.entries.size() == 0) { + fmt::print("Failed to determine size of jump table at 0x{:08X} for instruction at 0x{:08X}\n", cur_jtbl.vram, cur_jtbl.jr_vram); + return false; + } + + //fmt::print("Jtbl at 0x{:08X} (rom 0x{:08X}) with {} entries used by instr at 0x{:08X}\n", cur_jtbl.vram, cur_jtbl.rom, cur_jtbl.entries.size(), cur_jtbl.jr_vram); + } + + return true; +} diff --git a/src/main.cpp b/src/main.cpp index 7e2eda9..9d759c8 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -221,7 +221,7 @@ std::unordered_set<std::string> ignored_funcs { "__osSetConfig", "__osGetConfig", "__osSetWatchLo", - "__osGetWatchLo", + "__osGetWatchLo" }; int main(int argc, char** argv) { @@ -254,21 +254,25 @@ int main(int argc, char** argv) { // Pointer to the symbol table section ELFIO::section* symtab_section = nullptr; - // Size of the ROM as determined by the elf - ELFIO::Elf_Xword rom_size = 0; // ROM address of each section std::vector<ELFIO::Elf_Xword> section_rom_addrs{}; + + RecompPort::Context context{}; section_rom_addrs.resize(elf_file.sections.size()); + context.functions.reserve(1024); + context.rom.reserve(8 * 1024 * 1024); // Iterate over every section to record rom addresses and find the symbol table fmt::print("Sections\n"); for (const std::unique_ptr<ELFIO::section>& section : elf_file.sections) { - fmt::print(" {}: {} @ 0x{:08X}, 0x{:08X}\n", section->get_index(), section->get_name(), section->get_address(), rom_size); + //fmt::print(" {}: {} @ 0x{:08X}, 0x{:08X}\n", section->get_index(), section->get_name(), section->get_address(), context.rom.size()); // Set the rom address of this section to the current accumulated ROM size - section_rom_addrs[section->get_index()] = rom_size; - // If this section isn't bss (SHT_NOBITS) and ends up in the rom (SHF_ALLOC), increase the rom size by this section's size + section_rom_addrs[section->get_index()] = context.rom.size(); + // If this section isn't bss (SHT_NOBITS) and ends up in the rom (SHF_ALLOC), copy this section into the rom if (section->get_type() != ELFIO::SHT_NOBITS && section->get_flags() & ELFIO::SHF_ALLOC) { - rom_size += section->get_size(); + size_t cur_rom_size = context.rom.size(); + context.rom.resize(context.rom.size() + section->get_size()); + std::copy(section->get_data(), section->get_data() + section->get_size(), &context.rom[cur_rom_size]); } // Check if this section is the symbol table and record it if so if (section->get_type() == ELFIO::SHT_SYMTAB) { @@ -278,16 +282,13 @@ int main(int argc, char** argv) { // If no symbol table was found then exit if (symtab_section == nullptr) { - exit_failure("No symbol section found\n"); + exit_failure("No symbol table section found\n"); } ELFIO::symbol_section_accessor symbols{ elf_file, symtab_section }; fmt::print("Num symbols: {}\n", symbols.get_symbols_num()); - RecompPort::Context context{}; - context.functions.reserve(1024); - for (int sym_index = 0; sym_index < symbols.get_symbols_num(); sym_index++) { std::string name; ELFIO::Elf64_Addr value; @@ -303,19 +304,30 @@ int main(int argc, char** argv) { // Check if this symbol is a function or has no type (like a regular glabel would) // Symbols with no type have a dummy entry created so that their symbol can be looked up for function calls - if (type == ELFIO::STT_FUNC || (type == ELFIO::STT_NOTYPE && section_index < section_rom_addrs.size())) { - auto section_rom_addr = section_rom_addrs[section_index]; - auto section_offset = value - elf_file.sections[section_index]->get_address(); - const uint32_t* words = reinterpret_cast<const uint32_t*>(elf_file.sections[section_index]->get_data() + section_offset); - uint32_t vram = static_cast<uint32_t>(value); - uint32_t num_instructions = type == ELFIO::STT_FUNC ? size / 4 : 0; - context.functions_by_vram[vram].push_back(context.functions.size()); - context.functions.emplace_back( - vram, - static_cast<uint32_t>(section_offset + section_rom_addr), - std::span{ words, num_instructions }, - std::move(name) - ); + if (type == ELFIO::STT_FUNC || type == ELFIO::STT_NOTYPE) { + if (section_index < section_rom_addrs.size()) { + auto section_rom_addr = section_rom_addrs[section_index]; + auto section_offset = value - elf_file.sections[section_index]->get_address(); + const uint32_t* words = reinterpret_cast<const uint32_t*>(elf_file.sections[section_index]->get_data() + section_offset); + uint32_t vram = static_cast<uint32_t>(value); + uint32_t num_instructions = type == ELFIO::STT_FUNC ? size / 4 : 0; + context.functions_by_vram[vram].push_back(context.functions.size()); + context.functions.emplace_back( + vram, + static_cast<uint32_t>(section_offset + section_rom_addr), + std::span{ words, num_instructions }, + std::move(name) + ); + } else { + uint32_t vram = static_cast<uint32_t>(value); + context.functions_by_vram[vram].push_back(context.functions.size()); + context.functions.emplace_back( + vram, + 0, + std::span<const uint32_t>{}, + std::move(name) + ); + } } } diff --git a/src/recompilation.cpp b/src/recompilation.cpp index 8d28d41..51caaef 100644 --- a/src/recompilation.cpp +++ b/src/recompilation.cpp @@ -1,5 +1,6 @@ #include <vector> #include <set> +#include <unordered_set> #include "rabbitizer.hpp" #include "fmt/format.h" @@ -16,7 +17,7 @@ std::string_view ctx_gpr_prefix(int reg) { return ""; } -bool process_instruction(const RecompPort::Context& context, size_t instr_index, const std::vector<rabbitizer::InstructionCpu>& instructions, std::ofstream& output_file, bool indent, bool emit_link_branch, int link_branch_index, bool& needs_link_branch, bool& is_branch_likely) { +bool process_instruction(const RecompPort::Context& context, const RecompPort::Function& func, const RecompPort::FunctionStats& stats, const std::unordered_set<uint32_t>& skipped_insns, size_t instr_index, const std::vector<rabbitizer::InstructionCpu>& instructions, std::ofstream& output_file, bool indent, bool emit_link_branch, int link_branch_index, bool& needs_link_branch, bool& is_branch_likely) { const auto& instr = instructions[instr_index]; needs_link_branch = false; is_branch_likely = false; @@ -30,6 +31,10 @@ bool process_instruction(const RecompPort::Context& context, size_t instr_index, fmt::print(output_file, " // {}\n", instr.disassemble(0)); } + if (skipped_insns.contains(instr.getVram())) { + return true; + } + auto print_indent = [&]() { fmt::print(output_file, " "); }; @@ -49,7 +54,7 @@ bool process_instruction(const RecompPort::Context& context, size_t instr_index, if (instr_index < instructions.size() - 1) { bool dummy_needs_link_branch; bool dummy_is_branch_likely; - process_instruction(context, instr_index + 1, instructions, output_file, false, false, link_branch_index, dummy_needs_link_branch, dummy_is_branch_likely); + process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, false, false, link_branch_index, dummy_needs_link_branch, dummy_is_branch_likely); } print_indent(); fmt::print(output_file, fmt_str, args...); @@ -65,7 +70,7 @@ bool process_instruction(const RecompPort::Context& context, size_t instr_index, if (instr_index < instructions.size() - 1) { bool dummy_needs_link_branch; bool dummy_is_branch_likely; - process_instruction(context, instr_index + 1, instructions, output_file, true, false, link_branch_index, dummy_needs_link_branch, dummy_is_branch_likely); + process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, true, false, link_branch_index, dummy_needs_link_branch, dummy_is_branch_likely); } fmt::print(output_file, " "); fmt::print(output_file, fmt_str, args...); @@ -102,6 +107,9 @@ bool process_instruction(const RecompPort::Context& context, size_t instr_index, case InstrId::cpu_addu: print_line("{}{} = ADD32({}{}, {}{})", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); break; + case InstrId::cpu_daddu: + print_line("{}{} = {}{} + {}{}", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); + break; case InstrId::cpu_negu: // pseudo instruction for subu x, 0, y case InstrId::cpu_subu: print_line("{}{} = SUB32({}{}, {}{})", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt); @@ -260,6 +268,13 @@ bool process_instruction(const RecompPort::Context& context, size_t instr_index, nonzero_func_index = cur_func_index; } } + if (nonzero_func_index == (size_t)-1) { + fmt::print(stderr, "[Warn] Potential jal resolution ambiguity\n"); + for (size_t cur_func_index : matching_funcs_vec) { + fmt::print(stderr, " {}\n", context.functions[cur_func_index].name); + } + nonzero_func_index = 0; + } real_func_index = nonzero_func_index; ambiguous = false; } else { @@ -295,7 +310,28 @@ bool process_instruction(const RecompPort::Context& context, size_t instr_index, if (rs == (int)rabbitizer::Registers::Cpu::GprO32::GPR_O32_ra) { print_unconditional_branch("return"); } else { - // TODO jump table handling + uint32_t instr_vram = instr.getVram(); + auto find_result = std::find_if(stats.jump_tables.begin(), stats.jump_tables.end(), + [instr_vram](const RecompPort::JumpTable& jtbl) { + return jtbl.jr_vram == instr_vram; + }); + if (find_result == stats.jump_tables.end()) { + fmt::print(stderr, "No jump table found for jr at 0x{:08X}\n", instr_vram); + } + const RecompPort::JumpTable& cur_jtbl = *find_result; + bool dummy_needs_link_branch, dummy_is_branch_likely; + process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, false, false, link_branch_index, dummy_needs_link_branch, dummy_is_branch_likely); + print_indent(); + // TODO this will fail if the register holding the addend is mangled, add logic to emit a temp with the addend into the code + fmt::print(output_file, "switch ({}{} >> 2) {{\n", ctx_gpr_prefix(cur_jtbl.addend_reg), cur_jtbl.addend_reg, cur_jtbl.vram); + for (size_t entry_index = 0; entry_index < cur_jtbl.entries.size(); entry_index++) { + print_indent(); + print_line("case {}: goto L_{:08X}; break", entry_index, cur_jtbl.entries[entry_index]); + } + print_indent(); + print_line("default: switch_error(__func__, 0x{:08X}, 0x{:08X})", instr_vram, cur_jtbl.vram); + print_indent(); + fmt::print(output_file, "}}\n"); } break; case InstrId::cpu_bnel: @@ -685,7 +721,7 @@ bool process_instruction(const RecompPort::Context& context, size_t instr_index, } bool RecompPort::recompile_function(const RecompPort::Context& context, const RecompPort::Function& func, std::string_view output_path) { - fmt::print("Recompiling {}\n", func.name); + //fmt::print("Recompiling {}\n", func.name); std::vector<rabbitizer::InstructionCpu> instructions; // Open the output file and write the file header @@ -717,6 +753,24 @@ bool RecompPort::recompile_function(const RecompPort::Context& context, const Re vram += 4; } + // Analyze function + RecompPort::FunctionStats stats{}; + if (!RecompPort::analyze_function(context, func, instructions, stats)) { + fmt::print(stderr, "Failed to analyze {}\n", func.name); + output_file.clear(); + return false; + } + + std::unordered_set<uint32_t> skipped_insns{}; + + // Add jump table labels into function + for (const auto& jtbl : stats.jump_tables) { + skipped_insns.insert(jtbl.lw_vram); + for (uint32_t jtbl_entry : jtbl.entries) { + branch_labels.insert(jtbl_entry); + } + } + // Second pass, emit code for each instruction and emit labels auto cur_label = branch_labels.cbegin(); vram = func.vram; @@ -737,7 +791,7 @@ bool RecompPort::recompile_function(const RecompPort::Context& context, const Re ++cur_label; } // Process the current instruction and check for errors - if (process_instruction(context, instr_index, instructions, output_file, false, needs_link_branch, num_link_branches, needs_link_branch, is_branch_likely) == false) { + if (process_instruction(context, func, stats, skipped_insns, instr_index, instructions, output_file, false, needs_link_branch, num_link_branches, needs_link_branch, is_branch_likely) == false) { fmt::print(stderr, "Error in recompilation, clearing {}\n", output_path); output_file.clear(); return false;