From ba4aede49c9a5302ecfc1fa599f7acc3925042f9 Mon Sep 17 00:00:00 2001 From: Wiseguy <68165316+Mr-Wiseguy@users.noreply.github.com> Date: Tue, 2 Jul 2024 21:42:22 -0400 Subject: [PATCH] Add symbol reference file mechanism for elf recompilation (#82) * Consolidate context dumping toggle into a single bool, begin work on data symbol context dumping * Added data symbol context dumping * Fix mthi/mtlo implementation * Add option to control unpaired LO16 warnings --- include/recomp_port.h | 40 +++- src/config.cpp | 300 +++++++++++++++++++++---- src/main.cpp | 494 ++++++++++++++++++++++++++++++------------ src/recompilation.cpp | 218 ++++++++++++------- 4 files changed, 778 insertions(+), 274 deletions(-) diff --git a/include/recomp_port.h b/include/recomp_port.h index 3828f01..e70f3ae 100644 --- a/include/recomp_port.h +++ b/include/recomp_port.h @@ -68,11 +68,15 @@ namespace RecompPort { bool uses_mips3_float_mode; bool single_file_output; bool use_absolute_symbols; + bool unpaired_lo16_warnings; std::filesystem::path elf_path; std::filesystem::path symbols_file_path; + std::filesystem::path func_reference_syms_file_path; + std::vector data_reference_syms_file_paths; std::filesystem::path rom_file_path; std::filesystem::path output_func_path; std::filesystem::path relocatable_sections_path; + std::filesystem::path output_binary_path; std::vector stubbed_funcs; std::vector ignored_funcs; DeclaredFunctionMap declared_funcs; @@ -137,12 +141,15 @@ namespace RecompPort { struct Reloc { uint32_t address; - uint32_t target_address; + uint32_t section_offset; uint32_t symbol_index; uint32_t target_section; RelocType type; + bool reference_symbol; }; + constexpr uint16_t SectionSelf = (uint16_t)-1; + constexpr uint16_t SectionAbsolute = (uint16_t)-2; struct Section { ELFIO::Elf_Xword rom_addr = 0; ELFIO::Elf64_Addr ram_addr = 0; @@ -153,6 +160,7 @@ namespace RecompPort { ELFIO::Elf_Half bss_section_index = (ELFIO::Elf_Half)-1; bool executable = false; bool relocatable = false; 
+ bool has_mips32_relocs = false; }; struct FunctionStats { @@ -160,6 +168,19 @@ namespace RecompPort { std::vector absolute_jumps; }; + struct ReferenceSection { + uint32_t rom_addr; + uint32_t ram_addr; + uint32_t size; + bool relocatable; + }; + + struct ReferenceSymbol { + uint16_t section_index; + uint32_t section_offset; + bool is_function; + }; + struct Context { // ROM address of each section std::vector
sections; @@ -174,6 +195,16 @@ namespace RecompPort { std::unordered_set relocatable_sections; // Functions with manual size overrides std::unordered_map manually_sized_funcs; + + //// Reference symbols (used for populating relocations for patches) + // A list of the sections that contain the reference symbols. + std::vector reference_sections; + // A list of the reference symbols. + std::vector reference_symbols; + // Name of every reference symbol in the same order as `reference_symbols`. + std::vector reference_symbol_names; + // Mapping of symbol name to reference symbol index. + std::unordered_map reference_symbols_by_name; int executable_section_count; Context(const ELFIO::elfio& elf_file) { @@ -186,7 +217,12 @@ namespace RecompPort { executable_section_count = 0; } - static bool from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector&& rom, Context& out); + // Imports sections and function symbols from a provided context into this context's reference sections and reference functions. + void import_reference_context(const Context& reference_context); + // Reads a data symbol file and adds its contents into this context's reference data symbols. 
+ bool read_data_reference_syms(const std::filesystem::path& data_syms_file_path); + + static bool from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector&& rom, Context& out, bool with_relocs); Context() = default; }; diff --git a/src/config.cpp b/src/config.cpp index e106ae6..7126e55 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -4,6 +4,13 @@ #include "fmt/format.h" #include "recomp_port.h" +std::filesystem::path concat_if_not_empty(const std::filesystem::path& parent, const std::filesystem::path& child) { + if (!child.empty()) { + return parent / child; + } + return child; +} + std::vector get_manual_funcs(const toml::array* manual_funcs_array) { std::vector ret; @@ -30,6 +37,23 @@ std::vector get_manual_funcs(const toml::array* manu return ret; } +std::vector get_data_syms_paths(const toml::array* data_syms_paths_array, const std::filesystem::path& basedir) { + std::vector ret; + + // Reserve room for all the funcs in the map. + ret.reserve(data_syms_paths_array->size()); + data_syms_paths_array->for_each([&ret, &basedir](auto&& el) { + if constexpr (toml::is_string) { + ret.emplace_back(concat_if_not_empty(basedir, el.template value_exact().value())); + } + else { + throw toml::parse_error("Invalid type for data reference symbol file entry", el.source()); + } + }); + + return ret; +} + std::vector get_stubbed_funcs(const toml::table* patches_data) { std::vector stubbed_funcs{}; @@ -268,13 +292,6 @@ std::vector get_function_hooks(const toml::table* patc return ret; } -std::filesystem::path concat_if_not_empty(const std::filesystem::path& parent, const std::filesystem::path& child) { - if (!child.empty()) { - return parent / child; - } - return child; -} - RecompPort::Config::Config(const char* path) { // Start this config out as bad so that it has to finish parsing without errors to be good. 
entrypoint = 0; @@ -370,7 +387,25 @@ RecompPort::Config::Config(const char* path) { toml::node_view manual_functions_data = input_data["manual_funcs"]; if (manual_functions_data.is_array()) { const toml::array* array = manual_functions_data.as_array(); - get_manual_funcs(array); + manual_functions = get_manual_funcs(array); + } + + // Output binary path when using an elf file input, includes patching reference symbol MIPS32 relocs (optional) + std::optional output_binary_path_opt = input_data["output_binary_path"].value(); + if (output_binary_path_opt.has_value()) { + output_binary_path = concat_if_not_empty(basedir, output_binary_path_opt.value()); + } + else { + output_binary_path = ""; + } + + // Control whether the recompiler warns about unpaired LO16 relocs (optional, defaults to true) + std::optional unpaired_lo16_warnings_opt = input_data["unpaired_lo16_warnings"].value(); + if (unpaired_lo16_warnings_opt.has_value()) { + unpaired_lo16_warnings = unpaired_lo16_warnings_opt.value(); + } + else { + unpaired_lo16_warnings = true; } // Patches section (optional) @@ -396,6 +431,28 @@ RecompPort::Config::Config(const char* path) { // Fonction hooks (optional) function_hooks = get_function_hooks(table); } + + // Function reference symbols file (optional) + std::optional func_reference_syms_file_opt = input_data["func_reference_syms_file"].value(); + if (func_reference_syms_file_opt.has_value()) { + if (!symbols_file_path.empty()) { + throw toml::parse_error("Reference symbol files can only be used in elf input mode", input_data["func_reference_syms_file"].node()->source()); + } + func_reference_syms_file_path = concat_if_not_empty(basedir, func_reference_syms_file_opt.value()); + } + + // Data reference symbols files (optional) + toml::node_view data_reference_syms_file_data = input_data["data_reference_syms_files"]; + if (data_reference_syms_file_data.is_array()) { + if (!symbols_file_path.empty()) { + throw toml::parse_error("Reference symbol files can only be 
used in elf input mode", data_reference_syms_file_data.node()->source()); + } + if (func_reference_syms_file_path.empty()) { + throw toml::parse_error("Data reference symbol files can only be used if a function reference symbol file is also in use", data_reference_syms_file_data.node()->source()); + } + const toml::array* array = data_reference_syms_file_data.as_array(); + data_reference_syms_file_paths = get_data_syms_paths(array, basedir); + } } catch (const toml::parse_error& err) { std::cerr << "Syntax error parsing toml: " << *err.source().path << " (" << err.source().begin << "):\n" << err.description() << std::endl; @@ -425,7 +482,7 @@ RecompPort::RelocType reloc_type_from_name(const std::string& reloc_type_name) { return RecompPort::RelocType::R_MIPS_NONE; } -bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector&& rom, RecompPort::Context& out) { +bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector&& rom, RecompPort::Context& out, bool with_relocs) { RecompPort::Context ret{}; try { @@ -439,7 +496,7 @@ bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_f const toml::array* config_sections = config_sections_value.as_array(); ret.section_functions.resize(config_sections->size()); - config_sections->for_each([&ret, &rom](auto&& el) { + config_sections->for_each([&ret, &rom, with_relocs](auto&& el) { if constexpr (toml::is_table) { std::optional rom_addr = el["rom"].template value(); std::optional vram_addr = el["vram"].template value(); @@ -496,15 +553,18 @@ bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_f throw toml::parse_error("Function's rom address isn't word aligned", func_el.source()); } - if (cur_func.rom + func_size > rom.size()) { - // Function is out of bounds of the provided rom. 
- throw toml::parse_error("Functio is out of bounds of the provided rom", func_el.source()); - } + // Read the function's words if a rom was provided. + if (!rom.empty()) { + if (cur_func.rom + func_size > rom.size()) { + // Function is out of bounds of the provided rom. + throw toml::parse_error("Function is out of bounds of the provided rom", func_el.source()); + } - // Get the function's words from the rom. - cur_func.words.reserve(func_size / sizeof(uint32_t)); - for (size_t rom_addr = cur_func.rom; rom_addr < cur_func.rom + func_size; rom_addr += sizeof(uint32_t)) { - cur_func.words.push_back(*reinterpret_cast(rom.data() + rom_addr)); + // Get the function's words from the rom. + cur_func.words.reserve(func_size / sizeof(uint32_t)); + for (size_t rom_addr = cur_func.rom; rom_addr < cur_func.rom + func_size; rom_addr += sizeof(uint32_t)) { + cur_func.words.push_back(*reinterpret_cast(rom.data() + rom_addr)); + } } section.function_addrs.push_back(cur_func.vram); @@ -525,38 +585,39 @@ bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_f // Mark the section as relocatable, since it has relocs. section.relocatable = true; - // Read relocs for the section. - const toml::array* relocs_array = relocs_value.as_array(); - relocs_array->for_each([&ret, &rom, §ion, section_index](auto&& reloc_el) { - if constexpr (toml::is_table) { - std::optional vram = reloc_el["vram"].template value(); - std::optional target_vram = reloc_el["target_vram"].template value(); - std::optional type_string = reloc_el["type"].template value(); + if (with_relocs) { + // Read relocs for the section. 
+ const toml::array* relocs_array = relocs_value.as_array(); + relocs_array->for_each([&ret, &rom, §ion, section_index](auto&& reloc_el) { + if constexpr (toml::is_table) { + std::optional vram = reloc_el["vram"].template value(); + std::optional target_vram = reloc_el["target_vram"].template value(); + std::optional type_string = reloc_el["type"].template value(); - if (!vram.has_value() || !target_vram.has_value() || !type_string.has_value()) { - throw toml::parse_error("Reloc entry missing required field(s)", reloc_el.source()); + if (!vram.has_value() || !target_vram.has_value() || !type_string.has_value()) { + throw toml::parse_error("Reloc entry missing required field(s)", reloc_el.source()); + } + + RelocType reloc_type = reloc_type_from_name(type_string.value()); + + if (reloc_type != RelocType::R_MIPS_HI16 && reloc_type != RelocType::R_MIPS_LO16 && reloc_type != RelocType::R_MIPS_32) { + throw toml::parse_error("Invalid reloc entry type", reloc_el.source()); + } + + Reloc cur_reloc{}; + cur_reloc.address = vram.value(); + cur_reloc.section_offset = target_vram.value() - section.ram_addr; + cur_reloc.symbol_index = (uint32_t)-1; + cur_reloc.target_section = section_index; + cur_reloc.type = reloc_type; + + section.relocs.emplace_back(cur_reloc); } - - RelocType reloc_type = reloc_type_from_name(type_string.value()); - - // TODO also accept MIPS32 for TLB relocations. 
- if (reloc_type != RelocType::R_MIPS_HI16 && reloc_type != RelocType::R_MIPS_LO16) { - throw toml::parse_error("Invalid reloc entry type", reloc_el.source()); + else { + throw toml::parse_error("Invalid reloc entry", reloc_el.source()); } - - Reloc cur_reloc{}; - cur_reloc.address = vram.value(); - cur_reloc.target_address = target_vram.value(); - cur_reloc.symbol_index = (uint32_t)-1; - cur_reloc.target_section = section_index; - cur_reloc.type = reloc_type; - - section.relocs.emplace_back(cur_reloc); - } - else { - throw toml::parse_error("Invalid reloc entry", reloc_el.source()); - } - }); + }); + } } else { section.relocatable = false; @@ -575,3 +636,148 @@ bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_f out = std::move(ret); return true; } + +void RecompPort::Context::import_reference_context(const RecompPort::Context& reference_context) { + reference_sections.resize(reference_context.sections.size()); + reference_symbols.reserve(reference_context.functions.size()); + reference_symbol_names.reserve(reference_context.functions.size()); + + // Copy the reference context's sections into the real context's reference sections. + for (size_t section_index = 0; section_index < reference_context.sections.size(); section_index++) { + const RecompPort::Section& section_in = reference_context.sections[section_index]; + RecompPort::ReferenceSection& section_out = reference_sections[section_index]; + + section_out.rom_addr = section_in.rom_addr; + section_out.ram_addr = section_in.ram_addr; + section_out.size = section_in.size; + section_out.relocatable = section_in.relocatable; + } + + // Copy the functions from the reference context into the reference context's function map. 
+ for (const RecompPort::Function& func_in: reference_context.functions) { + const RecompPort::Section& func_section = reference_context.sections[func_in.section_index]; + + reference_symbols_by_name.emplace(func_in.name, reference_symbols.size()); + + reference_symbols.emplace_back(RecompPort::ReferenceSymbol{ + .section_index = func_in.section_index, + .section_offset = func_in.vram - static_cast(func_section.ram_addr), + .is_function = true + }); + reference_symbol_names.emplace_back(func_in.name); + } +} + +// Reads a data symbol file and adds its contents into this context's reference data symbols. +bool RecompPort::Context::read_data_reference_syms(const std::filesystem::path& data_syms_file_path) { + try { + const toml::table data_syms_file_data = toml::parse_file(data_syms_file_path.u8string()); + const toml::node_view data_sections_value = data_syms_file_data["section"]; + + if (!data_sections_value.is_array()) { + return false; + } + + // Create a mapping of rom address to section to ensure that the same section indexes are used for both function and data reference symbols. 
+ std::unordered_map ref_section_indices_by_vrom; + + for (uint16_t section_index = 0; section_index < reference_sections.size(); section_index++) { + ref_section_indices_by_vrom.emplace(reference_sections[section_index].rom_addr, section_index); + } + + const toml::array* data_sections = data_sections_value.as_array(); + + data_sections->for_each([this, &ref_section_indices_by_vrom](auto&& el) { + if constexpr (toml::is_table) { + std::optional rom_addr = el["rom"].template value(); + std::optional vram_addr = el["vram"].template value(); + std::optional size = el["size"].template value(); + std::optional name = el["name"].template value(); + + if (!vram_addr.has_value() || !size.has_value() || !name.has_value()) { + throw toml::parse_error("Section entry missing required field(s)", el.source()); + } + + uint16_t ref_section_index; + if (!rom_addr.has_value()) { + ref_section_index = RecompPort::SectionAbsolute; // Non-relocatable bss section or absolute symbols, mark this as an absolute symbol + } + else if (rom_addr.value() > 0xFFFFFFFF) { + throw toml::parse_error("Section has invalid ROM address", el.source()); + } + else { + // Find the matching section from the function reference symbol file to ensure + auto find_section_it = ref_section_indices_by_vrom.find(rom_addr.value()); + if (find_section_it != ref_section_indices_by_vrom.end()) { + ref_section_index = find_section_it->second; + } + else { + ref_section_index = RecompPort::SectionAbsolute; // Not in the function symbol reference file, so this section can be treated as non-relocatable. + } + } + + static ReferenceSection dummy_absolute_section { + .rom_addr = 0, + .ram_addr = 0, + .size = 0, + .relocatable = 0 + }; + const ReferenceSection& ref_section = ref_section_index == RecompPort::SectionAbsolute ? dummy_absolute_section : this->reference_sections[ref_section_index]; + + // Sanity check this section against the matching one in the function reference symbol file if one exists. 
+ if (ref_section_index != RecompPort::SectionAbsolute) { + if (ref_section.ram_addr != vram_addr.value()) { + throw toml::parse_error("Section vram address differs from matching ROM address section in the function symbol reference file", el.source()); + } + + if (ref_section.size != size.value()) { + throw toml::parse_error("Section size address differs from matching ROM address section in the function symbol reference file", el.source()); + } + } + + // Read functions for the section. + const toml::node_view cur_symbols_value = el["symbols"]; + if (!cur_symbols_value.is_array()) { + throw toml::parse_error("Invalid symbols array", cur_symbols_value.node()->source()); + } + + uint32_t ref_section_vram = ref_section.ram_addr; + const toml::array* cur_symbols = cur_symbols_value.as_array(); + cur_symbols->for_each([this, ref_section_index, ref_section_vram](auto&& data_sym_el) { + + if constexpr (toml::is_table) { + std::optional name = data_sym_el["name"].template value(); + std::optional vram_addr = data_sym_el["vram"].template value(); + + if (!name.has_value() || !vram_addr.has_value()) { + throw toml::parse_error("Reference data symbol entry is missing required field(s)", data_sym_el.source()); + } + + this->reference_symbols_by_name.emplace(name.value(), reference_symbols.size()); + + this->reference_symbols.emplace_back( + ReferenceSymbol { + .section_index = ref_section_index, + .section_offset = vram_addr.value() - ref_section_vram, + .is_function = false + } + ); + this->reference_symbol_names.emplace_back(name.value()); + } + else { + throw toml::parse_error("Invalid data symbol entry", data_sym_el.source()); + } + }); + } else { + throw toml::parse_error("Invalid section entry", el.source()); + } + }); + } + catch (const toml::parse_error& err) { + std::cerr << "Syntax error parsing toml: " << *err.source().path << " (" << err.source().begin << "):\n" << err.description() << std::endl; + return false; + } + + return true; +} + diff --git a/src/main.cpp 
b/src/main.cpp index d1e46e2..d95ad65 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -672,11 +672,33 @@ std::unordered_set renamed_funcs{ "_matherr", }; -bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, uint32_t entrypoint, bool has_entrypoint, bool use_absolute_symbols) { +struct DataSymbol { + uint32_t vram; + std::string name; + + DataSymbol(uint32_t vram, std::string&& name) : vram(vram), name(std::move(name)) {} +}; + +bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, uint32_t entrypoint, bool has_entrypoint, bool use_absolute_symbols, bool dumping_context, std::unordered_map>& data_syms) { bool found_entrypoint_func = false; ELFIO::symbol_section_accessor symbols{ elf_file, symtab_section }; fmt::print("Num symbols: {}\n", symbols.get_symbols_num()); + std::unordered_map bss_section_to_target_section{}; + + // Create a mapping of bss section to the corresponding non-bss section. This is only used when dumping context in order + // for patches and mods to correctly relocate symbols in bss. This mapping only matters for relocatable sections. + if (dumping_context) { + // Process bss and reloc sections + for (size_t cur_section_index = 0; cur_section_index < context.sections.size(); cur_section_index++) { + const RecompPort::Section& cur_section = context.sections[cur_section_index]; + // Check if a bss section was found that corresponds with this section. 
+ if (cur_section.bss_section_index != (uint16_t)-1) { + bss_section_to_target_section[cur_section.bss_section_index] = cur_section_index; + } + } + } + for (int sym_index = 0; sym_index < symbols.get_symbols_num(); sym_index++) { std::string name; ELFIO::Elf64_Addr value; @@ -687,6 +709,7 @@ bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, EL unsigned char other; bool ignored = false; bool reimplemented = false; + bool recorded_symbol = false; // Read symbol properties symbols.get_symbol(sym_index, name, value, size, bind, type, @@ -709,105 +732,135 @@ bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, EL continue; } - if (section_index >= context.sections.size()) { - continue; - } - - // Check if this symbol is the entrypoint - if (has_entrypoint && value == entrypoint && type == ELFIO::STT_FUNC) { - if (found_entrypoint_func) { - fmt::print(stderr, "Ambiguous entrypoint: {}\n", name); - return false; - } - found_entrypoint_func = true; - fmt::print("Found entrypoint, original name: {}\n", name); - size = 0x50; // dummy size for entrypoints, should cover them all - name = "recomp_entrypoint"; - } - - // Check if this symbol has a size override - auto size_find = context.manually_sized_funcs.find(name); - if (size_find != context.manually_sized_funcs.end()) { - size = size_find->second; - type = ELFIO::STT_FUNC; - } - - if (reimplemented_funcs.contains(name)) { - reimplemented = true; - name = name + "_recomp"; - ignored = true; - } else if (ignored_funcs.contains(name)) { - name = name + "_recomp"; - ignored = true; - } - - auto& section = context.sections[section_index]; - - // Check if this symbol is a function or has no type (like a regular glabel would) - // Symbols with no type have a dummy entry created so that their symbol can be looked up for function calls - if (ignored || type == ELFIO::STT_FUNC || type == ELFIO::STT_NOTYPE || type == ELFIO::STT_OBJECT) { - if (renamed_funcs.contains(name)) { - 
name = name + "_recomp"; - ignored = false; + if (section_index < context.sections.size()) { + // Check if this symbol is the entrypoint + if (has_entrypoint && value == entrypoint && type == ELFIO::STT_FUNC) { + if (found_entrypoint_func) { + fmt::print(stderr, "Ambiguous entrypoint: {}\n", name); + return false; + } + found_entrypoint_func = true; + fmt::print("Found entrypoint, original name: {}\n", name); + size = 0x50; // dummy size for entrypoints, should cover them all + name = "recomp_entrypoint"; } - if (section_index < context.sections.size()) { - auto section_offset = value - elf_file.sections[section_index]->get_address(); - const uint32_t* words = reinterpret_cast(elf_file.sections[section_index]->get_data() + section_offset); - uint32_t vram = static_cast(value); - uint32_t num_instructions = type == ELFIO::STT_FUNC ? size / 4 : 0; - uint32_t rom_address = static_cast(section_offset + section.rom_addr); + // Check if this symbol has a size override + auto size_find = context.manually_sized_funcs.find(name); + if (size_find != context.manually_sized_funcs.end()) { + size = size_find->second; + type = ELFIO::STT_FUNC; + } - section.function_addrs.push_back(vram); - context.functions_by_vram[vram].push_back(context.functions.size()); + if (!dumping_context) { + if (reimplemented_funcs.contains(name)) { + reimplemented = true; + name = name + "_recomp"; + ignored = true; + } else if (ignored_funcs.contains(name)) { + name = name + "_recomp"; + ignored = true; + } + } - // Find the entrypoint by rom address in case it doesn't have vram as its value - if (has_entrypoint && rom_address == 0x1000 && type == ELFIO::STT_FUNC) { - vram = entrypoint; - found_entrypoint_func = true; - name = "recomp_entrypoint"; - if (size == 0) { - num_instructions = 0x50 / 4; + auto& section = context.sections[section_index]; + + // Check if this symbol is a function or has no type (like a regular glabel would) + // Symbols with no type have a dummy entry created so that their 
symbol can be looked up for function calls + if (ignored || type == ELFIO::STT_FUNC || type == ELFIO::STT_NOTYPE || type == ELFIO::STT_OBJECT) { + if (!dumping_context) { + if (renamed_funcs.contains(name)) { + name = name + "_recomp"; + ignored = false; } } - // Suffix local symbols to prevent name conflicts. - if (bind == ELFIO::STB_LOCAL) { - name = fmt::format("{}_{:08X}", name, rom_address); - } - - if (num_instructions > 0) { - context.section_functions[section_index].push_back(context.functions.size()); - } - context.functions_by_name[name] = context.functions.size(); + if (section_index < context.sections.size()) { + auto section_offset = value - elf_file.sections[section_index]->get_address(); + const uint32_t* words = reinterpret_cast(elf_file.sections[section_index]->get_data() + section_offset); + uint32_t vram = static_cast(value); + uint32_t num_instructions = type == ELFIO::STT_FUNC ? size / 4 : 0; + uint32_t rom_address = static_cast(section_offset + section.rom_addr); - std::vector insn_words(num_instructions); - insn_words.assign(words, words + num_instructions); + section.function_addrs.push_back(vram); + context.functions_by_vram[vram].push_back(context.functions.size()); - context.functions.emplace_back( - vram, - rom_address, - std::move(insn_words), - std::move(name), - section_index, - ignored, - reimplemented - ); - } else { - uint32_t vram = static_cast(value); - section.function_addrs.push_back(vram); - context.functions_by_vram[vram].push_back(context.functions.size()); - context.functions.emplace_back( - vram, - 0, - std::vector{}, - std::move(name), - section_index, - ignored, - reimplemented - ); + // Find the entrypoint by rom address in case it doesn't have vram as its value + if (has_entrypoint && rom_address == 0x1000 && type == ELFIO::STT_FUNC) { + vram = entrypoint; + found_entrypoint_func = true; + name = "recomp_entrypoint"; + if (size == 0) { + num_instructions = 0x50 / 4; + } + } + + // Suffix local symbols to prevent name 
conflicts. + if (bind == ELFIO::STB_LOCAL) { + name = fmt::format("{}_{:08X}", name, rom_address); + } + + if (num_instructions > 0) { + context.section_functions[section_index].push_back(context.functions.size()); + recorded_symbol = true; + } + context.functions_by_name[name] = context.functions.size(); + + std::vector insn_words(num_instructions); + insn_words.assign(words, words + num_instructions); + + context.functions.emplace_back( + vram, + rom_address, + std::move(insn_words), + name, + section_index, + ignored, + reimplemented + ); + } else { + // TODO is this case needed anymore? + uint32_t vram = static_cast(value); + section.function_addrs.push_back(vram); + context.functions_by_vram[vram].push_back(context.functions.size()); + context.functions.emplace_back( + vram, + 0, + std::vector{}, + name, + section_index, + ignored, + reimplemented + ); + } } } + + // The symbol wasn't detected as a function, so add it to the data symbols if the context is being dumped. + if (!recorded_symbol && dumping_context && !name.empty()) { + uint32_t vram = static_cast(value); + + // Place this symbol in the absolute symbol list if it's in the absolute section. + uint16_t target_section_index = section_index; + if (section_index == ELFIO::SHN_ABS) { + target_section_index = RecompPort::SectionAbsolute; + } + else if (section_index >= context.sections.size()) { + fmt::print("Symbol \"{}\" not in a valid section ({})\n", name, section_index); + } + + // Move this symbol into the corresponding non-bss section if it's in a bss section. 
+ auto find_bss_it = bss_section_to_target_section.find(target_section_index); + if (find_bss_it != bss_section_to_target_section.end()) { + fmt::print("mapping {} to {}\n", context.sections[section_index].name, context.sections[find_bss_it->second].name); + target_section_index = find_bss_it->second; + } + + data_syms[target_section_index].emplace_back( + vram, + std::move(name) + ); + } } return found_entrypoint_func; @@ -945,8 +998,9 @@ ELFIO::section* read_sections(RecompPort::Context& context, const RecompPort::Co std::string reloc_target_section = section_name.substr(strlen(".rel")); - // If this reloc section is for a section that has been marked as relocatable, record it in the reloc section lookup - if (context.relocatable_sections.contains(reloc_target_section)) { + // If this reloc section is for a section that has been marked as relocatable, record it in the reloc section lookup. + // Alternatively, if this recompilation uses reference symbols then record all reloc sections. 
+ if (!context.reference_sections.empty() || context.relocatable_sections.contains(reloc_target_section)) { reloc_sections_by_name[reloc_target_section] = section.get(); } } @@ -1020,14 +1074,15 @@ ELFIO::section* read_sections(RecompPort::Context& context, const RecompPort::Co // TODO make sure that a reloc section was found for every section marked as relocatable // Process bss and reloc sections - for (RecompPort::Section §ion_out : context.sections) { + for (size_t section_index = 0; section_index < context.sections.size(); section_index++) { + RecompPort::Section& section_out = context.sections[section_index]; // Check if a bss section was found that corresponds with this section auto bss_find = bss_sections_by_name.find(section_out.name); if (bss_find != bss_sections_by_name.end()) { section_out.bss_section_index = bss_find->second->get_index(); } - if (section_out.relocatable) { + if (!context.reference_symbols.empty() || section_out.relocatable) { // Check if a reloc section was found that corresponds with this section auto reloc_find = reloc_sections_by_name.find(section_out.name); if (reloc_find != reloc_sections_by_name.end()) { @@ -1053,8 +1108,9 @@ ELFIO::section* read_sections(RecompPort::Context& context, const RecompPort::Co RecompPort::Reloc& reloc_out = section_out.relocs[i]; // Get the real full_immediate by extracting the immediate from the instruction - uint32_t instr_word = byteswap(*reinterpret_cast(context.rom.data() + section_out.rom_addr + rel_offset - section_out.ram_addr)); - rabbitizer::InstructionCpu instr{ instr_word, static_cast(rel_offset) }; + uint32_t reloc_rom_addr = section_out.rom_addr + rel_offset - section_out.ram_addr; + uint32_t reloc_rom_word = byteswap(*reinterpret_cast(context.rom.data() + reloc_rom_addr)); + rabbitizer::InstructionCpu instr{ reloc_rom_word, static_cast(rel_offset) }; //context.rom section_out.rom_addr; reloc_out.address = rel_offset; @@ -1072,10 +1128,48 @@ ELFIO::section* 
read_sections(RecompPort::Context& context, const RecompPort::Co bool found_rel_symbol = symbol_accessor.get_symbol( rel_symbol, rel_symbol_name, rel_symbol_value, rel_symbol_size, rel_symbol_bind, rel_symbol_type, rel_symbol_section_index, rel_symbol_other); - reloc_out.target_section = rel_symbol_section_index; + uint32_t rel_section_vram = section_out.ram_addr; + uint32_t rel_symbol_offset = 0; + + // Check if the symbol is undefined and to know whether to look for it in the reference symbols. + if (rel_symbol_section_index == ELFIO::SHN_UNDEF) { + // Undefined sym, check the reference symbols. + auto sym_find_it = context.reference_symbols_by_name.find(rel_symbol_name); + if (sym_find_it == context.reference_symbols_by_name.end()) { + fmt::print(stderr, "Undefined symbol: {}, not found in input or reference symbols!\n", + rel_symbol_name); + return nullptr; + } + + reloc_out.reference_symbol = true; + // Replace the reloc's symbol index with the index into the reference symbol array. 
+ reloc_out.symbol_index = sym_find_it->second; + rel_section_vram = 0; + rel_symbol_offset = context.reference_symbols[reloc_out.symbol_index].section_offset; + reloc_out.target_section = context.reference_symbols[reloc_out.symbol_index].section_index; + + bool target_section_relocatable = false; + + if (reloc_out.target_section != RecompPort::SectionAbsolute && context.reference_sections[reloc_out.target_section].relocatable) { + target_section_relocatable = true; + } + + if (reloc_out.type == RecompPort::RelocType::R_MIPS_32 && target_section_relocatable) { + fmt::print(stderr, "Cannot reference {} in a statically initialized variable as it's defined in a relocatable section!\n", + rel_symbol_name); + return nullptr; + } + } + else { + reloc_out.reference_symbol = false; + reloc_out.target_section = rel_symbol_section_index; + } // Reloc pairing, see MIPS System V ABI documentation page 4-18 (https://refspecs.linuxfoundation.org/elf/mipsabi.pdf) if (reloc_out.type == RecompPort::RelocType::R_MIPS_LO16) { + uint32_t rel_immediate = instr.getProcessedImmediate(); + uint32_t full_immediate = (prev_hi_immediate << 16) + (int16_t)rel_immediate; + reloc_out.section_offset = full_immediate + rel_symbol_offset - rel_section_vram; if (prev_hi) { if (prev_hi_symbol != rel_symbol) { fmt::print(stderr, "Paired HI16 and LO16 relocations have different symbols\n" @@ -1083,36 +1177,36 @@ ELFIO::section* read_sections(RecompPort::Context& context, const RecompPort::Co i, section_out.name, reloc_out.symbol_index, reloc_out.address); return nullptr; } - uint32_t rel_immediate = instr.getProcessedImmediate(); - uint32_t full_immediate = (prev_hi_immediate << 16) + (int16_t)rel_immediate; - - // Set this and the previous HI16 relocs' relocated addresses - section_out.relocs[i - 1].target_address = full_immediate; - reloc_out.target_address = full_immediate; + // Set the previous HI16 relocs' relocated address. 
+ section_out.relocs[i - 1].section_offset = reloc_out.section_offset; } else { - if (prev_lo) { - uint32_t rel_immediate = instr.getProcessedImmediate(); - uint32_t full_immediate; - - if (prev_hi_symbol != rel_symbol) { - fmt::print(stderr, "[WARN] LO16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X} follows LO16 with different symbol\n", + // Orphaned LO16 reloc warnings. + if (config.unpaired_lo16_warnings) { + if (prev_lo) { + // Don't warn if multiple LO16 in a row reference the same symbol, as some linkers will use this behavior. + if (prev_hi_symbol != rel_symbol) { + fmt::print(stderr, "[WARN] LO16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X} follows LO16 with different symbol\n", + i, section_out.name, reloc_out.symbol_index, reloc_out.address); + } + } + else { + fmt::print(stderr, "[WARN] Unpaired LO16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X}\n", i, section_out.name, reloc_out.symbol_index, reloc_out.address); } - - full_immediate = (prev_hi_immediate << 16) + (int16_t)rel_immediate; - reloc_out.target_address = full_immediate; - } - else { - fmt::print(stderr, "Unpaired LO16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X}\n", - i, section_out.name, reloc_out.symbol_index, reloc_out.address); - return nullptr; } + // Even though this is an orphaned LO16 reloc, the previous calculation for the addend still follows the MIPS System V ABI documentation: + // "R_MIPS_LO16 entries without an R_MIPS_HI16 entry immediately preceding are orphaned and the previously defined + // R_MIPS_HI16 is used for computing the addend." + // Therefore, nothing needs to be done to the section_offset member. } prev_lo = true; } else { if (prev_hi) { + // This is an invalid elf as the MIPS System V ABI documentation states: + // "Each relocation type of R_MIPS_HI16 must have an associated R_MIPS_LO16 entry + // immediately following it in the list of relocations." 
fmt::print(stderr, "Unpaired HI16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X}\n", i - 1, section_out.name, section_out.relocs[i - 1].symbol_index, section_out.relocs[i - 1].address); return nullptr; @@ -1130,7 +1224,25 @@ ELFIO::section* read_sections(RecompPort::Context& context, const RecompPort::Co } if (reloc_out.type == RecompPort::RelocType::R_MIPS_32) { - // Nothing to do here + // The reloc addend is just the existing word before relocation, so the section offset can just be the symbol's section offset. + // Incorporating the addend will be handled at load-time. + reloc_out.section_offset = rel_symbol_offset; + // TODO set section_out.has_mips32_relocs to true if this section should emit its mips32 relocs (mainly for TLB mapping). + + if (reloc_out.reference_symbol) { + uint32_t reloc_target_section_addr = 0; + if (reloc_out.target_section != RecompPort::SectionAbsolute) { + reloc_target_section_addr = context.reference_sections[reloc_out.target_section].ram_addr; + } + // Patch the word in the ROM to incorporate the symbol's value. 
+ uint32_t updated_reloc_word = reloc_rom_word + reloc_target_section_addr + reloc_out.section_offset; + *reinterpret_cast(context.rom.data() + reloc_rom_addr) = byteswap(updated_reloc_word); + } + } + + if (reloc_out.type == RecompPort::RelocType::R_MIPS_26) { + uint32_t rel_immediate = instr.getProcessedImmediate(); + reloc_out.section_offset = rel_immediate + rel_symbol_offset; } } } @@ -1258,49 +1370,99 @@ std::vector reloc_names { "R_MIPS_GPREL16", }; -void dump_context(const RecompPort::Context& context, const std::filesystem::path& path) { - std::ofstream context_file {path}; +void dump_context(const RecompPort::Context& context, const std::unordered_map>& data_syms, const std::filesystem::path& func_path, const std::filesystem::path& data_path) { + std::ofstream func_context_file {func_path}; + std::ofstream data_context_file {data_path}; + + fmt::print(func_context_file, "# Autogenerated from an ELF via N64Recomp\n"); + fmt::print(data_context_file, "# Autogenerated from an ELF via N64Recomp\n"); - for (size_t section_index = 0; section_index < context.sections.size(); section_index++) { - const RecompPort::Section& section = context.sections[section_index]; - const std::vector& section_funcs = context.section_functions[section_index]; - if (!section_funcs.empty()) { - fmt::print(context_file, - "# Autogenerated from an ELF via N64Recomp\n" + auto print_section = [](std::ofstream& output_file, const std::string& name, uint64_t rom_addr, uint64_t ram_addr, uint64_t size) { + if (rom_addr == (uint64_t)-1) { + fmt::print(output_file, + "[[section]]\n" + "name = \"{}\"\n" + "vram = 0x{:08X}\n" + "size = 0x{:X}\n" + "\n", + name, ram_addr, size); + } + else { + fmt::print(output_file, "[[section]]\n" "name = \"{}\"\n" "rom = 0x{:08X}\n" "vram = 0x{:08X}\n" "size = 0x{:X}\n" "\n", - section.name, section.rom_addr, section.ram_addr, section.size); + name, rom_addr, ram_addr, size); + } + }; + for (size_t section_index = 0; section_index < context.sections.size(); 
section_index++) { + const RecompPort::Section& section = context.sections[section_index]; + const std::vector& section_funcs = context.section_functions[section_index]; + if (!section_funcs.empty()) { + print_section(func_context_file, section.name, section.rom_addr, section.ram_addr, section.size); + + // Dump relocs into the function context file. if (!section.relocs.empty()) { - fmt::print(context_file, "relocs = [\n"); + fmt::print(func_context_file, "relocs = [\n"); for (const RecompPort::Reloc& reloc : section.relocs) { if (reloc.target_section == section_index || reloc.target_section == section.bss_section_index) { - // TODO allow MIPS32 relocs for TLB mapping support. + // TODO allow emitting MIPS32 relocs for specific sections via a toml option for TLB mapping support. if (reloc.type == RecompPort::RelocType::R_MIPS_HI16 || reloc.type == RecompPort::RelocType::R_MIPS_LO16) { - fmt::print(context_file, " {{ type = \"{}\", vram = 0x{:08X}, target_vram = 0x{:08X} }},\n", - reloc_names[static_cast(reloc.type)], reloc.address, reloc.target_address); + fmt::print(func_context_file, " {{ type = \"{}\", vram = 0x{:08X}, target_vram = 0x{:08X} }},\n", + reloc_names[static_cast(reloc.type)], reloc.address, reloc.section_offset + section.ram_addr); } } } - fmt::print(context_file, "]\n\n"); + fmt::print(func_context_file, "]\n\n"); } - fmt::print(context_file, "functions = [\n"); + // Dump functions into the function context file. 
+ fmt::print(func_context_file, "functions = [\n"); for (const size_t& function_index : section_funcs) { const RecompPort::Function& func = context.functions[function_index]; - fmt::print(context_file, " {{ name = \"{}\", vram = 0x{:08X}, size = 0x{:X} }},\n", + fmt::print(func_context_file, " {{ name = \"{}\", vram = 0x{:08X}, size = 0x{:X} }},\n", func.name, func.vram, func.words.size() * sizeof(func.words[0])); } - fmt::print(context_file, "]\n\n"); + fmt::print(func_context_file, "]\n\n"); } + + const auto find_syms_it = data_syms.find((uint16_t)section_index); + if (find_syms_it != data_syms.end() && !find_syms_it->second.empty()) { + if (section.name.ends_with(".bss")) { + fmt::print("asdasd {}\n", section.name); + } + print_section(data_context_file, section.name, section.rom_addr, section.ram_addr, section.size); + + // Dump other symbols into the data context file. + fmt::print(data_context_file, "symbols = [\n"); + + for (const DataSymbol& cur_sym : find_syms_it->second) { + fmt::print(data_context_file, " {{ name = \"{}\", vram = 0x{:08X} }},\n", cur_sym.name, cur_sym.vram); + } + + fmt::print(data_context_file, "]\n\n"); + } + } + + const auto find_abs_syms_it = data_syms.find((uint16_t)-1); + if (find_abs_syms_it != data_syms.end() && !find_abs_syms_it->second.empty()) { + // Dump absolute symbols into the data context file. + print_section(data_context_file, "ABSOLUTE_SYMS", (uint64_t)-1, 0, 0); + fmt::print(data_context_file, "symbols = [\n"); + + for (const DataSymbol& cur_sym : find_abs_syms_it->second) { + fmt::print(data_context_file, " {{ name = \"{}\", vram = 0x{:08X} }},\n", cur_sym.name, cur_sym.vram); + } + + fmt::print(data_context_file, "]\n\n"); } } @@ -1326,6 +1488,9 @@ int main(int argc, char** argv) { std::exit(EXIT_FAILURE); }; + // TODO expose a way to dump the context from the command line. 
+ bool dumping_context = false; + if (argc != 2) { fmt::print("Usage: {} [config file]\n", argv[0]); std::exit(EXIT_SUCCESS); @@ -1380,6 +1545,27 @@ int main(int argc, char** argv) { context = { elf_file }; context.relocatable_sections = std::move(relocatable_sections); + // Import symbols from any reference symbols files that were provided. + if (!config.func_reference_syms_file_path.empty()) { + { + // Create a new temporary context to read the function reference symbol file into, since it's the same format as the recompilation symbol file. + std::vector dummy_rom{}; + RecompPort::Context reference_context{}; + if (!RecompPort::Context::from_symbol_file(config.func_reference_syms_file_path, std::move(dummy_rom), reference_context, false)) { + exit_failure("Failed to load provided function reference symbol file\n"); + } + + // Use the reference context to build a reference symbol list for the actual context. + context.import_reference_context(reference_context); + } + + for (const std::filesystem::path& cur_data_sym_path : config.data_reference_syms_file_paths) { + if (!context.read_data_reference_syms(cur_data_sym_path)) { + exit_failure(fmt::format("Failed to load provided data reference symbol file: {}\n", cur_data_sym_path.string())); + } + } + } + // Read all of the sections in the elf and look for the symbol table section ELFIO::section* symtab_section = read_sections(context, config, elf_file); @@ -1396,8 +1582,11 @@ int main(int argc, char** argv) { context.manually_sized_funcs.emplace(func_size.func_name, func_size.size_bytes); } + // Lists of data symbols organized by section, only used if dumping context. 
+ std::unordered_map> data_syms; + // Read all of the symbols in the elf and look for the entrypoint function - bool found_entrypoint_func = read_symbols(context, elf_file, symtab_section, config.entrypoint, config.has_entrypoint, config.use_absolute_symbols); + bool found_entrypoint_func = read_symbols(context, elf_file, symtab_section, config.entrypoint, config.has_entrypoint, config.use_absolute_symbols, dumping_context, data_syms); // Add any manual functions add_manual_functions(context, elf_file, config.manual_functions); @@ -1405,6 +1594,21 @@ int main(int argc, char** argv) { if (config.has_entrypoint && !found_entrypoint_func) { exit_failure("Could not find entrypoint function\n"); } + + if (dumping_context) { + fmt::print("Dumping context\n"); + // Sort the data syms by address so the output is nicer. + for (auto& [section_index, section_syms] : data_syms) { + std::sort(section_syms.begin(), section_syms.end(), + [](const DataSymbol& a, const DataSymbol& b) { + return a.vram < b.vram; + } + ); + } + + dump_context(context, data_syms, "dump.toml", "data_dump.toml"); + return 0; + } } // Build a context from the provided symbols file. 
else if (!config.symbols_file_path.empty()) { @@ -1412,12 +1616,16 @@ int main(int argc, char** argv) { exit_failure("A ROM file must be provided when using a symbols file\n"); } + if (dumping_context) { + exit_failure("Cannot dump context when using a symbols file\n"); + } + std::vector rom = read_file(config.rom_file_path); if (rom.empty()) { exit_failure("Failed to load ROM file: " + config.rom_file_path.string() + "\n"); } - if (!RecompPort::Context::from_symbol_file(config.symbols_file_path, std::move(rom), context)) { + if (!RecompPort::Context::from_symbol_file(config.symbols_file_path, std::move(rom), context, true)) { exit_failure("Failed to load symbols file\n"); } @@ -1485,11 +1693,6 @@ int main(int argc, char** argv) { std::vector> static_funcs_by_section{ context.sections.size() }; - // TODO expose a way to dump the context from the command line. Make sure not to rename functions when doing so. - //fmt::print("Dumping context\n"); - //dump_context(context, "dump.toml"); - //return 0; - fmt::print("Working dir: {}\n", std::filesystem::current_path().string()); // Stub out any functions specified in the config file. 
@@ -1738,14 +1941,14 @@ int main(int argc, char** argv) { const auto& section = context.sections[section_index]; const auto& section_funcs = context.section_functions[section_index]; - if (!section_funcs.empty()) { + if (section.has_mips32_relocs || !section_funcs.empty()) { std::string_view section_name_trimmed{ section.name }; if (section.relocatable) { relocatable_section_indices.emplace(section.name, written_sections); } - while (section_name_trimmed[0] == '.') { + while (section_name_trimmed.size() > 0 && section_name_trimmed[0] == '.') { section_name_trimmed.remove_prefix(1); } @@ -1797,5 +2000,10 @@ int main(int argc, char** argv) { fmt::print(overlay_file, "}};\n"); } + if (!config.output_binary_path.empty()) { + std::ofstream output_binary{config.output_binary_path, std::ios::binary}; + output_binary.write(reinterpret_cast(context.rom.data()), context.rom.size()); + } + return 0; } diff --git a/src/recompilation.cpp b/src/recompilation.cpp index e48bc02..d9bf139 100644 --- a/src/recompilation.cpp +++ b/src/recompilation.cpp @@ -56,26 +56,61 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C RecompPort::RelocType reloc_type = RecompPort::RelocType::R_MIPS_NONE; uint32_t reloc_section = 0; uint32_t reloc_target_section_offset = 0; + size_t reloc_reference_symbol = (size_t)-1; uint32_t func_vram_end = func.vram + func.words.size() * sizeof(func.words[0]); + uint16_t imm = instr.Get_immediate(); + // Check if this instruction has a reloc. - if (section.relocatable && section.relocs.size() > 0 && section.relocs[reloc_index].address == instr_vram) { + if (section.relocs.size() > 0 && section.relocs[reloc_index].address == instr_vram) { // Get the reloc data for this instruction const auto& reloc = section.relocs[reloc_index]; reloc_section = reloc.target_section; - // Some symbols are in a nonexistent section (e.g. absolute symbols), so check that the section is valid before doing anything else. 
- // Absolute symbols will never need to be relocated so it's safe to skip this. - if (reloc_section < context.sections.size()) { - // Ignore this reloc if it points to a different section. - // Also check if the reloc points to the bss section since that will also be relocated with the section. - if (reloc_section == func.section_index || reloc_section == section.bss_section_index) { - // Record the reloc's data. - reloc_type = reloc.type; - reloc_target_section_offset = reloc.target_address - section.ram_addr; - // Ignore all relocs that aren't HI16 or LO16. - if (reloc_type == RecompPort::RelocType::R_MIPS_HI16 || reloc_type == RecompPort::RelocType::R_MIPS_LO16) { - at_reloc = true; + // Only process this relocation if this section is relocatable or if this relocation targets a reference symbol. + if (section.relocatable || reloc.reference_symbol) { + // Some symbols are in a nonexistent section (e.g. absolute symbols), so check that the section is valid before doing anything else. + // Absolute symbols will never need to be relocated so it's safe to skip this. + // Always process reference symbols relocations. + if (reloc_section < context.sections.size() || reloc.reference_symbol) { + // Ignore this reloc if it points to a different section. + // Also check if the reloc points to the bss section since that will also be relocated with the section. + // Additionally, always process reference symbol relocations. + if (reloc_section == func.section_index || reloc_section == section.bss_section_index || reloc.reference_symbol) { + // Record the reloc's data. + reloc_type = reloc.type; + reloc_target_section_offset = reloc.section_offset; + // Ignore all relocs that aren't HI16 or LO16. 
+ if (reloc_type == RecompPort::RelocType::R_MIPS_HI16 || reloc_type == RecompPort::RelocType::R_MIPS_LO16 || reloc_type == RecompPort::RelocType::R_MIPS_26) { + at_reloc = true; + + if (reloc.reference_symbol) { + reloc_reference_symbol = reloc.symbol_index; + static RecompPort::ReferenceSection dummy_section{ + .rom_addr = 0, + .ram_addr = 0, + .size = 0, + .relocatable = false + }; + const auto& reloc_reference_section = reloc.target_section == RecompPort::SectionAbsolute ? dummy_section : context.reference_sections[reloc.target_section]; + if (!reloc_reference_section.relocatable) { + at_reloc = false; + uint32_t full_immediate = reloc.section_offset + reloc_reference_section.ram_addr; + + if (reloc_type == RecompPort::RelocType::R_MIPS_HI16) { + imm = (full_immediate >> 16) + ((full_immediate >> 15) & 1); + } + else if (reloc_type == RecompPort::RelocType::R_MIPS_LO16) { + imm = full_immediate & 0xFFFF; + } + } + } + } + + // Repoint bss relocations at their non-bss counterpart section. + if (reloc_section == section.bss_section_index) { + reloc_section = func.section_index; + } + } } } } @@ -112,70 +147,90 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C } }; - auto print_func_call = [&](uint32_t target_func_vram, bool link_branch = true, bool indent = false) { - const auto matching_funcs_find = context.functions_by_vram.find(target_func_vram); + auto print_func_call = [reloc_target_section_offset, reloc_reference_symbol, reloc_type, &context, &section, &func, &static_funcs_out, &needs_link_branch, &print_unconditional_branch] + (uint32_t target_func_vram, bool link_branch = true, bool indent = false) + { std::string jal_target_name; - uint32_t section_vram_start = section.ram_addr; - uint32_t section_vram_end = section.ram_addr + section.size; - // TODO the current section should be prioritized if the target jal is in its vram even if a function isn't known (i.e. 
static) - if (matching_funcs_find != context.functions_by_vram.end()) { - // If we found matches for the target function by vram, - const auto& matching_funcs_vec = matching_funcs_find->second; - size_t real_func_index; - bool ambiguous; - // If there is more than one corresponding function, look for any that have a nonzero size. - if (matching_funcs_vec.size() > 1) { - size_t nonzero_func_index = (size_t)-1; - bool found_nonzero_func = false; - for (size_t cur_func_index : matching_funcs_vec) { - const auto& cur_func = context.functions[cur_func_index]; - if (cur_func.words.size() != 0) { - if (found_nonzero_func) { - ambiguous = true; - break; - } - // If this section is relocatable and the target vram is in the section, don't call functions - // in any section other than this one. - if (cur_func.section_index == func.section_index || - !(section.relocatable && target_func_vram >= section_vram_start && target_func_vram < section_vram_end)) { - found_nonzero_func = true; - nonzero_func_index = cur_func_index; - } - } - } - if (nonzero_func_index == (size_t)-1) { - fmt::print(stderr, "[Warn] Potential jal resolution ambiguity\n"); - for (size_t cur_func_index : matching_funcs_vec) { - fmt::print(stderr, " {}\n", context.functions[cur_func_index].name); - } - nonzero_func_index = 0; - } - real_func_index = nonzero_func_index; - ambiguous = false; - } - else { - real_func_index = matching_funcs_vec.front(); - ambiguous = false; - } - if (ambiguous) { - fmt::print(stderr, "Ambiguous jal target: 0x{:08X}\n", target_func_vram); - for (size_t cur_func_index : matching_funcs_vec) { - const auto& cur_func = context.functions[cur_func_index]; - fmt::print(stderr, " {}\n", cur_func.name); - } + if (reloc_reference_symbol != (size_t)-1) { + const auto& ref_symbol = context.reference_symbols[reloc_reference_symbol]; + const std::string& ref_symbol_name = context.reference_symbol_names[reloc_reference_symbol]; + + if (reloc_type != RecompPort::RelocType::R_MIPS_26) { + 
fmt::print(stderr, "Unsupported reloc type {} on jal instruction in {}\n", (int)reloc_type, func.name); return false; } - jal_target_name = context.functions[real_func_index].name; + + if (ref_symbol.section_offset != reloc_target_section_offset) { + fmt::print(stderr, "Function {} uses a MIPS_R_26 addend, which is not supported yet\n", func.name); + return false; + } + + jal_target_name = ref_symbol_name; } else { - const auto& section = context.sections[func.section_index]; - if (target_func_vram >= section.ram_addr && target_func_vram < section.ram_addr + section.size) { - jal_target_name = fmt::format("static_{}_{:08X}", func.section_index, target_func_vram); - static_funcs_out[func.section_index].push_back(target_func_vram); + const auto matching_funcs_find = context.functions_by_vram.find(target_func_vram); + uint32_t section_vram_start = section.ram_addr; + uint32_t section_vram_end = section.ram_addr + section.size; + // TODO the current section should be prioritized if the target jal is in its vram even if a function isn't known (i.e. static) + if (matching_funcs_find != context.functions_by_vram.end()) { + // If we found matches for the target function by vram, + const auto& matching_funcs_vec = matching_funcs_find->second; + size_t real_func_index; + bool ambiguous; + // If there is more than one corresponding function, look for any that have a nonzero size. + if (matching_funcs_vec.size() > 1) { + size_t nonzero_func_index = (size_t)-1; + bool found_nonzero_func = false; + for (size_t cur_func_index : matching_funcs_vec) { + const auto& cur_func = context.functions[cur_func_index]; + if (cur_func.words.size() != 0) { + if (found_nonzero_func) { + ambiguous = true; + break; + } + // If this section is relocatable and the target vram is in the section, don't call functions + // in any section other than this one. 
+ if (cur_func.section_index == func.section_index || + !(section.relocatable && target_func_vram >= section_vram_start && target_func_vram < section_vram_end)) { + found_nonzero_func = true; + nonzero_func_index = cur_func_index; + } + } + } + if (nonzero_func_index == (size_t)-1) { + fmt::print(stderr, "[Warn] Potential jal resolution ambiguity\n"); + for (size_t cur_func_index : matching_funcs_vec) { + fmt::print(stderr, " {}\n", context.functions[cur_func_index].name); + } + nonzero_func_index = 0; + } + real_func_index = nonzero_func_index; + ambiguous = false; + } + else { + real_func_index = matching_funcs_vec.front(); + ambiguous = false; + } + if (ambiguous) { + fmt::print(stderr, "Ambiguous jal target: 0x{:08X}\n", target_func_vram); + for (size_t cur_func_index : matching_funcs_vec) { + const auto& cur_func = context.functions[cur_func_index]; + fmt::print(stderr, " {}\n", cur_func.name); + } + return false; + } + jal_target_name = context.functions[real_func_index].name; } else { - fmt::print(stderr, "No function found for jal target: 0x{:08X}\n", target_func_vram); - return false; + const auto& section = context.sections[func.section_index]; + if (target_func_vram >= section.ram_addr && target_func_vram < section.ram_addr + section.size) { + jal_target_name = fmt::format("static_{}_{:08X}", func.section_index, target_func_vram); + static_funcs_out[func.section_index].push_back(target_func_vram); + } + else { + fmt::print(stderr, "No function found for jal target: 0x{:08X}\n", target_func_vram); + return false; + } } } needs_link_branch = link_branch; @@ -238,8 +293,6 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C int cop1_cs = (int)instr.Get_cop1cs(); - uint16_t imm = instr.Get_immediate(); - std::string unsigned_imm_string; std::string signed_imm_string; @@ -249,15 +302,19 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C } else { switch (reloc_type) { case 
RecompPort::RelocType::R_MIPS_HI16: - unsigned_imm_string = fmt::format("RELOC_HI16({}, {:#X})", (uint32_t)func.section_index, reloc_target_section_offset); + unsigned_imm_string = fmt::format("RELOC_HI16({}, {:#X})", reloc_section, reloc_target_section_offset); signed_imm_string = "(int16_t)" + unsigned_imm_string; reloc_handled = true; break; case RecompPort::RelocType::R_MIPS_LO16: - unsigned_imm_string = fmt::format("RELOC_LO16({}, {:#X})", (uint32_t)func.section_index, reloc_target_section_offset); + unsigned_imm_string = fmt::format("RELOC_LO16({}, {:#X})", reloc_section, reloc_target_section_offset); signed_imm_string = "(int16_t)" + unsigned_imm_string; reloc_handled = true; break; + case RecompPort::RelocType::R_MIPS_26: + // Nothing to do here, this will be handled by print_func_call. + reloc_handled = true; + break; default: throw std::runtime_error(fmt::format("Unexpected reloc type {} in {}\n", static_cast(reloc_type), func.name)); } @@ -440,10 +497,10 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C print_line("{}{} = hi", ctx_gpr_prefix(rd), rd); break; case InstrId::cpu_mtlo: - print_line("lo = {}{}", ctx_gpr_prefix(rd), rd); + print_line("lo = {}{}", ctx_gpr_prefix(rs), rs); break; case InstrId::cpu_mthi: - print_line("hi = {}{}", ctx_gpr_prefix(rd), rd); + print_line("hi = {}{}", ctx_gpr_prefix(rs), rs); break; // Loads case InstrId::cpu_ld: @@ -1166,7 +1223,6 @@ bool RecompPort::recompile_function(const RecompPort::Context& context, const Re bool needs_link_branch = false; bool in_likely_delay_slot = false; const auto& section = context.sections[func.section_index]; - bool needs_reloc = section.relocatable && section.relocs.size() > 0; size_t reloc_index = 0; for (size_t instr_index = 0; instr_index < instructions.size(); ++instr_index) { bool had_link_branch = needs_link_branch; @@ -1181,11 +1237,9 @@ bool RecompPort::recompile_function(const RecompPort::Context& context, const Re ++cur_label; } - // If this is 
a relocatable section, advance the reloc index until we reach the last one or until we get to/pass the current instruction - if (needs_reloc) { - while (reloc_index < (section.relocs.size() - 1) && section.relocs[reloc_index].address < vram) { - reloc_index++; - } + // Advance the reloc index until we reach the last one or until we get to/pass the current instruction + while ((reloc_index + 1) < section.relocs.size() && section.relocs[reloc_index].address < vram) { + reloc_index++; } // Process the current instruction and check for errors