Add symbol reference file mechanism for elf recompilation (#82)

* Consolidate context dumping toggle into a single bool, begin work on data symbol context dumping
* Added data symbol context dumping
* Fix mthi/mtlo implementation
* Add option to control unpaired LO16 warnings
This commit is contained in:
Wiseguy 2024-07-02 21:42:22 -04:00 committed by GitHub
parent 16819a0515
commit ba4aede49c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 778 additions and 274 deletions

View file

@ -68,11 +68,15 @@ namespace RecompPort {
bool uses_mips3_float_mode;
bool single_file_output;
bool use_absolute_symbols;
bool unpaired_lo16_warnings;
std::filesystem::path elf_path;
std::filesystem::path symbols_file_path;
std::filesystem::path func_reference_syms_file_path;
std::vector<std::filesystem::path> data_reference_syms_file_paths;
std::filesystem::path rom_file_path;
std::filesystem::path output_func_path;
std::filesystem::path relocatable_sections_path;
std::filesystem::path output_binary_path;
std::vector<std::string> stubbed_funcs;
std::vector<std::string> ignored_funcs;
DeclaredFunctionMap declared_funcs;
@ -137,12 +141,15 @@ namespace RecompPort {
// Describes a single relocation recorded in a section's `relocs` list.
struct Reloc {
// Address of the word being relocated. Filled from a reloc entry's `vram` when reading a symbol file
// and from the reloc's offset when reading an elf.
uint32_t address;
// Full address that the reloc points at (a symbol file entry's `target_vram`, or the combined
// HI16/LO16 immediate for elf relocs).
uint32_t target_address;
// Offset of the reloc's target relative to a section's start. The symbol file reader stores
// `target_vram` minus the section's ram address here.
uint32_t section_offset;
// Index of the symbol this reloc refers to. Set to (uint32_t)-1 for relocs read from a symbol file;
// for relocs resolved against reference symbols it is the index into the reference symbol array.
uint32_t symbol_index;
// Index of the section that the reloc targets. For reference symbol relocs this is the reference
// symbol's section index.
uint32_t target_section;
// Kind of relocation (e.g. R_MIPS_HI16, R_MIPS_LO16, R_MIPS_32).
RelocType type;
// True when the reloc's symbol was undefined in the input and was found in the reference symbols instead.
bool reference_symbol;
};
// Special section index values that don't correspond to a real entry in a section list.
// NOTE(review): SectionSelf presumably marks a symbol that lives in the section being processed;
// it isn't used in the visible code, so confirm against its users.
constexpr uint16_t SectionSelf = (uint16_t)-1;
// Section index given to symbols that are treated as absolute (non-relocatable), such as symbols in
// the elf's absolute section or data sections with no matching reference section.
constexpr uint16_t SectionAbsolute = (uint16_t)-2;
struct Section {
ELFIO::Elf_Xword rom_addr = 0;
ELFIO::Elf64_Addr ram_addr = 0;
@ -153,6 +160,7 @@ namespace RecompPort {
ELFIO::Elf_Half bss_section_index = (ELFIO::Elf_Half)-1;
bool executable = false;
bool relocatable = false;
bool has_mips32_relocs = false;
};
struct FunctionStats {
@ -160,6 +168,19 @@ namespace RecompPort {
std::vector<AbsoluteJump> absolute_jumps;
};
// A section that contains reference symbols. These are populated by copying the matching fields out of
// a reference context's sections.
struct ReferenceSection {
// ROM address of the section. Also used as the key for matching data symbol file sections to reference sections.
uint32_t rom_addr;
// RAM (vram) address of the start of the section.
uint32_t ram_addr;
// Size of the section.
uint32_t size;
// Whether the section is relocatable.
bool relocatable;
};
// A symbol (function or data) that was imported from a reference symbol file.
struct ReferenceSymbol {
// Index of the reference section that holds this symbol, or SectionAbsolute for symbols that aren't
// tied to a reference section.
uint16_t section_index;
// The symbol's vram address minus the ram address of its section (the section address is taken as 0
// for SectionAbsolute symbols).
uint32_t section_offset;
// True for symbols imported as functions, false for symbols read from a data symbol file.
bool is_function;
};
struct Context {
// ROM address of each section
std::vector<Section> sections;
@ -174,6 +195,16 @@ namespace RecompPort {
std::unordered_set<std::string> relocatable_sections;
// Functions with manual size overrides
std::unordered_map<std::string, size_t> manually_sized_funcs;
//// Reference symbols (used for populating relocations for patches)
// A list of the sections that contain the reference symbols.
std::vector<ReferenceSection> reference_sections;
// A list of the reference symbols.
std::vector<ReferenceSymbol> reference_symbols;
// Name of every reference symbol in the same order as `reference_symbols`.
std::vector<std::string> reference_symbol_names;
// Mapping of symbol name to reference symbol index.
std::unordered_map<std::string, size_t> reference_symbols_by_name;
int executable_section_count;
Context(const ELFIO::elfio& elf_file) {
@ -186,7 +217,12 @@ namespace RecompPort {
executable_section_count = 0;
}
static bool from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector<uint8_t>&& rom, Context& out);
// Imports sections and function symbols from a provided context into this context's reference sections and reference functions.
void import_reference_context(const Context& reference_context);
// Reads a data symbol file and adds its contents into this context's reference data symbols.
bool read_data_reference_syms(const std::filesystem::path& data_syms_file_path);
static bool from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector<uint8_t>&& rom, Context& out, bool with_relocs);
Context() = default;
};

View file

@ -4,6 +4,13 @@
#include "fmt/format.h"
#include "recomp_port.h"
// Resolves `child` against `parent`. An empty `child` stays empty so that unset paths remain unset
// instead of turning into the parent directory itself.
std::filesystem::path concat_if_not_empty(const std::filesystem::path& parent, const std::filesystem::path& child) {
    return child.empty() ? child : parent / child;
}
std::vector<RecompPort::ManualFunction> get_manual_funcs(const toml::array* manual_funcs_array) {
std::vector<RecompPort::ManualFunction> ret;
@ -30,6 +37,23 @@ std::vector<RecompPort::ManualFunction> get_manual_funcs(const toml::array* manu
return ret;
}
// Converts a toml array of strings into a list of data reference symbol file paths.
// Each non-empty entry is resolved relative to `basedir`; empty entries stay empty.
// Throws toml::parse_error if any element of the array isn't a string.
std::vector<std::filesystem::path> get_data_syms_paths(const toml::array* data_syms_paths_array, const std::filesystem::path& basedir) {
    std::vector<std::filesystem::path> paths;

    // One output path is produced per array element, so allocate them all up front.
    paths.reserve(data_syms_paths_array->size());

    data_syms_paths_array->for_each([&paths, &basedir](auto&& entry) {
        if constexpr (!toml::is_string<decltype(entry)>) {
            throw toml::parse_error("Invalid type for data reference symbol file entry", entry.source());
        }
        else {
            std::string relative_path = entry.template value_exact<std::string>().value();
            paths.emplace_back(concat_if_not_empty(basedir, std::move(relative_path)));
        }
    });

    return paths;
}
std::vector<std::string> get_stubbed_funcs(const toml::table* patches_data) {
std::vector<std::string> stubbed_funcs{};
@ -268,13 +292,6 @@ std::vector<RecompPort::FunctionHook> get_function_hooks(const toml::table* patc
return ret;
}
// Joins `child` onto `parent`, leaving an empty `child` untouched so that an unset path stays unset.
std::filesystem::path concat_if_not_empty(const std::filesystem::path& parent, const std::filesystem::path& child) {
    // Nothing to append: hand back the (empty) child as-is.
    if (child.empty()) {
        return child;
    }

    std::filesystem::path joined = parent;
    joined /= child;
    return joined;
}
RecompPort::Config::Config(const char* path) {
// Start this config out as bad so that it has to finish parsing without errors to be good.
entrypoint = 0;
@ -370,7 +387,25 @@ RecompPort::Config::Config(const char* path) {
toml::node_view manual_functions_data = input_data["manual_funcs"];
if (manual_functions_data.is_array()) {
const toml::array* array = manual_functions_data.as_array();
get_manual_funcs(array);
manual_functions = get_manual_funcs(array);
}
// Output binary path when using an elf file input, includes patching reference symbol MIPS32 relocs (optional)
std::optional<std::string> output_binary_path_opt = input_data["output_binary_path"].value<std::string>();
if (output_binary_path_opt.has_value()) {
output_binary_path = concat_if_not_empty(basedir, output_binary_path_opt.value());
}
else {
output_binary_path = "";
}
// Control whether the recompiler warns about unpaired LO16 relocs (optional, defaults to true)
std::optional<bool> unpaired_lo16_warnings_opt = input_data["unpaired_lo16_warnings"].value<bool>();
if (unpaired_lo16_warnings_opt.has_value()) {
unpaired_lo16_warnings = unpaired_lo16_warnings_opt.value();
}
else {
unpaired_lo16_warnings = true;
}
// Patches section (optional)
@ -396,6 +431,28 @@ RecompPort::Config::Config(const char* path) {
// Function hooks (optional)
function_hooks = get_function_hooks(table);
}
// Function reference symbols file (optional)
std::optional<std::string> func_reference_syms_file_opt = input_data["func_reference_syms_file"].value<std::string>();
if (func_reference_syms_file_opt.has_value()) {
if (!symbols_file_path.empty()) {
throw toml::parse_error("Reference symbol files can only be used in elf input mode", input_data["func_reference_syms_file"].node()->source());
}
func_reference_syms_file_path = concat_if_not_empty(basedir, func_reference_syms_file_opt.value());
}
// Data reference symbols files (optional)
toml::node_view data_reference_syms_file_data = input_data["data_reference_syms_files"];
if (data_reference_syms_file_data.is_array()) {
if (!symbols_file_path.empty()) {
throw toml::parse_error("Reference symbol files can only be used in elf input mode", data_reference_syms_file_data.node()->source());
}
if (func_reference_syms_file_path.empty()) {
throw toml::parse_error("Data reference symbol files can only be used if a function reference symbol file is also in use", data_reference_syms_file_data.node()->source());
}
const toml::array* array = data_reference_syms_file_data.as_array();
data_reference_syms_file_paths = get_data_syms_paths(array, basedir);
}
}
catch (const toml::parse_error& err) {
std::cerr << "Syntax error parsing toml: " << *err.source().path << " (" << err.source().begin << "):\n" << err.description() << std::endl;
@ -425,7 +482,7 @@ RecompPort::RelocType reloc_type_from_name(const std::string& reloc_type_name) {
return RecompPort::RelocType::R_MIPS_NONE;
}
bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector<uint8_t>&& rom, RecompPort::Context& out) {
bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector<uint8_t>&& rom, RecompPort::Context& out, bool with_relocs) {
RecompPort::Context ret{};
try {
@ -439,7 +496,7 @@ bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_f
const toml::array* config_sections = config_sections_value.as_array();
ret.section_functions.resize(config_sections->size());
config_sections->for_each([&ret, &rom](auto&& el) {
config_sections->for_each([&ret, &rom, with_relocs](auto&& el) {
if constexpr (toml::is_table<decltype(el)>) {
std::optional<uint32_t> rom_addr = el["rom"].template value<uint32_t>();
std::optional<uint32_t> vram_addr = el["vram"].template value<uint32_t>();
@ -496,15 +553,18 @@ bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_f
throw toml::parse_error("Function's rom address isn't word aligned", func_el.source());
}
if (cur_func.rom + func_size > rom.size()) {
// Function is out of bounds of the provided rom.
throw toml::parse_error("Functio is out of bounds of the provided rom", func_el.source());
}
// Read the function's words if a rom was provided.
if (!rom.empty()) {
if (cur_func.rom + func_size > rom.size()) {
// Function is out of bounds of the provided rom.
throw toml::parse_error("Function is out of bounds of the provided rom", func_el.source());
}
// Get the function's words from the rom.
cur_func.words.reserve(func_size / sizeof(uint32_t));
for (size_t rom_addr = cur_func.rom; rom_addr < cur_func.rom + func_size; rom_addr += sizeof(uint32_t)) {
cur_func.words.push_back(*reinterpret_cast<const uint32_t*>(rom.data() + rom_addr));
// Get the function's words from the rom.
cur_func.words.reserve(func_size / sizeof(uint32_t));
for (size_t rom_addr = cur_func.rom; rom_addr < cur_func.rom + func_size; rom_addr += sizeof(uint32_t)) {
cur_func.words.push_back(*reinterpret_cast<const uint32_t*>(rom.data() + rom_addr));
}
}
section.function_addrs.push_back(cur_func.vram);
@ -525,38 +585,39 @@ bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_f
// Mark the section as relocatable, since it has relocs.
section.relocatable = true;
// Read relocs for the section.
const toml::array* relocs_array = relocs_value.as_array();
relocs_array->for_each([&ret, &rom, &section, section_index](auto&& reloc_el) {
if constexpr (toml::is_table<decltype(reloc_el)>) {
std::optional<uint32_t> vram = reloc_el["vram"].template value<uint32_t>();
std::optional<uint32_t> target_vram = reloc_el["target_vram"].template value<uint32_t>();
std::optional<std::string> type_string = reloc_el["type"].template value<std::string>();
if (with_relocs) {
// Read relocs for the section.
const toml::array* relocs_array = relocs_value.as_array();
relocs_array->for_each([&ret, &rom, &section, section_index](auto&& reloc_el) {
if constexpr (toml::is_table<decltype(reloc_el)>) {
std::optional<uint32_t> vram = reloc_el["vram"].template value<uint32_t>();
std::optional<uint32_t> target_vram = reloc_el["target_vram"].template value<uint32_t>();
std::optional<std::string> type_string = reloc_el["type"].template value<std::string>();
if (!vram.has_value() || !target_vram.has_value() || !type_string.has_value()) {
throw toml::parse_error("Reloc entry missing required field(s)", reloc_el.source());
if (!vram.has_value() || !target_vram.has_value() || !type_string.has_value()) {
throw toml::parse_error("Reloc entry missing required field(s)", reloc_el.source());
}
RelocType reloc_type = reloc_type_from_name(type_string.value());
if (reloc_type != RelocType::R_MIPS_HI16 && reloc_type != RelocType::R_MIPS_LO16 && reloc_type != RelocType::R_MIPS_32) {
throw toml::parse_error("Invalid reloc entry type", reloc_el.source());
}
Reloc cur_reloc{};
cur_reloc.address = vram.value();
cur_reloc.section_offset = target_vram.value() - section.ram_addr;
cur_reloc.symbol_index = (uint32_t)-1;
cur_reloc.target_section = section_index;
cur_reloc.type = reloc_type;
section.relocs.emplace_back(cur_reloc);
}
RelocType reloc_type = reloc_type_from_name(type_string.value());
// TODO also accept MIPS32 for TLB relocations.
if (reloc_type != RelocType::R_MIPS_HI16 && reloc_type != RelocType::R_MIPS_LO16) {
throw toml::parse_error("Invalid reloc entry type", reloc_el.source());
else {
throw toml::parse_error("Invalid reloc entry", reloc_el.source());
}
Reloc cur_reloc{};
cur_reloc.address = vram.value();
cur_reloc.target_address = target_vram.value();
cur_reloc.symbol_index = (uint32_t)-1;
cur_reloc.target_section = section_index;
cur_reloc.type = reloc_type;
section.relocs.emplace_back(cur_reloc);
}
else {
throw toml::parse_error("Invalid reloc entry", reloc_el.source());
}
});
});
}
}
else {
section.relocatable = false;
@ -575,3 +636,148 @@ bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_f
out = std::move(ret);
return true;
}
// Imports the sections and functions of `reference_context` into this context's reference data.
// - `reference_sections` is resized to match the reference context's section list and each entry is
//   filled from the section with the same index.
// - Every function of the reference context is appended to `reference_symbols` /
//   `reference_symbol_names` as a function symbol and registered in `reference_symbols_by_name`.
void RecompPort::Context::import_reference_context(const RecompPort::Context& reference_context) {
    const size_t num_sections = reference_context.sections.size();
    const size_t num_functions = reference_context.functions.size();

    reference_sections.resize(num_sections);
    reference_symbols.reserve(num_functions);
    reference_symbol_names.reserve(num_functions);

    // Mirror the reference context's sections into this context's reference sections, index for index.
    for (size_t i = 0; i < num_sections; i++) {
        const RecompPort::Section& source_section = reference_context.sections[i];

        reference_sections[i] = RecompPort::ReferenceSection{
            .rom_addr = static_cast<uint32_t>(source_section.rom_addr),
            .ram_addr = static_cast<uint32_t>(source_section.ram_addr),
            .size = static_cast<uint32_t>(source_section.size),
            .relocatable = source_section.relocatable
        };
    }

    // Record each of the reference context's functions as a reference symbol of this context.
    for (const RecompPort::Function& func_in : reference_context.functions) {
        const uint32_t section_vram = static_cast<uint32_t>(reference_context.sections[func_in.section_index].ram_addr);
        const size_t new_symbol_index = reference_symbols.size();

        RecompPort::ReferenceSymbol new_symbol{};
        new_symbol.section_index = func_in.section_index;
        // Symbols are stored relative to the start of their section.
        new_symbol.section_offset = func_in.vram - section_vram;
        new_symbol.is_function = true;

        reference_symbols_by_name.emplace(func_in.name, new_symbol_index);
        reference_symbols.push_back(new_symbol);
        reference_symbol_names.push_back(func_in.name);
    }
}
// Reads a data symbol file and adds its contents into this context's reference data symbols.
//
// Each `[[section]]` entry of the file is matched by ROM address against the reference sections that
// were already imported. Sections with no ROM address, or with no matching reference section, have
// their symbols recorded as absolute (RecompPort::SectionAbsolute). Every symbol is appended to
// `reference_symbols` / `reference_symbol_names` and registered in `reference_symbols_by_name`.
//
// Returns false if the file has no `section` array or if a toml error occurs (the error is printed
// to stderr), and true otherwise.
bool RecompPort::Context::read_data_reference_syms(const std::filesystem::path& data_syms_file_path) {
    try {
        const toml::table data_syms_file_data = toml::parse_file(data_syms_file_path.u8string());
        const toml::node_view data_sections_value = data_syms_file_data["section"];

        if (!data_sections_value.is_array()) {
            return false;
        }

        // Create a mapping of rom address to section to ensure that the same section indexes are used for both function and data reference symbols.
        std::unordered_map<uint32_t, uint16_t> ref_section_indices_by_vrom;

        for (uint16_t section_index = 0; section_index < reference_sections.size(); section_index++) {
            ref_section_indices_by_vrom.emplace(reference_sections[section_index].rom_addr, section_index);
        }

        const toml::array* data_sections = data_sections_value.as_array();

        data_sections->for_each([this, &ref_section_indices_by_vrom](auto&& el) {
            if constexpr (toml::is_table<decltype(el)>) {
                std::optional<uint64_t> rom_addr = el["rom"].template value<uint64_t>();
                std::optional<uint32_t> vram_addr = el["vram"].template value<uint32_t>();
                std::optional<uint32_t> size = el["size"].template value<uint32_t>();
                std::optional<std::string> name = el["name"].template value<std::string>();

                if (!vram_addr.has_value() || !size.has_value() || !name.has_value()) {
                    throw toml::parse_error("Section entry missing required field(s)", el.source());
                }

                uint16_t ref_section_index;
                if (!rom_addr.has_value()) {
                    ref_section_index = RecompPort::SectionAbsolute; // Non-relocatable bss section or absolute symbols, mark this as an absolute symbol
                }
                else if (rom_addr.value() > 0xFFFFFFFF) {
                    throw toml::parse_error("Section has invalid ROM address", el.source());
                }
                else {
                    // Find the matching section from the function reference symbol file to ensure that
                    // data symbols use the same section index as the functions in that section.
                    auto find_section_it = ref_section_indices_by_vrom.find(rom_addr.value());
                    if (find_section_it != ref_section_indices_by_vrom.end()) {
                        ref_section_index = find_section_it->second;
                    }
                    else {
                        ref_section_index = RecompPort::SectionAbsolute; // Not in the function symbol reference file, so this section can be treated as non-relocatable.
                    }
                }

                // Stand-in section for absolute symbols. Its ram address of 0 makes the "offset" of an
                // absolute symbol equal to the symbol's full vram address.
                static ReferenceSection dummy_absolute_section {
                    .rom_addr = 0,
                    .ram_addr = 0,
                    .size = 0,
                    .relocatable = 0
                };
                const ReferenceSection& ref_section = ref_section_index == RecompPort::SectionAbsolute ? dummy_absolute_section : this->reference_sections[ref_section_index];

                // Sanity check this section against the matching one in the function reference symbol file if one exists.
                if (ref_section_index != RecompPort::SectionAbsolute) {
                    if (ref_section.ram_addr != vram_addr.value()) {
                        throw toml::parse_error("Section vram address differs from matching ROM address section in the function symbol reference file", el.source());
                    }
                    if (ref_section.size != size.value()) {
                        throw toml::parse_error("Section size address differs from matching ROM address section in the function symbol reference file", el.source());
                    }
                }

                // Read data symbols for the section.
                const toml::node_view cur_symbols_value = el["symbols"];
                if (!cur_symbols_value.is_array()) {
                    // The view holds no node at all when the `symbols` key is absent, so fall back to
                    // the section entry's location instead of dereferencing a null node.
                    const auto* symbols_node = cur_symbols_value.node();
                    throw toml::parse_error("Invalid symbols array", symbols_node != nullptr ? symbols_node->source() : el.source());
                }

                uint32_t ref_section_vram = ref_section.ram_addr;

                const toml::array* cur_symbols = cur_symbols_value.as_array();
                cur_symbols->for_each([this, ref_section_index, ref_section_vram](auto&& data_sym_el) {
                    if constexpr (toml::is_table<decltype(data_sym_el)>) {
                        std::optional<std::string> name = data_sym_el["name"].template value<std::string>();
                        std::optional<uint32_t> vram_addr = data_sym_el["vram"].template value<uint32_t>();

                        if (!name.has_value() || !vram_addr.has_value()) {
                            throw toml::parse_error("Reference data symbol entry is missing required field(s)", data_sym_el.source());
                        }

                        // The new symbol's index is the current size of the symbol list, so register the
                        // name before appending the symbol itself.
                        this->reference_symbols_by_name.emplace(name.value(), this->reference_symbols.size());

                        this->reference_symbols.emplace_back(
                            ReferenceSymbol {
                                .section_index = ref_section_index,
                                .section_offset = vram_addr.value() - ref_section_vram,
                                .is_function = false
                            }
                        );
                        this->reference_symbol_names.emplace_back(name.value());
                    }
                    else {
                        throw toml::parse_error("Invalid data symbol entry", data_sym_el.source());
                    }
                });
            } else {
                throw toml::parse_error("Invalid section entry", el.source());
            }
        });
    }
    catch (const toml::parse_error& err) {
        std::cerr << "Syntax error parsing toml: " << *err.source().path << " (" << err.source().begin << "):\n" << err.description() << std::endl;
        return false;
    }

    return true;
}

View file

@ -672,11 +672,33 @@ std::unordered_set<std::string> renamed_funcs{
"_matherr",
};
bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, uint32_t entrypoint, bool has_entrypoint, bool use_absolute_symbols) {
// A named data symbol and its vram address, collected while reading an elf's symbol table.
struct DataSymbol {
    uint32_t vram;
    std::string name;

    // Takes ownership of the provided name.
    DataSymbol(uint32_t vram_in, std::string&& name_in)
        : vram{ vram_in }, name{ std::move(name_in) } {}
};
bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, uint32_t entrypoint, bool has_entrypoint, bool use_absolute_symbols, bool dumping_context, std::unordered_map<uint16_t, std::vector<DataSymbol>>& data_syms) {
bool found_entrypoint_func = false;
ELFIO::symbol_section_accessor symbols{ elf_file, symtab_section };
fmt::print("Num symbols: {}\n", symbols.get_symbols_num());
std::unordered_map<uint16_t, uint16_t> bss_section_to_target_section{};
// Create a mapping of bss section to the corresponding non-bss section. This is only used when dumping context in order
// for patches and mods to correctly relocate symbols in bss. This mapping only matters for relocatable sections.
if (dumping_context) {
// Process bss and reloc sections
for (size_t cur_section_index = 0; cur_section_index < context.sections.size(); cur_section_index++) {
const RecompPort::Section& cur_section = context.sections[cur_section_index];
// Check if a bss section was found that corresponds with this section.
if (cur_section.bss_section_index != (uint16_t)-1) {
bss_section_to_target_section[cur_section.bss_section_index] = cur_section_index;
}
}
}
for (int sym_index = 0; sym_index < symbols.get_symbols_num(); sym_index++) {
std::string name;
ELFIO::Elf64_Addr value;
@ -687,6 +709,7 @@ bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, EL
unsigned char other;
bool ignored = false;
bool reimplemented = false;
bool recorded_symbol = false;
// Read symbol properties
symbols.get_symbol(sym_index, name, value, size, bind, type,
@ -709,105 +732,135 @@ bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, EL
continue;
}
if (section_index >= context.sections.size()) {
continue;
}
// Check if this symbol is the entrypoint
if (has_entrypoint && value == entrypoint && type == ELFIO::STT_FUNC) {
if (found_entrypoint_func) {
fmt::print(stderr, "Ambiguous entrypoint: {}\n", name);
return false;
}
found_entrypoint_func = true;
fmt::print("Found entrypoint, original name: {}\n", name);
size = 0x50; // dummy size for entrypoints, should cover them all
name = "recomp_entrypoint";
}
// Check if this symbol has a size override
auto size_find = context.manually_sized_funcs.find(name);
if (size_find != context.manually_sized_funcs.end()) {
size = size_find->second;
type = ELFIO::STT_FUNC;
}
if (reimplemented_funcs.contains(name)) {
reimplemented = true;
name = name + "_recomp";
ignored = true;
} else if (ignored_funcs.contains(name)) {
name = name + "_recomp";
ignored = true;
}
auto& section = context.sections[section_index];
// Check if this symbol is a function or has no type (like a regular glabel would)
// Symbols with no type have a dummy entry created so that their symbol can be looked up for function calls
if (ignored || type == ELFIO::STT_FUNC || type == ELFIO::STT_NOTYPE || type == ELFIO::STT_OBJECT) {
if (renamed_funcs.contains(name)) {
name = name + "_recomp";
ignored = false;
if (section_index < context.sections.size()) {
// Check if this symbol is the entrypoint
if (has_entrypoint && value == entrypoint && type == ELFIO::STT_FUNC) {
if (found_entrypoint_func) {
fmt::print(stderr, "Ambiguous entrypoint: {}\n", name);
return false;
}
found_entrypoint_func = true;
fmt::print("Found entrypoint, original name: {}\n", name);
size = 0x50; // dummy size for entrypoints, should cover them all
name = "recomp_entrypoint";
}
if (section_index < context.sections.size()) {
auto section_offset = value - elf_file.sections[section_index]->get_address();
const uint32_t* words = reinterpret_cast<const uint32_t*>(elf_file.sections[section_index]->get_data() + section_offset);
uint32_t vram = static_cast<uint32_t>(value);
uint32_t num_instructions = type == ELFIO::STT_FUNC ? size / 4 : 0;
uint32_t rom_address = static_cast<uint32_t>(section_offset + section.rom_addr);
// Check if this symbol has a size override
auto size_find = context.manually_sized_funcs.find(name);
if (size_find != context.manually_sized_funcs.end()) {
size = size_find->second;
type = ELFIO::STT_FUNC;
}
section.function_addrs.push_back(vram);
context.functions_by_vram[vram].push_back(context.functions.size());
if (!dumping_context) {
if (reimplemented_funcs.contains(name)) {
reimplemented = true;
name = name + "_recomp";
ignored = true;
} else if (ignored_funcs.contains(name)) {
name = name + "_recomp";
ignored = true;
}
}
// Find the entrypoint by rom address in case it doesn't have vram as its value
if (has_entrypoint && rom_address == 0x1000 && type == ELFIO::STT_FUNC) {
vram = entrypoint;
found_entrypoint_func = true;
name = "recomp_entrypoint";
if (size == 0) {
num_instructions = 0x50 / 4;
auto& section = context.sections[section_index];
// Check if this symbol is a function or has no type (like a regular glabel would)
// Symbols with no type have a dummy entry created so that their symbol can be looked up for function calls
if (ignored || type == ELFIO::STT_FUNC || type == ELFIO::STT_NOTYPE || type == ELFIO::STT_OBJECT) {
if (!dumping_context) {
if (renamed_funcs.contains(name)) {
name = name + "_recomp";
ignored = false;
}
}
// Suffix local symbols to prevent name conflicts.
if (bind == ELFIO::STB_LOCAL) {
name = fmt::format("{}_{:08X}", name, rom_address);
}
if (num_instructions > 0) {
context.section_functions[section_index].push_back(context.functions.size());
}
context.functions_by_name[name] = context.functions.size();
if (section_index < context.sections.size()) {
auto section_offset = value - elf_file.sections[section_index]->get_address();
const uint32_t* words = reinterpret_cast<const uint32_t*>(elf_file.sections[section_index]->get_data() + section_offset);
uint32_t vram = static_cast<uint32_t>(value);
uint32_t num_instructions = type == ELFIO::STT_FUNC ? size / 4 : 0;
uint32_t rom_address = static_cast<uint32_t>(section_offset + section.rom_addr);
std::vector<uint32_t> insn_words(num_instructions);
insn_words.assign(words, words + num_instructions);
section.function_addrs.push_back(vram);
context.functions_by_vram[vram].push_back(context.functions.size());
context.functions.emplace_back(
vram,
rom_address,
std::move(insn_words),
std::move(name),
section_index,
ignored,
reimplemented
);
} else {
uint32_t vram = static_cast<uint32_t>(value);
section.function_addrs.push_back(vram);
context.functions_by_vram[vram].push_back(context.functions.size());
context.functions.emplace_back(
vram,
0,
std::vector<uint32_t>{},
std::move(name),
section_index,
ignored,
reimplemented
);
// Find the entrypoint by rom address in case it doesn't have vram as its value
if (has_entrypoint && rom_address == 0x1000 && type == ELFIO::STT_FUNC) {
vram = entrypoint;
found_entrypoint_func = true;
name = "recomp_entrypoint";
if (size == 0) {
num_instructions = 0x50 / 4;
}
}
// Suffix local symbols to prevent name conflicts.
if (bind == ELFIO::STB_LOCAL) {
name = fmt::format("{}_{:08X}", name, rom_address);
}
if (num_instructions > 0) {
context.section_functions[section_index].push_back(context.functions.size());
recorded_symbol = true;
}
context.functions_by_name[name] = context.functions.size();
std::vector<uint32_t> insn_words(num_instructions);
insn_words.assign(words, words + num_instructions);
context.functions.emplace_back(
vram,
rom_address,
std::move(insn_words),
name,
section_index,
ignored,
reimplemented
);
} else {
// TODO is this case needed anymore?
uint32_t vram = static_cast<uint32_t>(value);
section.function_addrs.push_back(vram);
context.functions_by_vram[vram].push_back(context.functions.size());
context.functions.emplace_back(
vram,
0,
std::vector<uint32_t>{},
name,
section_index,
ignored,
reimplemented
);
}
}
}
// The symbol wasn't detected as a function, so add it to the data symbols if the context is being dumped.
if (!recorded_symbol && dumping_context && !name.empty()) {
uint32_t vram = static_cast<uint32_t>(value);
// Place this symbol in the absolute symbol list if it's in the absolute section.
uint16_t target_section_index = section_index;
if (section_index == ELFIO::SHN_ABS) {
target_section_index = RecompPort::SectionAbsolute;
}
else if (section_index >= context.sections.size()) {
fmt::print("Symbol \"{}\" not in a valid section ({})\n", name, section_index);
}
// Move this symbol into the corresponding non-bss section if it's in a bss section.
auto find_bss_it = bss_section_to_target_section.find(target_section_index);
if (find_bss_it != bss_section_to_target_section.end()) {
fmt::print("mapping {} to {}\n", context.sections[section_index].name, context.sections[find_bss_it->second].name);
target_section_index = find_bss_it->second;
}
data_syms[target_section_index].emplace_back(
vram,
std::move(name)
);
}
}
return found_entrypoint_func;
@ -945,8 +998,9 @@ ELFIO::section* read_sections(RecompPort::Context& context, const RecompPort::Co
std::string reloc_target_section = section_name.substr(strlen(".rel"));
// If this reloc section is for a section that has been marked as relocatable, record it in the reloc section lookup
if (context.relocatable_sections.contains(reloc_target_section)) {
// If this reloc section is for a section that has been marked as relocatable, record it in the reloc section lookup.
// Alternatively, if this recompilation uses reference symbols then record all reloc sections.
if (!context.reference_sections.empty() || context.relocatable_sections.contains(reloc_target_section)) {
reloc_sections_by_name[reloc_target_section] = section.get();
}
}
@ -1020,14 +1074,15 @@ ELFIO::section* read_sections(RecompPort::Context& context, const RecompPort::Co
// TODO make sure that a reloc section was found for every section marked as relocatable
// Process bss and reloc sections
for (RecompPort::Section &section_out : context.sections) {
for (size_t section_index = 0; section_index < context.sections.size(); section_index++) {
RecompPort::Section& section_out = context.sections[section_index];
// Check if a bss section was found that corresponds with this section
auto bss_find = bss_sections_by_name.find(section_out.name);
if (bss_find != bss_sections_by_name.end()) {
section_out.bss_section_index = bss_find->second->get_index();
}
if (section_out.relocatable) {
if (!context.reference_symbols.empty() || section_out.relocatable) {
// Check if a reloc section was found that corresponds with this section
auto reloc_find = reloc_sections_by_name.find(section_out.name);
if (reloc_find != reloc_sections_by_name.end()) {
@ -1053,8 +1108,9 @@ ELFIO::section* read_sections(RecompPort::Context& context, const RecompPort::Co
RecompPort::Reloc& reloc_out = section_out.relocs[i];
// Get the real full_immediate by extracting the immediate from the instruction
uint32_t instr_word = byteswap(*reinterpret_cast<const uint32_t*>(context.rom.data() + section_out.rom_addr + rel_offset - section_out.ram_addr));
rabbitizer::InstructionCpu instr{ instr_word, static_cast<uint32_t>(rel_offset) };
uint32_t reloc_rom_addr = section_out.rom_addr + rel_offset - section_out.ram_addr;
uint32_t reloc_rom_word = byteswap(*reinterpret_cast<const uint32_t*>(context.rom.data() + reloc_rom_addr));
rabbitizer::InstructionCpu instr{ reloc_rom_word, static_cast<uint32_t>(rel_offset) };
//context.rom section_out.rom_addr;
reloc_out.address = rel_offset;
@ -1072,10 +1128,48 @@ ELFIO::section* read_sections(RecompPort::Context& context, const RecompPort::Co
bool found_rel_symbol = symbol_accessor.get_symbol(
rel_symbol, rel_symbol_name, rel_symbol_value, rel_symbol_size, rel_symbol_bind, rel_symbol_type, rel_symbol_section_index, rel_symbol_other);
reloc_out.target_section = rel_symbol_section_index;
uint32_t rel_section_vram = section_out.ram_addr;
uint32_t rel_symbol_offset = 0;
// Check if the symbol is undefined and to know whether to look for it in the reference symbols.
if (rel_symbol_section_index == ELFIO::SHN_UNDEF) {
// Undefined sym, check the reference symbols.
auto sym_find_it = context.reference_symbols_by_name.find(rel_symbol_name);
if (sym_find_it == context.reference_symbols_by_name.end()) {
fmt::print(stderr, "Undefined symbol: {}, not found in input or reference symbols!\n",
rel_symbol_name);
return nullptr;
}
reloc_out.reference_symbol = true;
// Replace the reloc's symbol index with the index into the reference symbol array.
reloc_out.symbol_index = sym_find_it->second;
rel_section_vram = 0;
rel_symbol_offset = context.reference_symbols[reloc_out.symbol_index].section_offset;
reloc_out.target_section = context.reference_symbols[reloc_out.symbol_index].section_index;
bool target_section_relocatable = false;
if (reloc_out.target_section != RecompPort::SectionAbsolute && context.reference_sections[reloc_out.target_section].relocatable) {
target_section_relocatable = true;
}
if (reloc_out.type == RecompPort::RelocType::R_MIPS_32 && target_section_relocatable) {
fmt::print(stderr, "Cannot reference {} in a statically initialized variable as it's defined in a relocatable section!\n",
rel_symbol_name);
return nullptr;
}
}
else {
reloc_out.reference_symbol = false;
reloc_out.target_section = rel_symbol_section_index;
}
// Reloc pairing, see MIPS System V ABI documentation page 4-18 (https://refspecs.linuxfoundation.org/elf/mipsabi.pdf)
if (reloc_out.type == RecompPort::RelocType::R_MIPS_LO16) {
uint32_t rel_immediate = instr.getProcessedImmediate();
uint32_t full_immediate = (prev_hi_immediate << 16) + (int16_t)rel_immediate;
reloc_out.section_offset = full_immediate + rel_symbol_offset - rel_section_vram;
if (prev_hi) {
if (prev_hi_symbol != rel_symbol) {
fmt::print(stderr, "Paired HI16 and LO16 relocations have different symbols\n"
@ -1083,36 +1177,36 @@ ELFIO::section* read_sections(RecompPort::Context& context, const RecompPort::Co
i, section_out.name, reloc_out.symbol_index, reloc_out.address);
return nullptr;
}
uint32_t rel_immediate = instr.getProcessedImmediate();
uint32_t full_immediate = (prev_hi_immediate << 16) + (int16_t)rel_immediate;
// Set this and the previous HI16 relocs' relocated addresses
section_out.relocs[i - 1].target_address = full_immediate;
reloc_out.target_address = full_immediate;
// Set the previous HI16 relocs' relocated address.
section_out.relocs[i - 1].section_offset = reloc_out.section_offset;
}
else {
if (prev_lo) {
uint32_t rel_immediate = instr.getProcessedImmediate();
uint32_t full_immediate;
if (prev_hi_symbol != rel_symbol) {
fmt::print(stderr, "[WARN] LO16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X} follows LO16 with different symbol\n",
// Orphaned LO16 reloc warnings.
if (config.unpaired_lo16_warnings) {
if (prev_lo) {
// Don't warn if multiple LO16 in a row reference the same symbol, as some linkers will use this behavior.
if (prev_hi_symbol != rel_symbol) {
fmt::print(stderr, "[WARN] LO16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X} follows LO16 with different symbol\n",
i, section_out.name, reloc_out.symbol_index, reloc_out.address);
}
}
else {
fmt::print(stderr, "[WARN] Unpaired LO16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X}\n",
i, section_out.name, reloc_out.symbol_index, reloc_out.address);
}
full_immediate = (prev_hi_immediate << 16) + (int16_t)rel_immediate;
reloc_out.target_address = full_immediate;
}
else {
fmt::print(stderr, "Unpaired LO16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X}\n",
i, section_out.name, reloc_out.symbol_index, reloc_out.address);
return nullptr;
}
// Even though this is an orphaned LO16 reloc, the previous calculation for the addend still follows the MIPS System V ABI documentation:
// "R_MIPS_LO16 entries without an R_MIPS_HI16 entry immediately preceding are orphaned and the previously defined
// R_MIPS_HI16 is used for computing the addend."
// Therefore, nothing needs to be done to the section_offset member.
}
prev_lo = true;
} else {
if (prev_hi) {
// This is an invalid elf as the MIPS System V ABI documentation states:
// "Each relocation type of R_MIPS_HI16 must have an associated R_MIPS_LO16 entry
// immediately following it in the list of relocations."
fmt::print(stderr, "Unpaired HI16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X}\n",
i - 1, section_out.name, section_out.relocs[i - 1].symbol_index, section_out.relocs[i - 1].address);
return nullptr;
@ -1130,7 +1224,25 @@ ELFIO::section* read_sections(RecompPort::Context& context, const RecompPort::Co
}
if (reloc_out.type == RecompPort::RelocType::R_MIPS_32) {
// Nothing to do here
// The reloc addend is just the existing word before relocation, so the section offset can just be the symbol's section offset.
// Incorporating the addend will be handled at load-time.
reloc_out.section_offset = rel_symbol_offset;
// TODO set section_out.has_mips32_relocs to true if this section should emit its mips32 relocs (mainly for TLB mapping).
if (reloc_out.reference_symbol) {
uint32_t reloc_target_section_addr = 0;
if (reloc_out.target_section != RecompPort::SectionAbsolute) {
reloc_target_section_addr = context.reference_sections[reloc_out.target_section].ram_addr;
}
// Patch the word in the ROM to incorporate the symbol's value.
uint32_t updated_reloc_word = reloc_rom_word + reloc_target_section_addr + reloc_out.section_offset;
*reinterpret_cast<uint32_t*>(context.rom.data() + reloc_rom_addr) = byteswap(updated_reloc_word);
}
}
if (reloc_out.type == RecompPort::RelocType::R_MIPS_26) {
uint32_t rel_immediate = instr.getProcessedImmediate();
reloc_out.section_offset = rel_immediate + rel_symbol_offset;
}
}
}
@ -1258,49 +1370,99 @@ std::vector<std::string> reloc_names {
"R_MIPS_GPREL16",
};
// dump_context: writes the contents of a recompiler context out as text, one [[section]] entry per section.
// NOTE(review): this span is diff output, so pre-change lines (single `path` parameter, `context_file`)
// appear interleaved with post-change lines (`func_path`/`data_path`, `func_context_file`/`data_context_file`).
// The comments added below describe the post-change version.
//
// Parameters (post-change signature):
//   context   - source of the sections, per-section function index lists and functions that get dumped.
//   data_syms - data symbols grouped by a uint16_t section index; the key (uint16_t)-1 is looked up for absolute symbols.
//   func_path - output file that receives section headers, relocs and functions.
//   data_path - output file that receives section headers and data symbols.
// Returns nothing. No stream error checking is visible in this function.
void dump_context(const RecompPort::Context& context, const std::filesystem::path& path) {
std::ofstream context_file {path};
void dump_context(const RecompPort::Context& context, const std::unordered_map<uint16_t, std::vector<DataSymbol>>& data_syms, const std::filesystem::path& func_path, const std::filesystem::path& data_path) {
std::ofstream func_context_file {func_path};
std::ofstream data_context_file {data_path};
// Both output files start with the same autogenerated-file banner line.
fmt::print(func_context_file, "# Autogenerated from an ELF via N64Recomp\n");
fmt::print(data_context_file, "# Autogenerated from an ELF via N64Recomp\n");
for (size_t section_index = 0; section_index < context.sections.size(); section_index++) {
const RecompPort::Section& section = context.sections[section_index];
const std::vector<size_t>& section_funcs = context.section_functions[section_index];
if (!section_funcs.empty()) {
fmt::print(context_file,
"# Autogenerated from an ELF via N64Recomp\n"
// Helper that prints a [[section]] header with name, vram and size.
// The rom line is omitted when rom_addr equals (uint64_t)-1, which the absolute symbol dump below passes in.
auto print_section = [](std::ofstream& output_file, const std::string& name, uint64_t rom_addr, uint64_t ram_addr, uint64_t size) {
if (rom_addr == (uint64_t)-1) {
fmt::print(output_file,
"[[section]]\n"
"name = \"{}\"\n"
"vram = 0x{:08X}\n"
"size = 0x{:X}\n"
"\n",
name, ram_addr, size);
}
else {
fmt::print(output_file,
"[[section]]\n"
"name = \"{}\"\n"
"rom = 0x{:08X}\n"
"vram = 0x{:08X}\n"
"size = 0x{:X}\n"
"\n",
section.name, section.rom_addr, section.ram_addr, section.size);
name, rom_addr, ram_addr, size);
}
};
// Walk every section; a section may be written to the function file, the data file, both, or neither.
for (size_t section_index = 0; section_index < context.sections.size(); section_index++) {
const RecompPort::Section& section = context.sections[section_index];
const std::vector<size_t>& section_funcs = context.section_functions[section_index];
// Only sections that contain at least one function are written to the function context file.
if (!section_funcs.empty()) {
print_section(func_context_file, section.name, section.rom_addr, section.ram_addr, section.size);
// Dump relocs into the function context file.
if (!section.relocs.empty()) {
fmt::print(context_file, "relocs = [\n");
fmt::print(func_context_file, "relocs = [\n");
for (const RecompPort::Reloc& reloc : section.relocs) {
// Only relocs that target this section or its associated bss section are emitted.
if (reloc.target_section == section_index || reloc.target_section == section.bss_section_index) {
// TODO allow MIPS32 relocs for TLB mapping support.
// TODO allow emitting MIPS32 relocs for specific sections via a toml option for TLB mapping support.
// Of those, only HI16 and LO16 relocs are written out.
if (reloc.type == RecompPort::RelocType::R_MIPS_HI16 || reloc.type == RecompPort::RelocType::R_MIPS_LO16) {
fmt::print(context_file, " {{ type = \"{}\", vram = 0x{:08X}, target_vram = 0x{:08X} }},\n",
reloc_names[static_cast<int>(reloc.type)], reloc.address, reloc.target_address);
// Post-change form: target_vram is rebuilt as the reloc section offset plus this section ram_addr.
fmt::print(func_context_file, " {{ type = \"{}\", vram = 0x{:08X}, target_vram = 0x{:08X} }},\n",
reloc_names[static_cast<int>(reloc.type)], reloc.address, reloc.section_offset + section.ram_addr);
}
}
}
fmt::print(context_file, "]\n\n");
fmt::print(func_context_file, "]\n\n");
}
fmt::print(context_file, "functions = [\n");
// Dump functions into the function context file.
fmt::print(func_context_file, "functions = [\n");
for (const size_t& function_index : section_funcs) {
const RecompPort::Function& func = context.functions[function_index];
fmt::print(context_file, " {{ name = \"{}\", vram = 0x{:08X}, size = 0x{:X} }},\n",
// The size field is the word count multiplied by the size of one word, i.e. a byte count.
fmt::print(func_context_file, " {{ name = \"{}\", vram = 0x{:08X}, size = 0x{:X} }},\n",
func.name, func.vram, func.words.size() * sizeof(func.words[0]));
}
fmt::print(context_file, "]\n\n");
fmt::print(func_context_file, "]\n\n");
}
// Sections that have a non-empty entry in data_syms are written to the data context file.
const auto find_syms_it = data_syms.find((uint16_t)section_index);
if (find_syms_it != data_syms.end() && !find_syms_it->second.empty()) {
// NOTE(review): this print has no output file argument (presumably goes to stdout) and a placeholder
// message, so it looks like leftover debug output for sections named *.bss; confirm whether it should be removed.
if (section.name.ends_with(".bss")) {
fmt::print("asdasd {}\n", section.name);
}
print_section(data_context_file, section.name, section.rom_addr, section.ram_addr, section.size);
// Dump other symbols into the data context file.
fmt::print(data_context_file, "symbols = [\n");
for (const DataSymbol& cur_sym : find_syms_it->second) {
fmt::print(data_context_file, " {{ name = \"{}\", vram = 0x{:08X} }},\n", cur_sym.name, cur_sym.vram);
}
fmt::print(data_context_file, "]\n\n");
}
}
// Absolute symbols are looked up under the key (uint16_t)-1.
// NOTE(review): the header declares SectionSelf as (uint16_t)-1 and SectionAbsolute as (uint16_t)-2;
// confirm that the code filling data_syms really stores absolute symbols under -1.
const auto find_abs_syms_it = data_syms.find((uint16_t)-1);
if (find_abs_syms_it != data_syms.end() && !find_abs_syms_it->second.empty()) {
// Dump absolute symbols into the data context file.
// Passing (uint64_t)-1 as rom_addr makes print_section omit the rom line; vram and size are written as 0.
print_section(data_context_file, "ABSOLUTE_SYMS", (uint64_t)-1, 0, 0);
fmt::print(data_context_file, "symbols = [\n");
for (const DataSymbol& cur_sym : find_abs_syms_it->second) {
fmt::print(data_context_file, " {{ name = \"{}\", vram = 0x{:08X} }},\n", cur_sym.name, cur_sym.vram);
}
fmt::print(data_context_file, "]\n\n");
}
}
@ -1326,6 +1488,9 @@ int main(int argc, char** argv) {
std::exit(EXIT_FAILURE);
};
// TODO expose a way to dump the context from the command line.
bool dumping_context = false;
if (argc != 2) {
fmt::print("Usage: {} [config file]\n", argv[0]);
std::exit(EXIT_SUCCESS);
@ -1380,6 +1545,27 @@ int main(int argc, char** argv) {
context = { elf_file };
context.relocatable_sections = std::move(relocatable_sections);
// Import symbols from any reference symbols files that were provided.
if (!config.func_reference_syms_file_path.empty()) {
{
// Create a new temporary context to read the function reference symbol file into, since it's the same format as the recompilation symbol file.
std::vector<uint8_t> dummy_rom{};
RecompPort::Context reference_context{};
if (!RecompPort::Context::from_symbol_file(config.func_reference_syms_file_path, std::move(dummy_rom), reference_context, false)) {
exit_failure("Failed to load provided function reference symbol file\n");
}
// Use the reference context to build a reference symbol list for the actual context.
context.import_reference_context(reference_context);
}
for (const std::filesystem::path& cur_data_sym_path : config.data_reference_syms_file_paths) {
if (!context.read_data_reference_syms(cur_data_sym_path)) {
exit_failure(fmt::format("Failed to load provided data reference symbol file: {}\n", cur_data_sym_path.string()));
}
}
}
// Read all of the sections in the elf and look for the symbol table section
ELFIO::section* symtab_section = read_sections(context, config, elf_file);
@ -1396,8 +1582,11 @@ int main(int argc, char** argv) {
context.manually_sized_funcs.emplace(func_size.func_name, func_size.size_bytes);
}
// Lists of data symbols organized by section, only used if dumping context.
std::unordered_map<uint16_t, std::vector<DataSymbol>> data_syms;
// Read all of the symbols in the elf and look for the entrypoint function
bool found_entrypoint_func = read_symbols(context, elf_file, symtab_section, config.entrypoint, config.has_entrypoint, config.use_absolute_symbols);
bool found_entrypoint_func = read_symbols(context, elf_file, symtab_section, config.entrypoint, config.has_entrypoint, config.use_absolute_symbols, dumping_context, data_syms);
// Add any manual functions
add_manual_functions(context, elf_file, config.manual_functions);
@ -1405,6 +1594,21 @@ int main(int argc, char** argv) {
if (config.has_entrypoint && !found_entrypoint_func) {
exit_failure("Could not find entrypoint function\n");
}
if (dumping_context) {
fmt::print("Dumping context\n");
// Sort the data syms by address so the output is nicer.
for (auto& [section_index, section_syms] : data_syms) {
std::sort(section_syms.begin(), section_syms.end(),
[](const DataSymbol& a, const DataSymbol& b) {
return a.vram < b.vram;
}
);
}
dump_context(context, data_syms, "dump.toml", "data_dump.toml");
return 0;
}
}
// Build a context from the provided symbols file.
else if (!config.symbols_file_path.empty()) {
@ -1412,12 +1616,16 @@ int main(int argc, char** argv) {
exit_failure("A ROM file must be provided when using a symbols file\n");
}
if (dumping_context) {
exit_failure("Cannot dump context when using a symbols file\n");
}
std::vector<uint8_t> rom = read_file(config.rom_file_path);
if (rom.empty()) {
exit_failure("Failed to load ROM file: " + config.rom_file_path.string() + "\n");
}
if (!RecompPort::Context::from_symbol_file(config.symbols_file_path, std::move(rom), context)) {
if (!RecompPort::Context::from_symbol_file(config.symbols_file_path, std::move(rom), context, true)) {
exit_failure("Failed to load symbols file\n");
}
@ -1485,11 +1693,6 @@ int main(int argc, char** argv) {
std::vector<std::vector<uint32_t>> static_funcs_by_section{ context.sections.size() };
// TODO expose a way to dump the context from the command line. Make sure not to rename functions when doing so.
//fmt::print("Dumping context\n");
//dump_context(context, "dump.toml");
//return 0;
fmt::print("Working dir: {}\n", std::filesystem::current_path().string());
// Stub out any functions specified in the config file.
@ -1738,14 +1941,14 @@ int main(int argc, char** argv) {
const auto& section = context.sections[section_index];
const auto& section_funcs = context.section_functions[section_index];
if (!section_funcs.empty()) {
if (section.has_mips32_relocs || !section_funcs.empty()) {
std::string_view section_name_trimmed{ section.name };
if (section.relocatable) {
relocatable_section_indices.emplace(section.name, written_sections);
}
while (section_name_trimmed[0] == '.') {
while (section_name_trimmed.size() > 0 && section_name_trimmed[0] == '.') {
section_name_trimmed.remove_prefix(1);
}
@ -1797,5 +2000,10 @@ int main(int argc, char** argv) {
fmt::print(overlay_file, "}};\n");
}
if (!config.output_binary_path.empty()) {
std::ofstream output_binary{config.output_binary_path, std::ios::binary};
output_binary.write(reinterpret_cast<const char*>(context.rom.data()), context.rom.size());
}
return 0;
}

View file

@ -56,26 +56,61 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C
RecompPort::RelocType reloc_type = RecompPort::RelocType::R_MIPS_NONE;
uint32_t reloc_section = 0;
uint32_t reloc_target_section_offset = 0;
size_t reloc_reference_symbol = (size_t)-1;
uint32_t func_vram_end = func.vram + func.words.size() * sizeof(func.words[0]);
uint16_t imm = instr.Get_immediate();
// Check if this instruction has a reloc.
if (section.relocatable && section.relocs.size() > 0 && section.relocs[reloc_index].address == instr_vram) {
if (section.relocs.size() > 0 && section.relocs[reloc_index].address == instr_vram) {
// Get the reloc data for this instruction
const auto& reloc = section.relocs[reloc_index];
reloc_section = reloc.target_section;
// Some symbols are in a nonexistent section (e.g. absolute symbols), so check that the section is valid before doing anything else.
// Absolute symbols will never need to be relocated so it's safe to skip this.
if (reloc_section < context.sections.size()) {
// Ignore this reloc if it points to a different section.
// Also check if the reloc points to the bss section since that will also be relocated with the section.
if (reloc_section == func.section_index || reloc_section == section.bss_section_index) {
// Record the reloc's data.
reloc_type = reloc.type;
reloc_target_section_offset = reloc.target_address - section.ram_addr;
// Ignore all relocs that aren't HI16 or LO16.
if (reloc_type == RecompPort::RelocType::R_MIPS_HI16 || reloc_type == RecompPort::RelocType::R_MIPS_LO16) {
at_reloc = true;
// Only process this relocation if this section is relocatable or if this relocation targets a reference symbol.
if (section.relocatable || reloc.reference_symbol) {
// Some symbols are in a nonexistent section (e.g. absolute symbols), so check that the section is valid before doing anything else.
// Absolute symbols will never need to be relocated so it's safe to skip this.
// Always process reference symbols relocations.
if (reloc_section < context.sections.size() || reloc.reference_symbol) {
// Ignore this reloc if it points to a different section.
// Also check if the reloc points to the bss section since that will also be relocated with the section.
// Additionally, always process reference symbol relocations.
if (reloc_section == func.section_index || reloc_section == section.bss_section_index || reloc.reference_symbol) {
// Record the reloc's data.
reloc_type = reloc.type;
reloc_target_section_offset = reloc.section_offset;
// Ignore all relocs that aren't HI16 or LO16.
if (reloc_type == RecompPort::RelocType::R_MIPS_HI16 || reloc_type == RecompPort::RelocType::R_MIPS_LO16 || reloc_type == RecompPort::RelocType::R_MIPS_26) {
at_reloc = true;
if (reloc.reference_symbol) {
reloc_reference_symbol = reloc.symbol_index;
static RecompPort::ReferenceSection dummy_section{
.rom_addr = 0,
.ram_addr = 0,
.size = 0,
.relocatable = false
};
const auto& reloc_reference_section = reloc.target_section == RecompPort::SectionAbsolute ? dummy_section : context.reference_sections[reloc.target_section];
if (!reloc_reference_section.relocatable) {
at_reloc = false;
uint32_t full_immediate = reloc.section_offset + reloc_reference_section.ram_addr;
if (reloc_type == RecompPort::RelocType::R_MIPS_HI16) {
imm = (full_immediate >> 16) + ((full_immediate >> 15) & 1);
}
else if (reloc_type == RecompPort::RelocType::R_MIPS_LO16) {
imm = full_immediate & 0xFFFF;
}
}
}
}
// Repoint bss relocations at their non-bss counterpart section.
if (reloc_section == section.bss_section_index) {
reloc_section = func.section_index;
}
}
}
}
@ -112,70 +147,90 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C
}
};
auto print_func_call = [&](uint32_t target_func_vram, bool link_branch = true, bool indent = false) {
const auto matching_funcs_find = context.functions_by_vram.find(target_func_vram);
auto print_func_call = [reloc_target_section_offset, reloc_reference_symbol, reloc_type, &context, &section, &func, &static_funcs_out, &needs_link_branch, &print_unconditional_branch]
(uint32_t target_func_vram, bool link_branch = true, bool indent = false)
{
std::string jal_target_name;
uint32_t section_vram_start = section.ram_addr;
uint32_t section_vram_end = section.ram_addr + section.size;
// TODO the current section should be prioritized if the target jal is in its vram even if a function isn't known (i.e. static)
if (matching_funcs_find != context.functions_by_vram.end()) {
// If we found matches for the target function by vram,
const auto& matching_funcs_vec = matching_funcs_find->second;
size_t real_func_index;
bool ambiguous;
// If there is more than one corresponding function, look for any that have a nonzero size.
if (matching_funcs_vec.size() > 1) {
size_t nonzero_func_index = (size_t)-1;
bool found_nonzero_func = false;
for (size_t cur_func_index : matching_funcs_vec) {
const auto& cur_func = context.functions[cur_func_index];
if (cur_func.words.size() != 0) {
if (found_nonzero_func) {
ambiguous = true;
break;
}
// If this section is relocatable and the target vram is in the section, don't call functions
// in any section other than this one.
if (cur_func.section_index == func.section_index ||
!(section.relocatable && target_func_vram >= section_vram_start && target_func_vram < section_vram_end)) {
found_nonzero_func = true;
nonzero_func_index = cur_func_index;
}
}
}
if (nonzero_func_index == (size_t)-1) {
fmt::print(stderr, "[Warn] Potential jal resolution ambiguity\n");
for (size_t cur_func_index : matching_funcs_vec) {
fmt::print(stderr, " {}\n", context.functions[cur_func_index].name);
}
nonzero_func_index = 0;
}
real_func_index = nonzero_func_index;
ambiguous = false;
}
else {
real_func_index = matching_funcs_vec.front();
ambiguous = false;
}
if (ambiguous) {
fmt::print(stderr, "Ambiguous jal target: 0x{:08X}\n", target_func_vram);
for (size_t cur_func_index : matching_funcs_vec) {
const auto& cur_func = context.functions[cur_func_index];
fmt::print(stderr, " {}\n", cur_func.name);
}
if (reloc_reference_symbol != (size_t)-1) {
const auto& ref_symbol = context.reference_symbols[reloc_reference_symbol];
const std::string& ref_symbol_name = context.reference_symbol_names[reloc_reference_symbol];
if (reloc_type != RecompPort::RelocType::R_MIPS_26) {
fmt::print(stderr, "Unsupported reloc type {} on jal instruction in {}\n", (int)reloc_type, func.name);
return false;
}
jal_target_name = context.functions[real_func_index].name;
if (ref_symbol.section_offset != reloc_target_section_offset) {
fmt::print(stderr, "Function {} uses a MIPS_R_26 addend, which is not supported yet\n", func.name);
return false;
}
jal_target_name = ref_symbol_name;
}
else {
const auto& section = context.sections[func.section_index];
if (target_func_vram >= section.ram_addr && target_func_vram < section.ram_addr + section.size) {
jal_target_name = fmt::format("static_{}_{:08X}", func.section_index, target_func_vram);
static_funcs_out[func.section_index].push_back(target_func_vram);
const auto matching_funcs_find = context.functions_by_vram.find(target_func_vram);
uint32_t section_vram_start = section.ram_addr;
uint32_t section_vram_end = section.ram_addr + section.size;
// TODO the current section should be prioritized if the target jal is in its vram even if a function isn't known (i.e. static)
if (matching_funcs_find != context.functions_by_vram.end()) {
// If we found matches for the target function by vram,
const auto& matching_funcs_vec = matching_funcs_find->second;
size_t real_func_index;
bool ambiguous;
// If there is more than one corresponding function, look for any that have a nonzero size.
if (matching_funcs_vec.size() > 1) {
size_t nonzero_func_index = (size_t)-1;
bool found_nonzero_func = false;
for (size_t cur_func_index : matching_funcs_vec) {
const auto& cur_func = context.functions[cur_func_index];
if (cur_func.words.size() != 0) {
if (found_nonzero_func) {
ambiguous = true;
break;
}
// If this section is relocatable and the target vram is in the section, don't call functions
// in any section other than this one.
if (cur_func.section_index == func.section_index ||
!(section.relocatable && target_func_vram >= section_vram_start && target_func_vram < section_vram_end)) {
found_nonzero_func = true;
nonzero_func_index = cur_func_index;
}
}
}
if (nonzero_func_index == (size_t)-1) {
fmt::print(stderr, "[Warn] Potential jal resolution ambiguity\n");
for (size_t cur_func_index : matching_funcs_vec) {
fmt::print(stderr, " {}\n", context.functions[cur_func_index].name);
}
nonzero_func_index = 0;
}
real_func_index = nonzero_func_index;
ambiguous = false;
}
else {
real_func_index = matching_funcs_vec.front();
ambiguous = false;
}
if (ambiguous) {
fmt::print(stderr, "Ambiguous jal target: 0x{:08X}\n", target_func_vram);
for (size_t cur_func_index : matching_funcs_vec) {
const auto& cur_func = context.functions[cur_func_index];
fmt::print(stderr, " {}\n", cur_func.name);
}
return false;
}
jal_target_name = context.functions[real_func_index].name;
}
else {
fmt::print(stderr, "No function found for jal target: 0x{:08X}\n", target_func_vram);
return false;
const auto& section = context.sections[func.section_index];
if (target_func_vram >= section.ram_addr && target_func_vram < section.ram_addr + section.size) {
jal_target_name = fmt::format("static_{}_{:08X}", func.section_index, target_func_vram);
static_funcs_out[func.section_index].push_back(target_func_vram);
}
else {
fmt::print(stderr, "No function found for jal target: 0x{:08X}\n", target_func_vram);
return false;
}
}
}
needs_link_branch = link_branch;
@ -238,8 +293,6 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C
int cop1_cs = (int)instr.Get_cop1cs();
uint16_t imm = instr.Get_immediate();
std::string unsigned_imm_string;
std::string signed_imm_string;
@ -249,15 +302,19 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C
} else {
switch (reloc_type) {
case RecompPort::RelocType::R_MIPS_HI16:
unsigned_imm_string = fmt::format("RELOC_HI16({}, {:#X})", (uint32_t)func.section_index, reloc_target_section_offset);
unsigned_imm_string = fmt::format("RELOC_HI16({}, {:#X})", reloc_section, reloc_target_section_offset);
signed_imm_string = "(int16_t)" + unsigned_imm_string;
reloc_handled = true;
break;
case RecompPort::RelocType::R_MIPS_LO16:
unsigned_imm_string = fmt::format("RELOC_LO16({}, {:#X})", (uint32_t)func.section_index, reloc_target_section_offset);
unsigned_imm_string = fmt::format("RELOC_LO16({}, {:#X})", reloc_section, reloc_target_section_offset);
signed_imm_string = "(int16_t)" + unsigned_imm_string;
reloc_handled = true;
break;
case RecompPort::RelocType::R_MIPS_26:
// Nothing to do here, this will be handled by print_func_call.
reloc_handled = true;
break;
default:
throw std::runtime_error(fmt::format("Unexpected reloc type {} in {}\n", static_cast<int>(reloc_type), func.name));
}
@ -440,10 +497,10 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::C
print_line("{}{} = hi", ctx_gpr_prefix(rd), rd);
break;
case InstrId::cpu_mtlo:
print_line("lo = {}{}", ctx_gpr_prefix(rd), rd);
print_line("lo = {}{}", ctx_gpr_prefix(rs), rs);
break;
case InstrId::cpu_mthi:
print_line("hi = {}{}", ctx_gpr_prefix(rd), rd);
print_line("hi = {}{}", ctx_gpr_prefix(rs), rs);
break;
// Loads
case InstrId::cpu_ld:
@ -1166,7 +1223,6 @@ bool RecompPort::recompile_function(const RecompPort::Context& context, const Re
bool needs_link_branch = false;
bool in_likely_delay_slot = false;
const auto& section = context.sections[func.section_index];
bool needs_reloc = section.relocatable && section.relocs.size() > 0;
size_t reloc_index = 0;
for (size_t instr_index = 0; instr_index < instructions.size(); ++instr_index) {
bool had_link_branch = needs_link_branch;
@ -1181,11 +1237,9 @@ bool RecompPort::recompile_function(const RecompPort::Context& context, const Re
++cur_label;
}
// If this is a relocatable section, advance the reloc index until we reach the last one or until we get to/pass the current instruction
if (needs_reloc) {
while (reloc_index < (section.relocs.size() - 1) && section.relocs[reloc_index].address < vram) {
reloc_index++;
}
// Advance the reloc index until we reach the last one or until we get to/pass the current instruction
while ((reloc_index + 1) < section.relocs.size() && section.relocs[reloc_index].address < vram) {
reloc_index++;
}
// Process the current instruction and check for errors