From cfe49fbfba4e3c86111c3653742df583b1ead3d1 Mon Sep 17 00:00:00 2001 From: Paris Oplopoios Date: Tue, 3 Sep 2024 20:41:35 +0300 Subject: [PATCH] Preserve flags on some patched instructions (#720) * Preserve flags on some patched instructions * Move flag saving to Save/RestoreContext --- src/core/cpu_patches.cpp | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp index 07253b46..0160d63a 100644 --- a/src/core/cpu_patches.cpp +++ b/src/core/cpu_patches.cpp @@ -212,31 +212,38 @@ static void RestoreRegisters(Xbyak::CodeGenerator& c, } /// Switches to the patch stack and stores all registers. -static void SaveContext(Xbyak::CodeGenerator& c) { +static void SaveContext(Xbyak::CodeGenerator& c, bool save_flags = false) { SaveStack(c); for (int reg = Xbyak::Operand::RAX; reg <= Xbyak::Operand::R15; reg++) { c.push(Xbyak::Reg64(reg)); } for (int reg = 0; reg <= 7; reg++) { - c.sub(rsp, 32); + c.lea(rsp, ptr[rsp - 32]); c.vmovdqu(ptr[rsp], Xbyak::Ymm(reg)); } + if (save_flags) { + c.pushfq(); + } } /// Restores all registers and restores the original stack. /// If the destination is a register, it is not restored to preserve the output. -static void RestoreContext(Xbyak::CodeGenerator& c, const Xbyak::Operand& dst) { +static void RestoreContext(Xbyak::CodeGenerator& c, const Xbyak::Operand& dst, + bool restore_flags = false) { + if (restore_flags) { + c.popfq(); + } for (int reg = 7; reg >= 0; reg--) { if ((!dst.isXMM() && !dst.isYMM()) || dst.getIdx() != reg) { c.vmovdqu(Xbyak::Ymm(reg), ptr[rsp]); } - c.add(rsp, 32); + c.lea(rsp, ptr[rsp + 32]); } for (int reg = Xbyak::Operand::R15; reg >= Xbyak::Operand::RAX; reg--) { if (!dst.isREG() || dst.getIdx() != reg) { c.pop(Xbyak::Reg64(reg)); } else { - c.add(rsp, 8); + c.lea(rsp, ptr[rsp + 8]); } } RestoreStack(c); @@ -376,7 +383,7 @@ static void GenerateVCVTPH2PS(const ZydisDecodedOperand* operands, Xbyak::CodeGe const auto float_count = dst.getBit() / 32; const auto byte_count = float_count * 4; - SaveContext(c); + SaveContext(c, true); // Allocate stack space for outputs and load into first parameter. c.sub(rsp, byte_count); @@ -412,7 +419,7 @@ static void GenerateVCVTPH2PS(const ZydisDecodedOperand* operands, Xbyak::CodeGe } c.add(rsp, byte_count); - RestoreContext(c, dst); + RestoreContext(c, dst, true); } using SingleToHalfFloatConverter = half_float::half (*)(float); @@ -440,7 +447,7 @@ static void GenerateVCVTPS2PH(const ZydisDecodedOperand* operands, Xbyak::CodeGe const auto float_count = src.getBit() / 32; const auto byte_count = float_count * 4; - SaveContext(c); + SaveContext(c, true); if (dst->isXMM()) { // Allocate stack space for outputs and load into first parameter. @@ -487,7 +494,7 @@ static void GenerateVCVTPS2PH(const ZydisDecodedOperand* operands, Xbyak::CodeGe c.add(rsp, byte_count); } - RestoreContext(c, *dst); + RestoreContext(c, *dst, true); } static bool FilterRosetta2Only(const ZydisDecodedOperand*) {