From 8e119a1e9694416289c4c4c7ed38c408c096f9a2 Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Tue, 13 Sep 2022 19:51:40 -0300
Subject: [PATCH] Implement PLD and SUB (imm16) on T32, plus UADD8, SADD8,
 USUB8 and SSUB8 on both A32 and T32 (#3693)

---
 ARMeilleure/Decoders/OpCodeTable.cs       | 52 ++++++++++----
 ARMeilleure/Instructions/InstEmitAlu32.cs | 88 ++++++++++++++++++++---
 ARMeilleure/Instructions/InstName.cs      |  5 ++
 ARMeilleure/State/PState.cs               |  4 ++
 Ryujinx.Tests/Cpu/CpuTest32.cs            |  4 ++
 Ryujinx.Tests/Cpu/CpuTestAlu32.cs         | 56 ++++++++++-----
 6 files changed, 168 insertions(+), 41 deletions(-)

diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
index 6484eaf56..faeb9b5ae 100644
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -727,11 +727,13 @@ namespace ARMeilleure.Decoders
             SetA32("<<<<0010111xxxxxxxxxxxxxxxxxxxxx", InstName.Rsc,     InstEmit32.Rsc,     OpCode32AluImm.Create);
             SetA32("<<<<0000111xxxxxxxxxxxxxxxx0xxxx", InstName.Rsc,     InstEmit32.Rsc,     OpCode32AluRsImm.Create);
             SetA32("<<<<0000111xxxxxxxxxxxxx0xx1xxxx", InstName.Rsc,     InstEmit32.Rsc,     OpCode32AluRsReg.Create);
+            SetA32("<<<<01100001xxxxxxxx11111001xxxx", InstName.Sadd8,   InstEmit32.Sadd8,   OpCode32AluReg.Create);
             SetA32("<<<<0010110xxxxxxxxxxxxxxxxxxxxx", InstName.Sbc,     InstEmit32.Sbc,     OpCode32AluImm.Create);
             SetA32("<<<<0000110xxxxxxxxxxxxxxxx0xxxx", InstName.Sbc,     InstEmit32.Sbc,     OpCode32AluRsImm.Create);
             SetA32("<<<<0000110xxxxxxxxxxxxx0xx1xxxx", InstName.Sbc,     InstEmit32.Sbc,     OpCode32AluRsReg.Create);
             SetA32("<<<<0111101xxxxxxxxxxxxxx101xxxx", InstName.Sbfx,    InstEmit32.Sbfx,    OpCode32AluBf.Create);
             SetA32("<<<<01110001xxxx1111xxxx0001xxxx", InstName.Sdiv,    InstEmit32.Sdiv,    OpCode32AluMla.Create);
+            SetA32("<<<<01101000xxxxxxxx11111011xxxx", InstName.Sel,     InstEmit32.Sel,     OpCode32AluReg.Create);
             SetA32("<<<<01100011xxxxxxxx11111001xxxx", InstName.Shadd8,  InstEmit32.Shadd8,  OpCode32AluReg.Create);
             SetA32("<<<<01100011xxxxxxxx11111111xxxx", InstName.Shsub8,  InstEmit32.Shsub8,  OpCode32AluReg.Create);
             SetA32("<<<<00010000xxxxxxxxxxxx1xx0xxxx", InstName.Smla__,  InstEmit32.Smla__,  OpCode32AluMla.Create);
@@ -745,6 +747,7 @@ namespace ARMeilleure.Decoders
             SetA32("<<<<00010010xxxx0000xxxx1x10xxxx", InstName.Smulw_,  InstEmit32.Smulw_,  OpCode32AluMla.Create);
             SetA32("<<<<0110101xxxxxxxxxxxxxxx01xxxx", InstName.Ssat,    InstEmit32.Ssat,    OpCode32Sat.Create);
             SetA32("<<<<01101010xxxxxxxx11110011xxxx", InstName.Ssat16,  InstEmit32.Ssat16,  OpCode32Sat16.Create);
+            SetA32("<<<<01100001xxxxxxxx11111111xxxx", InstName.Ssub8,   InstEmit32.Ssub8,   OpCode32AluReg.Create);
             SetA32("<<<<00011000xxxx111111001001xxxx", InstName.Stl,     InstEmit32.Stl,     OpCode32MemStEx.Create);
             SetA32("<<<<00011100xxxx111111001001xxxx", InstName.Stlb,    InstEmit32.Stlb,    OpCode32MemStEx.Create);
             SetA32("<<<<00011000xxxxxxxx11101001xxxx", InstName.Stlex,   InstEmit32.Stlex,   OpCode32MemStEx.Create);
@@ -779,6 +782,7 @@ namespace ARMeilleure.Decoders
             SetA32("<<<<00110001xxxx0000xxxxxxxxxxxx", InstName.Tst,     InstEmit32.Tst,     OpCode32AluImm.Create);
             SetA32("<<<<00010001xxxx0000xxxxxxx0xxxx", InstName.Tst,     InstEmit32.Tst,     OpCode32AluRsImm.Create);
             SetA32("<<<<00010001xxxx0000xxxx0xx1xxxx", InstName.Tst,     InstEmit32.Tst,     OpCode32AluRsReg.Create);
+            SetA32("<<<<01100101xxxxxxxx11111001xxxx", InstName.Uadd8,   InstEmit32.Uadd8,   OpCode32AluReg.Create);
             SetA32("<<<<0111111xxxxxxxxxxxxxx101xxxx", InstName.Ubfx,    InstEmit32.Ubfx,    OpCode32AluBf.Create);
             SetA32("<<<<01110011xxxx1111xxxx0001xxxx", InstName.Udiv,    InstEmit32.Udiv,    OpCode32AluMla.Create);
             SetA32("<<<<01100111xxxxxxxx11111001xxxx", InstName.Uhadd8,  InstEmit32.Uhadd8,  OpCode32AluReg.Create);
@@ -788,6 +792,7 @@ namespace ARMeilleure.Decoders
             SetA32("<<<<0000100xxxxxxxxxxxxx1001xxxx", InstName.Umull,   InstEmit32.Umull,   OpCode32AluUmull.Create);
             SetA32("<<<<0110111xxxxxxxxxxxxxxx01xxxx", InstName.Usat,    InstEmit32.Usat,    OpCode32Sat.Create);
             SetA32("<<<<01101110xxxxxxxx11110011xxxx", InstName.Usat16,  InstEmit32.Usat16,  OpCode32Sat16.Create);
+            SetA32("<<<<01100101xxxxxxxx11111111xxxx", InstName.Usub8,   InstEmit32.Usub8,   OpCode32AluReg.Create);
             SetA32("<<<<01101110xxxxxxxxxx000111xxxx", InstName.Uxtb,    InstEmit32.Uxtb,    OpCode32AluUx.Create);
             SetA32("<<<<01101100xxxxxxxxxx000111xxxx", InstName.Uxtb16,  InstEmit32.Uxtb16,  OpCode32AluUx.Create);
             SetA32("<<<<01101111xxxxxxxxxx000111xxxx", InstName.Uxth,    InstEmit32.Uxth,    OpCode32AluUx.Create);
@@ -1128,26 +1133,26 @@ namespace ARMeilleure.Decoders
             SetT32("111110001101xxxxxxxxxxxxxxxxxxxx", InstName.Ldr,      InstEmit32.Ldr,      OpCodeT32MemImm12.Create);
             SetT32("111110000101<<<<xxxx000000xxxxxx", InstName.Ldr,      InstEmit32.Ldr,      OpCodeT32MemRsImm.Create);
             SetT32("111110000001xxxxxxxx10x1xxxxxxxx", InstName.Ldrb,     InstEmit32.Ldrb,     OpCodeT32MemImm8.Create);
-            SetT32("111110000001xxxxxxxx1100xxxxxxxx", InstName.Ldrb,     InstEmit32.Ldrb,     OpCodeT32MemImm8.Create);
+            SetT32("111110000001xxxx<<<<1100xxxxxxxx", InstName.Ldrb,     InstEmit32.Ldrb,     OpCodeT32MemImm8.Create);
             SetT32("111110000001xxxxxxxx11x1xxxxxxxx", InstName.Ldrb,     InstEmit32.Ldrb,     OpCodeT32MemImm8.Create);
-            SetT32("111110001001xxxxxxxxxxxxxxxxxxxx", InstName.Ldrb,     InstEmit32.Ldrb,     OpCodeT32MemImm12.Create);
+            SetT32("111110001001xxxx<<<<xxxxxxxxxxxx", InstName.Ldrb,     InstEmit32.Ldrb,     OpCodeT32MemImm12.Create);
             SetT32("111110000001xxxx<<<<000000xxxxxx", InstName.Ldrb,     InstEmit32.Ldrb,     OpCodeT32MemRsImm.Create);
             SetT32("11101000x111<<<<xxxxxxxxxxxxxxxx", InstName.Ldrd,     InstEmit32.Ldrd,     OpCodeT32MemImm8D.Create);
             SetT32("11101001x1x1<<<<xxxxxxxxxxxxxxxx", InstName.Ldrd,     InstEmit32.Ldrd,     OpCodeT32MemImm8D.Create);
             SetT32("111110000011xxxxxxxx10x1xxxxxxxx", InstName.Ldrh,     InstEmit32.Ldrh,     OpCodeT32MemImm8.Create);
-            SetT32("111110000011xxxxxxxx1100xxxxxxxx", InstName.Ldrh,     InstEmit32.Ldrh,     OpCodeT32MemImm8.Create);
+            SetT32("111110000011xxxx<<<<1100xxxxxxxx", InstName.Ldrh,     InstEmit32.Ldrh,     OpCodeT32MemImm8.Create);
             SetT32("111110000011xxxxxxxx11x1xxxxxxxx", InstName.Ldrh,     InstEmit32.Ldrh,     OpCodeT32MemImm8.Create);
-            SetT32("111110001011xxxxxxxxxxxxxxxxxxxx", InstName.Ldrh,     InstEmit32.Ldrh,     OpCodeT32MemImm12.Create);
+            SetT32("111110001011xxxx<<<<xxxxxxxxxxxx", InstName.Ldrh,     InstEmit32.Ldrh,     OpCodeT32MemImm12.Create);
             SetT32("111110000011xxxx<<<<000000xxxxxx", InstName.Ldrh,     InstEmit32.Ldrh,     OpCodeT32MemRsImm.Create);
             SetT32("111110010001xxxxxxxx10x1xxxxxxxx", InstName.Ldrsb,    InstEmit32.Ldrsb,    OpCodeT32MemImm8.Create);
-            SetT32("111110010001xxxxxxxx1100xxxxxxxx", InstName.Ldrsb,    InstEmit32.Ldrsb,    OpCodeT32MemImm8.Create);
+            SetT32("111110010001xxxx<<<<1100xxxxxxxx", InstName.Ldrsb,    InstEmit32.Ldrsb,    OpCodeT32MemImm8.Create);
             SetT32("111110010001xxxxxxxx11x1xxxxxxxx", InstName.Ldrsb,    InstEmit32.Ldrsb,    OpCodeT32MemImm8.Create);
-            SetT32("111110011001xxxxxxxxxxxxxxxxxxxx", InstName.Ldrsb,    InstEmit32.Ldrsb,    OpCodeT32MemImm12.Create);
+            SetT32("111110011001xxxx<<<<xxxxxxxxxxxx", InstName.Ldrsb,    InstEmit32.Ldrsb,    OpCodeT32MemImm12.Create);
             SetT32("111110010001xxxx<<<<000000xxxxxx", InstName.Ldrsb,    InstEmit32.Ldrsb,    OpCodeT32MemRsImm.Create);
             SetT32("111110010011xxxxxxxx10x1xxxxxxxx", InstName.Ldrsh,    InstEmit32.Ldrsh,    OpCodeT32MemImm8.Create);
-            SetT32("111110010011xxxxxxxx1100xxxxxxxx", InstName.Ldrsh,    InstEmit32.Ldrsh,    OpCodeT32MemImm8.Create);
+            SetT32("111110010011xxxx<<<<1100xxxxxxxx", InstName.Ldrsh,    InstEmit32.Ldrsh,    OpCodeT32MemImm8.Create);
             SetT32("111110010011xxxxxxxx11x1xxxxxxxx", InstName.Ldrsh,    InstEmit32.Ldrsh,    OpCodeT32MemImm8.Create);
-            SetT32("111110011011xxxxxxxxxxxxxxxxxxxx", InstName.Ldrsh,    InstEmit32.Ldrsh,    OpCodeT32MemImm12.Create);
+            SetT32("111110011011xxxx<<<<xxxxxxxxxxxx", InstName.Ldrsh,    InstEmit32.Ldrsh,    OpCodeT32MemImm12.Create);
             SetT32("111110010011xxxx<<<<000000xxxxxx", InstName.Ldrsh,    InstEmit32.Ldrsh,    OpCodeT32MemRsImm.Create);
             SetT32("111110110000xxxx<<<<xxxx0000xxxx", InstName.Mla,      InstEmit32.Mla,      OpCodeT32AluMla.Create);
             SetT32("111110110000xxxxxxxxxxxx0001xxxx", InstName.Mls,      InstEmit32.Mls,      OpCodeT32AluMla.Create);
@@ -1164,12 +1169,19 @@ namespace ARMeilleure.Decoders
             SetT32("11110x00011x<<<<0xxxxxxxxxxxxxxx", InstName.Orn,      InstEmit32.Orn,      OpCodeT32AluImm.Create);
             SetT32("11101010010x<<<<0xxxxxxxxxxxxxxx", InstName.Orr,      InstEmit32.Orr,      OpCodeT32AluRsImm.Create);
             SetT32("11110x00010x<<<<0xxxxxxxxxxxxxxx", InstName.Orr,      InstEmit32.Orr,      OpCodeT32AluImm.Create);
+            SetT32("1111100010x1xxxx1111xxxxxxxxxxxx", InstName.Pld,      InstEmit32.Nop,      OpCodeT32.Create);
+            SetT32("1111100000x1xxxx11111100xxxxxxxx", InstName.Pld,      InstEmit32.Nop,      OpCodeT32.Create);
+            SetT32("1111100000x1xxxx1111000000xxxxxx", InstName.Pld,      InstEmit32.Nop,      OpCodeT32.Create);
             SetT32("11101011110xxxxx0xxxxxxxxxxxxxxx", InstName.Rsb,      InstEmit32.Rsb,      OpCodeT32AluRsImm.Create);
             SetT32("11110x01110xxxxx0xxxxxxxxxxxxxxx", InstName.Rsb,      InstEmit32.Rsb,      OpCodeT32AluImm.Create);
+            SetT32("111110101000xxxx1111xxxx0000xxxx", InstName.Sadd8,    InstEmit32.Sadd8,    OpCodeT32AluReg.Create);
             SetT32("11101011011xxxxx0xxxxxxxxxxxxxxx", InstName.Sbc,      InstEmit32.Sbc,      OpCodeT32AluRsImm.Create);
             SetT32("11110x01011xxxxx0xxxxxxxxxxxxxxx", InstName.Sbc,      InstEmit32.Sbc,      OpCodeT32AluImm.Create);
             SetT32("111100110100xxxx0xxxxxxxxx0xxxxx", InstName.Sbfx,     InstEmit32.Sbfx,     OpCodeT32AluBf.Create);
             SetT32("111110111001xxxx1111xxxx1111xxxx", InstName.Sdiv,     InstEmit32.Sdiv,     OpCodeT32AluMla.Create);
+            SetT32("111110101010xxxx1111xxxx1000xxxx", InstName.Sel,      InstEmit32.Sel,      OpCodeT32AluReg.Create);
+            SetT32("111110101000xxxx1111xxxx0010xxxx", InstName.Shadd8,   InstEmit32.Shadd8,   OpCodeT32AluReg.Create);
+            SetT32("111110101100xxxx1111xxxx0010xxxx", InstName.Shsub8,   InstEmit32.Shsub8,   OpCodeT32AluReg.Create);
             SetT32("111110110001xxxx<<<<xxxx00xxxxxx", InstName.Smla__,   InstEmit32.Smla__,   OpCodeT32AluMla.Create);
             SetT32("111110111100xxxxxxxxxxxx0000xxxx", InstName.Smlal,    InstEmit32.Smlal,    OpCodeT32AluUmull.Create);
             SetT32("111110111100xxxxxxxxxxxx10xxxxxx", InstName.Smlal__,  InstEmit32.Smlal__,  OpCodeT32AluUmull.Create);
@@ -1179,6 +1191,7 @@ namespace ARMeilleure.Decoders
             SetT32("111110110001xxxx1111xxxx00xxxxxx", InstName.Smul__,   InstEmit32.Smul__,   OpCodeT32AluMla.Create);
             SetT32("111110111000xxxxxxxxxxxx0000xxxx", InstName.Smull,    InstEmit32.Smull,    OpCodeT32AluUmull.Create);
             SetT32("111110110011xxxx1111xxxx000xxxxx", InstName.Smulw_,   InstEmit32.Smulw_,   OpCodeT32AluMla.Create);
+            SetT32("111110101100xxxx1111xxxx0000xxxx", InstName.Ssub8,    InstEmit32.Ssub8,    OpCodeT32AluReg.Create);
             SetT32("111010001100xxxxxxxx111110101111", InstName.Stl,      InstEmit32.Stl,      OpCodeT32MemStEx.Create);
             SetT32("111010001100xxxxxxxx111110001111", InstName.Stlb,     InstEmit32.Stlb,     OpCodeT32MemStEx.Create);
             SetT32("111010001100xxxxxxxx11111110xxxx", InstName.Stlex,    InstEmit32.Stlex,    OpCodeT32MemStEx.Create);
@@ -1188,19 +1201,26 @@ namespace ARMeilleure.Decoders
             SetT32("111010001100xxxxxxxx111110011111", InstName.Stlh,     InstEmit32.Stlh,     OpCodeT32MemStEx.Create);
             SetT32("1110100010x0xxxx0xxxxxxxxxxxxxxx", InstName.Stm,      InstEmit32.Stm,      OpCodeT32MemMult.Create);
             SetT32("1110100100x0xxxx0xxxxxxxxxxxxxxx", InstName.Stm,      InstEmit32.Stm,      OpCodeT32MemMult.Create);
-            SetT32("111110000100xxxxxxxx1<<>xxxxxxxx", InstName.Str,      InstEmit32.Str,      OpCodeT32MemImm8.Create);
-            SetT32("111110001100xxxxxxxxxxxxxxxxxxxx", InstName.Str,      InstEmit32.Str,      OpCodeT32MemImm12.Create);
+            SetT32("111110000100<<<<xxxx10x1xxxxxxxx", InstName.Str,      InstEmit32.Str,      OpCodeT32MemImm8.Create);
+            SetT32("111110000100<<<<xxxx1100xxxxxxxx", InstName.Str,      InstEmit32.Str,      OpCodeT32MemImm8.Create);
+            SetT32("111110000100<<<<xxxx11x1xxxxxxxx", InstName.Str,      InstEmit32.Str,      OpCodeT32MemImm8.Create);
+            SetT32("111110001100<<<<xxxxxxxxxxxxxxxx", InstName.Str,      InstEmit32.Str,      OpCodeT32MemImm12.Create);
             SetT32("111110000100<<<<xxxx000000xxxxxx", InstName.Str,      InstEmit32.Str,      OpCodeT32MemRsImm.Create);
-            SetT32("111110000000xxxxxxxx1<<>xxxxxxxx", InstName.Strb,     InstEmit32.Strb,     OpCodeT32MemImm8.Create);
-            SetT32("111110001000xxxxxxxxxxxxxxxxxxxx", InstName.Strb,     InstEmit32.Strb,     OpCodeT32MemImm12.Create);
+            SetT32("111110000000<<<<xxxx10x1xxxxxxxx", InstName.Strb,     InstEmit32.Strb,     OpCodeT32MemImm8.Create);
+            SetT32("111110000000<<<<xxxx1100xxxxxxxx", InstName.Strb,     InstEmit32.Strb,     OpCodeT32MemImm8.Create);
+            SetT32("111110000000<<<<xxxx11x1xxxxxxxx", InstName.Strb,     InstEmit32.Strb,     OpCodeT32MemImm8.Create);
+            SetT32("111110001000<<<<xxxxxxxxxxxxxxxx", InstName.Strb,     InstEmit32.Strb,     OpCodeT32MemImm12.Create);
             SetT32("111110000000<<<<xxxx000000xxxxxx", InstName.Strb,     InstEmit32.Strb,     OpCodeT32MemRsImm.Create);
             SetT32("11101000x110<<<<xxxxxxxxxxxxxxxx", InstName.Strd,     InstEmit32.Strd,     OpCodeT32MemImm8D.Create);
             SetT32("11101001x1x0<<<<xxxxxxxxxxxxxxxx", InstName.Strd,     InstEmit32.Strd,     OpCodeT32MemImm8D.Create);
-            SetT32("111110000010xxxxxxxx1<<>xxxxxxxx", InstName.Strh,     InstEmit32.Strh,     OpCodeT32MemImm8.Create);
-            SetT32("111110001010xxxxxxxxxxxxxxxxxxxx", InstName.Strh,     InstEmit32.Strh,     OpCodeT32MemImm12.Create);
+            SetT32("111110000010<<<<xxxx10x1xxxxxxxx", InstName.Strh,     InstEmit32.Strh,     OpCodeT32MemImm8.Create);
+            SetT32("111110000010<<<<xxxx1100xxxxxxxx", InstName.Strh,     InstEmit32.Strh,     OpCodeT32MemImm8.Create);
+            SetT32("111110000010<<<<xxxx11x1xxxxxxxx", InstName.Strh,     InstEmit32.Strh,     OpCodeT32MemImm8.Create);
+            SetT32("111110001010<<<<xxxxxxxxxxxxxxxx", InstName.Strh,     InstEmit32.Strh,     OpCodeT32MemImm12.Create);
             SetT32("111110000010<<<<xxxx000000xxxxxx", InstName.Strh,     InstEmit32.Strh,     OpCodeT32MemRsImm.Create);
             SetT32("11101011101<xxxx0xxx<<<<xxxxxxxx", InstName.Sub,      InstEmit32.Sub,      OpCodeT32AluRsImm.Create);
             SetT32("11110x01101<xxxx0xxx<<<<xxxxxxxx", InstName.Sub,      InstEmit32.Sub,      OpCodeT32AluImm.Create);
+            SetT32("11110x101010xxxx0xxxxxxxxxxxxxxx", InstName.Sub,      InstEmit32.Sub,      OpCodeT32AluImm12.Create);
             SetT32("111110100100xxxx1111xxxx10xxxxxx", InstName.Sxtb,     InstEmit32.Sxtb,     OpCodeT32AluUx.Create);
             SetT32("111110100010xxxx1111xxxx10xxxxxx", InstName.Sxtb16,   InstEmit32.Sxtb16,   OpCodeT32AluUx.Create);
             SetT32("111110100000xxxx1111xxxx10xxxxxx", InstName.Sxth,     InstEmit32.Sxth,     OpCodeT32AluUx.Create);
@@ -1210,11 +1230,15 @@ namespace ARMeilleure.Decoders
             SetT32("11110x001001xxxx0xxx1111xxxxxxxx", InstName.Teq,      InstEmit32.Teq,      OpCodeT32AluImm.Create);
             SetT32("111010100001xxxx0xxx1111xxxxxxxx", InstName.Tst,      InstEmit32.Tst,      OpCodeT32AluRsImm.Create);
             SetT32("11110x000001xxxx0xxx1111xxxxxxxx", InstName.Tst,      InstEmit32.Tst,      OpCodeT32AluImm.Create);
+            SetT32("111110101000xxxx1111xxxx0100xxxx", InstName.Uadd8,    InstEmit32.Uadd8,    OpCodeT32AluReg.Create);
             SetT32("111100111100xxxx0xxxxxxxxx0xxxxx", InstName.Ubfx,     InstEmit32.Ubfx,     OpCodeT32AluBf.Create);
             SetT32("111110111011xxxx1111xxxx1111xxxx", InstName.Udiv,     InstEmit32.Udiv,     OpCodeT32AluMla.Create);
+            SetT32("111110101000xxxx1111xxxx0110xxxx", InstName.Uhadd8,   InstEmit32.Uhadd8,   OpCodeT32AluReg.Create);
+            SetT32("111110101100xxxx1111xxxx0110xxxx", InstName.Uhsub8,   InstEmit32.Uhsub8,   OpCodeT32AluReg.Create);
             SetT32("111110111110xxxxxxxxxxxx0110xxxx", InstName.Umaal,    InstEmit32.Umaal,    OpCodeT32AluUmull.Create);
             SetT32("111110111110xxxxxxxxxxxx0000xxxx", InstName.Umlal,    InstEmit32.Umlal,    OpCodeT32AluUmull.Create);
             SetT32("111110111010xxxxxxxxxxxx0000xxxx", InstName.Umull,    InstEmit32.Umull,    OpCodeT32AluUmull.Create);
+            SetT32("111110101100xxxx1111xxxx0100xxxx", InstName.Usub8,    InstEmit32.Usub8,    OpCodeT32AluReg.Create);
             SetT32("111110100101xxxx1111xxxx10xxxxxx", InstName.Uxtb,     InstEmit32.Uxtb,     OpCodeT32AluUx.Create);
             SetT32("111110100011xxxx1111xxxx10xxxxxx", InstName.Uxtb16,   InstEmit32.Uxtb16,   OpCodeT32AluUx.Create);
             SetT32("111110100001xxxx1111xxxx10xxxxxx", InstName.Uxth,     InstEmit32.Uxth,     OpCodeT32AluUx.Create);
diff --git a/ARMeilleure/Instructions/InstEmitAlu32.cs b/ARMeilleure/Instructions/InstEmitAlu32.cs
index 31d8a02cc..584ada7e0 100644
--- a/ARMeilleure/Instructions/InstEmitAlu32.cs
+++ b/ARMeilleure/Instructions/InstEmitAlu32.cs
@@ -363,6 +363,11 @@ namespace ARMeilleure.Instructions
             EmitAluStore(context, res);
         }
 
+        public static void Sadd8(ArmEmitterContext context)
+        {
+            EmitAddSub8(context, add: true, unsigned: false);
+        }
+
         public static void Sbc(ArmEmitterContext context)
         {
             IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
@@ -401,17 +406,38 @@ namespace ARMeilleure.Instructions
 
         public static void Sdiv(ArmEmitterContext context)
         {
-            EmitDiv(context, false);
+            EmitDiv(context, unsigned: false);
+        }
+
+        public static void Sel(ArmEmitterContext context)
+        {
+            IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+            Operand n = GetIntA32(context, op.Rn);
+            Operand m = GetIntA32(context, op.Rm);
+
+            Operand ge0 = context.ZeroExtend8(OperandType.I32, context.Negate(GetFlag(PState.GE0Flag)));
+            Operand ge1 = context.ZeroExtend8(OperandType.I32, context.Negate(GetFlag(PState.GE1Flag)));
+            Operand ge2 = context.ZeroExtend8(OperandType.I32, context.Negate(GetFlag(PState.GE2Flag)));
+            Operand ge3 = context.Negate(GetFlag(PState.GE3Flag));
+
+            Operand mask = context.BitwiseOr(ge0, context.ShiftLeft(ge1, Const(8)));
+            mask = context.BitwiseOr(mask, context.ShiftLeft(ge2, Const(16)));
+            mask = context.BitwiseOr(mask, context.ShiftLeft(ge3, Const(24)));
+
+            Operand res = context.BitwiseOr(context.BitwiseAnd(n, mask), context.BitwiseAnd(m, context.BitwiseNot(mask)));
+
+            SetIntA32(context, op.Rd, res);
         }
 
         public static void Shadd8(ArmEmitterContext context)
         {
-            EmitHadd8(context, false);
+            EmitHadd8(context, unsigned: false);
         }
 
         public static void Shsub8(ArmEmitterContext context)
         {
-            EmitHsub8(context, false);
+            EmitHsub8(context, unsigned: false);
         }
 
         public static void Ssat(ArmEmitterContext context)
@@ -428,6 +454,11 @@ namespace ARMeilleure.Instructions
             EmitSat16(context, -(1 << op.SatImm), (1 << op.SatImm) - 1);
         }
 
+        public static void Ssub8(ArmEmitterContext context)
+        {
+            EmitAddSub8(context, add: false, unsigned: false);
+        }
+
         public static void Sub(ArmEmitterContext context)
         {
             IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
@@ -482,6 +513,11 @@ namespace ARMeilleure.Instructions
             EmitNZFlagsCheck(context, res);
         }
 
+        public static void Uadd8(ArmEmitterContext context)
+        {
+            EmitAddSub8(context, add: true, unsigned: true);
+        }
+
         public static void Ubfx(ArmEmitterContext context)
         {
             IOpCode32AluBf op = (IOpCode32AluBf)context.CurrOp;
@@ -496,17 +532,17 @@ namespace ARMeilleure.Instructions
 
         public static void Udiv(ArmEmitterContext context)
         {
-            EmitDiv(context, true);
+            EmitDiv(context, unsigned: true);
         }
 
         public static void Uhadd8(ArmEmitterContext context)
         {
-            EmitHadd8(context, true);
+            EmitHadd8(context, unsigned: true);
         }
 
         public static void Uhsub8(ArmEmitterContext context)
         {
-            EmitHsub8(context, true);
+            EmitHsub8(context, unsigned: true);
         }
 
         public static void Usat(ArmEmitterContext context)
@@ -523,6 +559,11 @@ namespace ARMeilleure.Instructions
             EmitSat16(context, 0, (1 << op.SatImm) - 1);
         }
 
+        public static void Usub8(ArmEmitterContext context)
+        {
+            EmitAddSub8(context, add: false, unsigned: true);
+        }
+
         public static void Uxtb(ArmEmitterContext context)
         {
             EmitSignExtend(context, false, 8);
@@ -678,9 +719,40 @@ namespace ARMeilleure.Instructions
             context.MarkLabel(lblEnd);
         }
 
+        private static void EmitAddSub8(ArmEmitterContext context, bool add, bool unsigned)
+        {
+            IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+            Operand n = GetIntA32(context, op.Rn);
+            Operand m = GetIntA32(context, op.Rm);
+
+            Operand res = Const(0);
+
+            for (int byteSel = 0; byteSel < 4; byteSel++)
+            {
+                Operand shift = Const(byteSel * 8);
+
+                Operand nByte = context.ShiftRightUI(n, shift);
+                Operand mByte = context.ShiftRightUI(m, shift);
+
+                nByte = unsigned ? context.ZeroExtend8(OperandType.I32, nByte) : context.SignExtend8(OperandType.I32, nByte);
+                mByte = unsigned ? context.ZeroExtend8(OperandType.I32, mByte) : context.SignExtend8(OperandType.I32, mByte);
+
+                Operand resByte = add ? context.Add(nByte, mByte) : context.Subtract(nByte, mByte);
+
+                res = context.BitwiseOr(res, context.ShiftLeft(context.ZeroExtend8(OperandType.I32, resByte), shift));
+
+                SetFlag(context, PState.GE0Flag + byteSel, unsigned && add
+                    ? context.ShiftRightUI(resByte, Const(8))
+                    : context.ShiftRightUI(context.BitwiseNot(resByte), Const(31)));
+            }
+
+            SetIntA32(context, op.Rd, res);
+        }
+
         private static void EmitHadd8(ArmEmitterContext context, bool unsigned)
         {
-            OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
+            IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
 
             Operand m = GetIntA32(context, op.Rm);
             Operand n = GetIntA32(context, op.Rn);
@@ -710,7 +782,7 @@ namespace ARMeilleure.Instructions
 
         private static void EmitHsub8(ArmEmitterContext context, bool unsigned)
         {
-            OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
+            IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
 
             Operand m = GetIntA32(context, op.Rm);
             Operand n = GetIntA32(context, op.Rn);
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index f7022f8ef..73be1aef9 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -81,6 +81,7 @@ namespace ARMeilleure.Instructions
         Sbcs,
         Sbfm,
         Sdiv,
+        Sel,
         Shsub8,
         Smaddl,
         Smsubl,
@@ -519,6 +520,7 @@ namespace ARMeilleure.Instructions
         Revsh,
         Rsb,
         Rsc,
+        Sadd8,
         Sbfx,
         Shadd8,
         Smla__,
@@ -529,6 +531,7 @@ namespace ARMeilleure.Instructions
         Smmls,
         Smul__,
         Smmul,
+        Ssub8,
         Stl,
         Stlb,
         Stlex,
@@ -550,6 +553,7 @@ namespace ARMeilleure.Instructions
         Teq,
         Trap,
         Tst,
+        Uadd8,
         Ubfx,
         Uhadd8,
         Uhsub8,
@@ -558,6 +562,7 @@ namespace ARMeilleure.Instructions
         Umull,
         Usat,
         Usat16,
+        Usub8,
         Uxtb,
         Uxtb16,
         Uxth,
diff --git a/ARMeilleure/State/PState.cs b/ARMeilleure/State/PState.cs
index e087e702c..9a80bc570 100644
--- a/ARMeilleure/State/PState.cs
+++ b/ARMeilleure/State/PState.cs
@@ -4,6 +4,10 @@ namespace ARMeilleure.State
     {
         TFlag = 5,
         EFlag = 9,
+        GE0Flag = 16,
+        GE1Flag = 17,
+        GE2Flag = 18,
+        GE3Flag = 19,
         QFlag = 27,
         VFlag = 28,
         CFlag = 29,
diff --git a/Ryujinx.Tests/Cpu/CpuTest32.cs b/Ryujinx.Tests/Cpu/CpuTest32.cs
index 1bd87aee1..be8df8dfc 100644
--- a/Ryujinx.Tests/Cpu/CpuTest32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTest32.cs
@@ -456,6 +456,10 @@ namespace Ryujinx.Tests.Cpu
 
             Assert.Multiple(() =>
             {
+                Assert.That(_context.GetPstateFlag(PState.GE0Flag), Is.EqualTo((_unicornEmu.CPSR & (1u << 16)) != 0), "GE0Flag");
+                Assert.That(_context.GetPstateFlag(PState.GE1Flag), Is.EqualTo((_unicornEmu.CPSR & (1u << 17)) != 0), "GE1Flag");
+                Assert.That(_context.GetPstateFlag(PState.GE2Flag), Is.EqualTo((_unicornEmu.CPSR & (1u << 18)) != 0), "GE2Flag");
+                Assert.That(_context.GetPstateFlag(PState.GE3Flag), Is.EqualTo((_unicornEmu.CPSR & (1u << 19)) != 0), "GE3Flag");
                 Assert.That(_context.GetPstateFlag(PState.QFlag), Is.EqualTo(_unicornEmu.QFlag), "QFlag");
                 Assert.That(_context.GetPstateFlag(PState.VFlag), Is.EqualTo(_unicornEmu.OverflowFlag), "VFlag");
                 Assert.That(_context.GetPstateFlag(PState.CFlag), Is.EqualTo(_unicornEmu.CarryFlag), "CFlag");
diff --git a/Ryujinx.Tests/Cpu/CpuTestAlu32.cs b/Ryujinx.Tests/Cpu/CpuTestAlu32.cs
index f04aa22e7..c7537cd9b 100644
--- a/Ryujinx.Tests/Cpu/CpuTestAlu32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestAlu32.cs
@@ -10,6 +10,21 @@ namespace Ryujinx.Tests.Cpu
 #if Alu32
 
 #region "ValueSource (Opcodes)"
+        private static uint[] _SU_H_AddSub_8_()
+        {
+            return new uint[]
+            {
+                0xe6100f90u, // SADD8  R0, R0, R0
+                0xe6100ff0u, // SSUB8  R0, R0, R0
+                0xe6300f90u, // SHADD8 R0, R0, R0
+                0xe6300ff0u, // SHSUB8 R0, R0, R0
+                0xe6500f90u, // UADD8  R0, R0, R0
+                0xe6500ff0u, // USUB8  R0, R0, R0
+                0xe6700f90u, // UHADD8 R0, R0, R0
+                0xe6700ff0u  // UHSUB8 R0, R0, R0
+            };
+        }
+
         private static uint[] _Ssat_Usat_()
         {
             return new uint[]
@@ -150,15 +165,14 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise]
-        public void Uhadd8([Values(0u, 0xdu)] uint rd,
-                           [Values(1u)] uint rm,
-                           [Values(2u)] uint rn,
-                           [Random(RndCnt)] uint w0,
-                           [Random(RndCnt)] uint w1,
-                           [Random(RndCnt)] uint w2)
+        public void SU_H_AddSub_8([ValueSource("_SU_H_AddSub_8_")] uint opcode,
+                                  [Values(0u, 0xdu)] uint rd,
+                                  [Values(1u)] uint rm,
+                                  [Values(2u)] uint rn,
+                                  [Random(RndCnt)] uint w0,
+                                  [Random(RndCnt)] uint w1,
+                                  [Random(RndCnt)] uint w2)
         {
-            uint opcode = 0xE6700F90u; // UHADD8 R0, R0, R0
-
             opcode |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rn & 15) << 16);
 
             uint sp = TestContext.CurrentContext.Random.NextUInt();
@@ -169,20 +183,24 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise]
-        public void Uhsub8([Values(0u, 0xdu)] uint rd,
-                           [Values(1u)] uint rm,
-                           [Values(2u)] uint rn,
-                           [Random(RndCnt)] uint w0,
-                           [Random(RndCnt)] uint w1,
-                           [Random(RndCnt)] uint w2)
+        public void Uadd8_Sel([Values(0u)] uint rd,
+                              [Values(1u)] uint rm,
+                              [Values(2u)] uint rn,
+                              [Random(RndCnt)] uint w0,
+                              [Random(RndCnt)] uint w1,
+                              [Random(RndCnt)] uint w2)
         {
-            uint opcode = 0xE6700FF0u; // UHSUB8 R0, R0, R0
+            uint opUadd8 = 0xE6500F90; // UADD8 R0, R0, R0
+            uint opSel   = 0xE6800FB0; // SEL R0, R0, R0
 
-            opcode |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rn & 15) << 16);
+            opUadd8  |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rn & 15) << 16);
+            opSel |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rn & 15) << 16);
 
-            uint sp = TestContext.CurrentContext.Random.NextUInt();
-
-            SingleOpcode(opcode, r0: w0, r1: w1, r2: w2, sp: sp);
+            SetContext(r0: w0, r1: w1, r2: w2);
+            Opcode(opUadd8);
+            Opcode(opSel);
+            Opcode(0xE12FFF1E); // BX LR
+            ExecuteOpcodes();
 
             CompareAgainstUnicorn();
         }