diff --git a/ARMeilleure/Decoders/OpCode32SimdMovn.cs b/ARMeilleure/Decoders/OpCode32SimdMovn.cs
new file mode 100644
index 000000000..e4c5f8db6
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32SimdMovn.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCode32SimdMovn : OpCode32Simd // Decoded form used by the Vmovn/Vqmovn/Vqmovun decoder table entries.
+    {
+        public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovn(inst, address, opCode);
+
+        public OpCode32SimdMovn(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Size = (opCode >> 18) & 0x3; // Size is taken from bits [19:18] of the encoding; assigned after the base constructor has run.
+        }
+    }
+}
diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
index 7fee8a719..ed77b99c2 100644
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -809,8 +809,8 @@ namespace ARMeilleure.Decoders
             SetA32("111100100xxxxxxxxxxx1000xxx0xxxx", InstName.Vadd,        InstEmit32.Vadd_I,      OpCode32SimdReg.Create);
             SetA32("<<<<11100x11xxxxxxxx101xx0x0xxxx", InstName.Vadd,        InstEmit32.Vadd_S,      OpCode32SimdRegS.Create);
             SetA32("111100100x00xxxxxxxx1101xxx0xxxx", InstName.Vadd,        InstEmit32.Vadd_V,      OpCode32SimdReg.Create);
-            SetA32("1111001x1x<<xxxxxxxx0000x0x0xxxx", InstName.Vaddl,       InstEmit32.Vaddl_I,     OpCode32SimdRegLong.Create);
-            SetA32("1111001x1x<<xxxxxxxx0001x0x0xxxx", InstName.Vaddw,       InstEmit32.Vaddw_I,     OpCode32SimdRegWide.Create);
+            SetA32("1111001x1x<<xxxxxxx00000x0x0xxxx", InstName.Vaddl,       InstEmit32.Vaddl_I,     OpCode32SimdRegLong.Create);
+            SetA32("1111001x1x<<xxxxxxx00001x0x0xxxx", InstName.Vaddw,       InstEmit32.Vaddw_I,     OpCode32SimdRegWide.Create);
             SetA32("111100100x00xxxxxxxx0001xxx1xxxx", InstName.Vand,        InstEmit32.Vand_I,      OpCode32SimdBinary.Create);
             SetA32("111100100x01xxxxxxxx0001xxx1xxxx", InstName.Vbic,        InstEmit32.Vbic_I,      OpCode32SimdBinary.Create);
             SetA32("1111001x1x000xxxxxxx<<x10x11xxxx", InstName.Vbic,        InstEmit32.Vbic_II,     OpCode32SimdImm.Create);
@@ -879,6 +879,7 @@ namespace ARMeilleure.Decoders
             SetA32("111100100xxxxxxxxxxx1001xxx0xxxx", InstName.Vmla,        InstEmit32.Vmla_I,      OpCode32SimdReg.Create);
             SetA32("<<<<11100x00xxxxxxxx101xx0x0xxxx", InstName.Vmla,        InstEmit32.Vmla_S,      OpCode32SimdRegS.Create);
             SetA32("111100100x00xxxxxxxx1101xxx1xxxx", InstName.Vmla,        InstEmit32.Vmla_V,      OpCode32SimdReg.Create);
+            SetA32("1111001x1x<<xxxxxxx01000x0x0xxxx", InstName.Vmlal,       InstEmit32.Vmlal_I,     OpCode32SimdRegLong.Create);
             SetA32("1111001x1x<<xxxxxxxx010xx1x0xxxx", InstName.Vmls,        InstEmit32.Vmls_1,      OpCode32SimdRegElem.Create);
             SetA32("<<<<11100x00xxxxxxxx101xx1x0xxxx", InstName.Vmls,        InstEmit32.Vmls_S,      OpCode32SimdRegS.Create);
             SetA32("111100100x10xxxxxxxx1101xxx1xxxx", InstName.Vmls,        InstEmit32.Vmls_V,      OpCode32SimdReg.Create);
@@ -898,7 +899,7 @@ namespace ARMeilleure.Decoders
             SetA32("1111001x1x001000xxx0101000x1xxxx", InstName.Vmovl,       InstEmit32.Vmovl,       OpCode32SimdLong.Create);
             SetA32("1111001x1x010000xxx0101000x1xxxx", InstName.Vmovl,       InstEmit32.Vmovl,       OpCode32SimdLong.Create);
             SetA32("1111001x1x100000xxx0101000x1xxxx", InstName.Vmovl,       InstEmit32.Vmovl,       OpCode32SimdLong.Create);
-            SetA32("111100111x11xx10xxxx001000x0xxx0", InstName.Vmovn,       InstEmit32.Vmovn,       OpCode32SimdCmpZ.Create);
+            SetA32("111100111x11<<10xxxx001000x0xxx0", InstName.Vmovn,       InstEmit32.Vmovn,       OpCode32SimdMovn.Create);
             SetA32("<<<<11101111xxxxxxxx101000010000", InstName.Vmrs,        InstEmit32.Vmrs,        OpCode32SimdSpecial.Create);
             SetA32("<<<<11101110xxxxxxxx101000010000", InstName.Vmsr,        InstEmit32.Vmsr,        OpCode32SimdSpecial.Create);
             SetA32("1111001x1x<<xxxxxxxx100xx1x0xxxx", InstName.Vmul,        InstEmit32.Vmul_1,      OpCode32SimdRegElem.Create);
@@ -924,22 +925,33 @@ namespace ARMeilleure.Decoders
             SetA32("1111001x1x000xxxxxxx<<x10x01xxxx", InstName.Vorr,        InstEmit32.Vorr_II,     OpCode32SimdImm.Create);
             SetA32("111100100x<<xxxxxxxx1011x0x1xxxx", InstName.Vpadd,       InstEmit32.Vpadd_I,     OpCode32SimdReg.Create);
             SetA32("111100110x00xxxxxxxx1101x0x0xxxx", InstName.Vpadd,       InstEmit32.Vpadd_V,     OpCode32SimdReg.Create);
+            SetA32("111100111x11<<00xxxx0010xxx0xxxx", InstName.Vpaddl,      InstEmit32.Vpaddl,      OpCode32SimdCmpZ.Create);
             SetA32("1111001x0x<<xxxxxxxx1010x0x0xxxx", InstName.Vpmax,       InstEmit32.Vpmax_I,     OpCode32SimdReg.Create);
             SetA32("111100110x00xxxxxxxx1111x0x0xxxx", InstName.Vpmax,       InstEmit32.Vpmax_V,     OpCode32SimdReg.Create);
             SetA32("1111001x0x<<xxxxxxxx1010x0x1xxxx", InstName.Vpmin,       InstEmit32.Vpmin_I,     OpCode32SimdReg.Create);
             SetA32("111100110x10xxxxxxxx1111x0x0xxxx", InstName.Vpmin,       InstEmit32.Vpmin_V,     OpCode32SimdReg.Create);
+            SetA32("1111001x0xxxxxxxxxxx0000xxx1xxxx", InstName.Vqadd,       InstEmit32.Vqadd,       OpCode32SimdReg.Create);
+            SetA32("111100100x01xxxxxxxx1011xxx0xxxx", InstName.Vqdmulh,     InstEmit32.Vqdmulh,     OpCode32SimdReg.Create);
+            SetA32("111100100x10xxxxxxxx1011xxx0xxxx", InstName.Vqdmulh,     InstEmit32.Vqdmulh,     OpCode32SimdReg.Create);
+            SetA32("111100111x11<<10xxxx00101xx0xxx0", InstName.Vqmovn,      InstEmit32.Vqmovn,      OpCode32SimdMovn.Create);
+            SetA32("111100111x11<<10xxxx001001x0xxx0", InstName.Vqmovun,     InstEmit32.Vqmovun,     OpCode32SimdMovn.Create);
             SetA32("1111001x1x>>>xxxxxxx100101x1xxx0", InstName.Vqrshrn,     InstEmit32.Vqrshrn,     OpCode32SimdShImmNarrow.Create);
             SetA32("111100111x>>>xxxxxxx100001x1xxx0", InstName.Vqrshrun,    InstEmit32.Vqrshrun,    OpCode32SimdShImmNarrow.Create);
             SetA32("1111001x1x>>>xxxxxxx100100x1xxx0", InstName.Vqshrn,      InstEmit32.Vqshrn,      OpCode32SimdShImmNarrow.Create);
+            SetA32("111100111x>>>xxxxxxx100000x1xxx0", InstName.Vqshrun,     InstEmit32.Vqshrun,     OpCode32SimdShImmNarrow.Create);
+            SetA32("1111001x0xxxxxxxxxxx0010xxx1xxxx", InstName.Vqsub,       InstEmit32.Vqsub,       OpCode32SimdReg.Create);
             SetA32("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe,      InstEmit32.Vrecpe,      OpCode32SimdSqrte.Create);
             SetA32("111100100x00xxxxxxxx1111xxx1xxxx", InstName.Vrecps,      InstEmit32.Vrecps,      OpCode32SimdReg.Create);
             SetA32("111100111x11xx00xxxx000<<xx0xxxx", InstName.Vrev,        InstEmit32.Vrev,        OpCode32SimdRev.Create);
+            SetA32("1111001x0x<<xxxxxxxx0001xxx0xxxx", InstName.Vrhadd,      InstEmit32.Vrhadd,      OpCode32SimdReg.Create);
             SetA32("111111101x1110xxxxxx101x01x0xxxx", InstName.Vrint,       InstEmit32.Vrint_RM,    OpCode32SimdS.Create);
             SetA32("<<<<11101x110110xxxx101x11x0xxxx", InstName.Vrint,       InstEmit32.Vrint_Z,     OpCode32SimdS.Create);
             SetA32("<<<<11101x110111xxxx101x01x0xxxx", InstName.Vrintx,      InstEmit32.Vrintx_S,    OpCode32SimdS.Create);
             SetA32("1111001x1x>>>xxxxxxx0010>xx1xxxx", InstName.Vrshr,       InstEmit32.Vrshr,       OpCode32SimdShImm.Create);
+            SetA32("111100101x>>>xxxxxxx100001x1xxx0", InstName.Vrshrn,      InstEmit32.Vrshrn,      OpCode32SimdShImmNarrow.Create);
             SetA32("111100111x111011xxxx010x1xx0xxxx", InstName.Vrsqrte,     InstEmit32.Vrsqrte,     OpCode32SimdSqrte.Create);
             SetA32("111100100x10xxxxxxxx1111xxx1xxxx", InstName.Vrsqrts,     InstEmit32.Vrsqrts,     OpCode32SimdReg.Create);
+            SetA32("1111001x1x>>>xxxxxxx0011>xx1xxxx", InstName.Vrsra,       InstEmit32.Vrsra,       OpCode32SimdShImm.Create);
             SetA32("111111100xxxxxxxxxxx101xx0x0xxxx", InstName.Vsel,        InstEmit32.Vsel,        OpCode32SimdSel.Create);
             SetA32("111100101x>>>xxxxxxx0101>xx1xxxx", InstName.Vshl,        InstEmit32.Vshl,        OpCode32SimdShImm.Create);
             SetA32("1111001x0xxxxxxxxxxx0100xxx0xxxx", InstName.Vshl,        InstEmit32.Vshl_I,      OpCode32SimdReg.Create);
@@ -970,7 +982,8 @@ namespace ARMeilleure.Decoders
             SetA32("111100110xxxxxxxxxxx1000xxx0xxxx", InstName.Vsub,        InstEmit32.Vsub_I,      OpCode32SimdReg.Create);
             SetA32("<<<<11100x11xxxxxxxx101xx1x0xxxx", InstName.Vsub,        InstEmit32.Vsub_S,      OpCode32SimdRegS.Create);
             SetA32("111100100x10xxxxxxxx1101xxx0xxxx", InstName.Vsub,        InstEmit32.Vsub_V,      OpCode32SimdReg.Create);
-            SetA32("1111001x1x<<xxxxxxxx0011x0x0xxxx", InstName.Vsubw,       InstEmit32.Vsubw_I,     OpCode32SimdRegWide.Create);
+            SetA32("1111001x1x<<xxxxxxx00010x0x0xxxx", InstName.Vsubl,       InstEmit32.Vsubl_I,     OpCode32SimdRegLong.Create);
+            SetA32("1111001x1x<<xxxxxxx00011x0x0xxxx", InstName.Vsubw,       InstEmit32.Vsubw_I,     OpCode32SimdRegWide.Create);
             SetA32("111100111x11xxxxxxxx10xxxxx0xxxx", InstName.Vtbl,        InstEmit32.Vtbl,        OpCode32SimdTbl.Create);
             SetA32("111100111x11<<10xxxx00001xx0xxxx", InstName.Vtrn,        InstEmit32.Vtrn,        OpCode32SimdCmpZ.Create);
             SetA32("111100100x<<xxxxxxxx1000xxx1xxxx", InstName.Vtst,        InstEmit32.Vtst,        OpCode32SimdReg.Create);
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
index ed6b2a314..79b376e95 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
@@ -777,6 +777,13 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        public static void Vmlal_I(ArmEmitterContext context)
+        {
+            OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp; // Cast to the type the Vmlal decoder table entry creates.
+
+            EmitVectorTernaryLongOpI32(context, (d, n, m) => context.Add(d, context.Multiply(n, m)), !op.U); // Accumulate: d + n * m; signed when U is clear.
+        }
+
         public static void Vmls_S(ArmEmitterContext context)
         {
             if (Optimizations.FastFP && Optimizations.UseSse2)
@@ -992,6 +999,13 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        public static void Vpaddl(ArmEmitterContext context)
+        {
+            bool isSigned = (((OpCode32Simd)context.CurrOp).Opc & 1) == 0; // Low bit of Opc clear selects the signed form.
+
+            EmitVectorPairwiseLongOpI32(context, (lhs, rhs) => context.Add(lhs, rhs), isSigned);
+        }
+
         public static void Vpmax_V(ArmEmitterContext context)
         {
             if (Optimizations.FastFP && Optimizations.UseSse2)
@@ -1014,7 +1028,7 @@ namespace ARMeilleure.Instructions
             }
             else
             {
-                EmitVectorPairwiseOpI32(context, (op1, op2) => 
+                EmitVectorPairwiseOpI32(context, (op1, op2) =>
                 {
                     Operand greater = op.U ? context.ICompareGreaterUI(op1, op2) : context.ICompareGreater(op1, op2);
                     return context.ConditionalSelect(greater, op1, op2);
@@ -1052,6 +1066,62 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        public static void Vqadd(ArmEmitterContext context)
+        {
+            bool isSigned = !((OpCode32SimdReg)context.CurrOp).U; // U clear means signed saturation.
+
+            EmitSaturatingAddSubBinaryOp(context, add: true, isSigned);
+        }
+
+        public static void Vqdmulh(ArmEmitterContext context)
+        {
+            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+            int eSize = 8 << op.Size;
+
+            EmitVectorBinaryOpI32(context, (ne, me) =>
+            {
+                if (op.Size == 2) // Size-2 elements are multiplied in I64.
+                {
+                    ne = context.SignExtend32(OperandType.I64, ne);
+                    me = context.SignExtend32(OperandType.I64, me);
+                }
+
+                Operand product = context.Multiply(ne, me);
+                Operand shifted = context.ShiftRightSI(product, Const(eSize - 1)); // Keep the high part of the product: (n * m) >> (eSize - 1).
+                Operand saturated = EmitSatQ(context, shifted, eSize, signedSrc: true, signedDst: true);
+
+                if (op.Size == 2)
+                {
+                    saturated = context.ConvertI64ToI32(saturated);
+                }
+
+                return saturated;
+            }, signed: true);
+        }
+
+        public static void Vqmovn(ArmEmitterContext context)
+        {
+            OpCode32SimdMovn op = (OpCode32SimdMovn)context.CurrOp;
+            bool isSigned = !op.Q; // Q clear selects the signed variant; source and destination share the signedness.
+            int eSize = 8 << op.Size;
+
+            EmitVectorUnaryNarrowOp32(context, (me) => EmitSatQ(context, me, eSize, isSigned, isSigned), isSigned);
+        }
+
+        public static void Vqmovun(ArmEmitterContext context)
+        {
+            int eSize = 8 << ((OpCode32SimdMovn)context.CurrOp).Size; // Destination element width in bits.
+
+            EmitVectorUnaryNarrowOp32(context, (me) => EmitSatQ(context, me, eSize, signedSrc: true, signedDst: false), signed: true);
+        }
+
+        public static void Vqsub(ArmEmitterContext context)
+        {
+            bool isSigned = !((OpCode32SimdReg)context.CurrOp).U; // U clear means signed saturation.
+
+            EmitSaturatingAddSubBinaryOp(context, add: false, isSigned);
+        }
+
         public static void Vrev(ArmEmitterContext context)
         {
             OpCode32SimdRev op = (OpCode32SimdRev)context.CurrOp;
@@ -1202,6 +1272,30 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        public static void Vrhadd(ArmEmitterContext context)
+        {
+            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+            EmitVectorBinaryOpI32(context, (op1, op2) =>
+            {
+                if (op.Size == 2) // Widen to I64 so the carry out of op1 + op2 + 1 is kept; the extension must match the signedness.
+                {
+                    op1 = op.U ? context.ZeroExtend32(OperandType.I64, op1) : context.SignExtend32(OperandType.I64, op1);
+                    op2 = op.U ? context.ZeroExtend32(OperandType.I64, op2) : context.SignExtend32(OperandType.I64, op2);
+                }
+
+                Operand res = context.Add(context.Add(op1, op2), Const(op1.Type, 1L)); // The +1 rounds the halved sum.
+                res = context.ShiftRightUI(res, Const(1)); // Bits shifted in at the top land above the element width and are discarded on narrowing/insert.
+
+                if (op.Size == 2)
+                {
+                    res = context.ConvertI64ToI32(res);
+                }
+
+                return res;
+            }, !op.U);
+        }
+
         public static void Vrsqrte(ArmEmitterContext context)
         {
             OpCode32SimdSqrte op = (OpCode32SimdSqrte)context.CurrOp;
@@ -1349,6 +1443,13 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        public static void Vsubl_I(ArmEmitterContext context)
+        {
+            bool isSigned = !((OpCode32SimdRegLong)context.CurrOp).U; // U clear means the operands are treated as signed.
+
+            EmitVectorBinaryLongOpI32(context, (ne, me) => context.Subtract(ne, me), isSigned);
+        }
+
         public static void Vsubw_I(ArmEmitterContext context)
         {
             OpCode32SimdRegWide op = (OpCode32SimdRegWide)context.CurrOp;
@@ -1356,6 +1457,46 @@ namespace ARMeilleure.Instructions
             EmitVectorBinaryWideOpI32(context, (op1, op2) => context.Subtract(op1, op2), !op.U);
         }
 
+        private static void EmitSaturatingAddSubBinaryOp(ArmEmitterContext context, bool add, bool signed) // Shared body of Vqadd (add: true) and Vqsub (add: false).
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            EmitVectorBinaryOpI32(context, (ne, me) =>
+            {
+                if (op.Size <= 2)
+                {
+                    if (op.Size == 2) // Widen size-2 elements to I64 so the raw sum/difference is not truncated before saturating.
+                    {
+                        ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
+                        me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me);
+                    }
+
+                    Operand res = add ? context.Add(ne, me) : context.Subtract(ne, me);
+
+                    res = EmitSatQ(context, res, 8 << op.Size, signedSrc: true, signed); // Intermediate is always compared as signed; `signed` selects the destination range.
+
+                    if (op.Size == 2)
+                    {
+                        res = context.ConvertI64ToI32(res);
+                    }
+
+                    return res;
+                }
+                else if (add) /* if (op.Size == 3) */
+                {
+                    return signed
+                        ? EmitBinarySignedSatQAdd(context, ne, me)
+                        : EmitBinaryUnsignedSatQAdd(context, ne, me);
+                }
+                else /* if (sub) */
+                {
+                    return signed
+                        ? EmitBinarySignedSatQSub(context, ne, me)
+                        : EmitBinaryUnsignedSatQSub(context, ne, me);
+                }
+            }, signed);
+        }
+
         private static void EmitSse41MaxMinNumOpF32(ArmEmitterContext context, bool isMaxNum, bool scalar)
         {
             IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
index 805656d20..27b5c1302 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHelper.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
@@ -1281,7 +1281,7 @@ namespace ARMeilleure.Instructions
 
         public static void EmitSseOrAvxExitFtzAndDazModesOpF(ArmEmitterContext context, Operand isTrue = default)
         {
-            isTrue = isTrue == default 
+            isTrue = isTrue == default
                 ? context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFpcrFz)))
                 : isTrue;
 
@@ -1612,7 +1612,7 @@ namespace ARMeilleure.Instructions
         }
 
         // long BinarySignedSatQAdd(long op1, long op2);
-        private static Operand EmitBinarySignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
+        public static Operand EmitBinarySignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
         {
             Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
 
@@ -1639,7 +1639,7 @@ namespace ARMeilleure.Instructions
         }
 
         // ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2);
-        private static Operand EmitBinaryUnsignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
+        public static Operand EmitBinaryUnsignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
         {
             Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
 
@@ -1660,7 +1660,7 @@ namespace ARMeilleure.Instructions
         }
 
         // long BinarySignedSatQSub(long op1, long op2);
-        private static Operand EmitBinarySignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
+        public static Operand EmitBinarySignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
         {
             Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
 
@@ -1687,7 +1687,7 @@ namespace ARMeilleure.Instructions
         }
 
         // ulong BinaryUnsignedSatQSub(ulong op1, ulong op2);
-        private static Operand EmitBinaryUnsignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
+        public static Operand EmitBinaryUnsignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
         {
             Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
 
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
index 07ff481c1..c530985fa 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
@@ -219,6 +219,25 @@ namespace ARMeilleure.Instructions
 
         // Integer
 
+        public static void EmitVectorUnaryAccumulateOpI32(ArmEmitterContext context, Func1I emit, bool signed)
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            int count = op.GetBytesCount() >> op.Size;
+
+            Operand acc = GetVecA32(op.Qd);
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand dstElem = EmitVectorExtract32(context, op.Qd, op.Id + i, op.Size, signed);
+                Operand srcElem = EmitVectorExtract32(context, op.Qm, op.Im + i, op.Size, signed);
+
+                acc = EmitVectorInsert(context, acc, context.Add(dstElem, emit(srcElem)), op.Id + i, op.Size); // Destination element += emit(source element).
+            }
+
+            context.Copy(GetVecA32(op.Qd), acc);
+        }
+
         public static void EmitVectorUnaryOpI32(ArmEmitterContext context, Func1I emit, bool signed)
         {
             OpCode32Simd op = (OpCode32Simd)context.CurrOp;
@@ -385,6 +404,18 @@ namespace ARMeilleure.Instructions
             EmitVectorUnaryOpI32(context, emit, true);
         }
 
+        public static void EmitVectorUnaryOpSx32(ArmEmitterContext context, Func1I emit, bool accumulate)
+        {
+            if (!accumulate)
+            {
+                EmitVectorUnaryOpI32(context, emit, signed: true);
+            }
+            else
+            {
+                EmitVectorUnaryAccumulateOpI32(context, emit, signed: true); // Adds the emitted value onto the existing destination element.
+            }
+        }
+
         public static void EmitVectorBinaryOpSx32(ArmEmitterContext context, Func2I emit)
         {
             EmitVectorBinaryOpI32(context, emit, true);
@@ -400,6 +431,18 @@ namespace ARMeilleure.Instructions
             EmitVectorUnaryOpI32(context, emit, false);
         }
 
+        public static void EmitVectorUnaryOpZx32(ArmEmitterContext context, Func1I emit, bool accumulate)
+        {
+            if (!accumulate)
+            {
+                EmitVectorUnaryOpI32(context, emit, signed: false);
+            }
+            else
+            {
+                EmitVectorUnaryAccumulateOpI32(context, emit, signed: false); // Adds the emitted value onto the existing destination element.
+            }
+        }
+
         public static void EmitVectorBinaryOpZx32(ArmEmitterContext context, Func2I emit)
         {
             EmitVectorBinaryOpI32(context, emit, false);
@@ -592,6 +635,34 @@ namespace ARMeilleure.Instructions
             context.Copy(GetVecA32(op.Qd), res);
         }
 
+        public static void EmitVectorPairwiseLongOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            int pairCount = ((op.Q ? 16 : 8) >> op.Size) >> 1; // Half the source element count: one result per adjacent pair.
+            int dstBase = (op.Vd & 1) * pairCount;
+            bool widen = op.Size == 2; // Size-2 inputs are extended to I64 before being combined.
+
+            Operand res = GetVecA32(op.Qd);
+
+            for (int pair = 0; pair < pairCount; pair++)
+            {
+                int srcIndex = op.Im + (pair << 1);
+                Operand lo = EmitVectorExtract32(context, op.Qm, srcIndex, op.Size, signed);
+                Operand hi = EmitVectorExtract32(context, op.Qm, srcIndex + 1, op.Size, signed);
+
+                if (widen)
+                {
+                    lo = signed ? context.SignExtend32(OperandType.I64, lo) : context.ZeroExtend32(OperandType.I64, lo);
+                    hi = signed ? context.SignExtend32(OperandType.I64, hi) : context.ZeroExtend32(OperandType.I64, hi);
+                }
+
+                res = EmitVectorInsert(context, res, emit(lo, hi), dstBase + pair, op.Size + 1); // Result is stored at the next larger element size.
+            }
+
+            context.Copy(GetVecA32(op.Qd), res);
+        }
+
         // Narrow
 
         public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit, bool signed = false)
diff --git a/ARMeilleure/Instructions/InstEmitSimdShift32.cs b/ARMeilleure/Instructions/InstEmitSimdShift32.cs
index 6dcfe065b..e0968b7b1 100644
--- a/ARMeilleure/Instructions/InstEmitSimdShift32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdShift32.cs
@@ -33,64 +33,24 @@ namespace ARMeilleure.Instructions
             EmitShrImmSaturatingNarrowOp(context, op.U ? ShrImmSaturatingNarrowFlags.VectorZxZx : ShrImmSaturatingNarrowFlags.VectorSxSx);
         }
 
+        public static void Vqshrun(ArmEmitterContext context)
+        {
+            EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); // Flags are fixed to VectorSxZx; no op.U selection is made here.
+        }
+
         public static void Vrshr(ArmEmitterContext context)
         {
-            OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
-            int shift = GetImmShr(op);
-            long roundConst = 1L << (shift - 1);
+            EmitRoundShrImmOp(context, accumulate: false); // Plain rounding shift right; the result replaces the destination elements.
+        }
 
-            if (op.U)
-            {
-                if (op.Size < 2)
-                {
-                    EmitVectorUnaryOpZx32(context, (op1) =>
-                    {
-                        op1 = context.Add(op1, Const(op1.Type, roundConst));
+        public static void Vrshrn(ArmEmitterContext context)
+        {
+            EmitRoundShrImmNarrowOp(context, signed: false); // Always uses the unsigned path of the rounding narrow helper.
+        }
 
-                        return context.ShiftRightUI(op1, Const(shift));
-                    });
-                }
-                else if (op.Size == 2)
-                {
-                    EmitVectorUnaryOpZx32(context, (op1) =>
-                    {
-                        op1 = context.ZeroExtend32(OperandType.I64, op1);
-                        op1 = context.Add(op1, Const(op1.Type, roundConst));
-
-                        return context.ConvertI64ToI32(context.ShiftRightUI(op1, Const(shift)));
-                    });
-                }
-                else /* if (op.Size == 3) */
-                {
-                    EmitVectorUnaryOpZx32(context, (op1) => EmitShrImm64(context, op1, signed: false, roundConst, shift));
-                }
-            }
-            else
-            {
-                if (op.Size < 2)
-                {
-                    EmitVectorUnaryOpSx32(context, (op1) =>
-                    {
-                        op1 = context.Add(op1, Const(op1.Type, roundConst));
-
-                        return context.ShiftRightSI(op1, Const(shift));
-                    });
-                }
-                else if (op.Size == 2)
-                {
-                    EmitVectorUnaryOpSx32(context, (op1) =>
-                    {
-                        op1 = context.SignExtend32(OperandType.I64, op1);
-                        op1 = context.Add(op1, Const(op1.Type, roundConst));
-
-                        return context.ConvertI64ToI32(context.ShiftRightSI(op1, Const(shift)));
-                    });
-                }
-                else /* if (op.Size == 3) */
-                {
-                    EmitVectorUnaryOpZx32(context, (op1) => EmitShrImm64(context, op1, signed: true, roundConst, shift));
-                }
-            }
+        public static void Vrsra(ArmEmitterContext context)
+        {
+            EmitRoundShrImmOp(context, accumulate: true); // Same rounding shift as Vrshr, but routed through the accumulate helpers.
         }
 
         public static void Vshl(ArmEmitterContext context)
@@ -191,6 +151,89 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        public static void EmitRoundShrImmOp(ArmEmitterContext context, bool accumulate) // Rounding shift right by immediate; shared by Vrshr (accumulate: false) and Vrsra (accumulate: true).
+        {
+            OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+            int shift = GetImmShr(op);
+            long roundConst = 1L << (shift - 1); // Half of 2^shift; added before shifting so the result is rounded.
+
+            if (op.U)
+            {
+                if (op.Size < 2)
+                {
+                    EmitVectorUnaryOpZx32(context, (op1) =>
+                    {
+                        op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+                        return context.ShiftRightUI(op1, Const(shift));
+                    }, accumulate);
+                }
+                else if (op.Size == 2)
+                {
+                    EmitVectorUnaryOpZx32(context, (op1) =>
+                    {
+                        op1 = context.ZeroExtend32(OperandType.I64, op1); // Widen to I64 before adding the rounding constant.
+                        op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+                        return context.ConvertI64ToI32(context.ShiftRightUI(op1, Const(shift)));
+                    }, accumulate);
+                }
+                else /* if (op.Size == 3) */
+                {
+                    EmitVectorUnaryOpZx32(context, (op1) => EmitShrImm64(context, op1, signed: false, roundConst, shift), accumulate);
+                }
+            }
+            else
+            {
+                if (op.Size < 2)
+                {
+                    EmitVectorUnaryOpSx32(context, (op1) =>
+                    {
+                        op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+                        return context.ShiftRightSI(op1, Const(shift));
+                    }, accumulate);
+                }
+                else if (op.Size == 2)
+                {
+                    EmitVectorUnaryOpSx32(context, (op1) =>
+                    {
+                        op1 = context.SignExtend32(OperandType.I64, op1); // Widen to I64 before adding the rounding constant.
+                        op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+                        return context.ConvertI64ToI32(context.ShiftRightSI(op1, Const(shift)));
+                    }, accumulate);
+                }
+                else /* if (op.Size == 3) */
+                {
+                    EmitVectorUnaryOpZx32(context, (op1) => EmitShrImm64(context, op1, signed: true, roundConst, shift), accumulate); // NOTE(review): signed path uses the Zx helper, unlike the Sx branches above; presumably harmless for 64-bit elements, confirm.
+                }
+            }
+        }
+
+        private static void EmitRoundShrImmNarrowOp(ArmEmitterContext context, bool signed) // Rounding shift right by immediate, emitted through the narrowing unary helper.
+        {
+            OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+            int shift = GetImmShr(op);
+            long roundConst = 1L << (shift - 1); // Half of 2^shift; added before shifting so the result is rounded.
+
+            EmitVectorUnaryNarrowOp32(context, (op1) =>
+            {
+                if (op.Size <= 1)
+                {
+                    op1 = context.Add(op1, Const(op1.Type, roundConst));
+                    op1 = signed ? context.ShiftRightSI(op1, Const(shift)) : context.ShiftRightUI(op1, Const(shift));
+                }
+                else /* if (op.Size == 2) */
+                {
+                    op1 = EmitShrImm64(context, op1, signed, roundConst, shift); // shift <= 32
+                }
+
+                return op1;
+            }, signed);
+        }
+
         private static Operand EmitShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size, bool unsigned)
         {
             if (shiftLsB.Type == OperandType.I64)
@@ -289,7 +332,7 @@ namespace ARMeilleure.Instructions
                     op1 = EmitShrImm64(context, op1, signedSrc, roundConst, shift); // shift <= 32
                 }
 
-                return EmitSatQ(context, op1, 8 << op.Size, signedDst);
+                return EmitSatQ(context, op1, 8 << op.Size, signedSrc, signedDst);
             }, signedSrc);
         }
 
@@ -313,15 +356,20 @@ namespace ARMeilleure.Instructions
             return context.Call(info, value, Const(roundConst), Const(shift));
         }
 
-        private static Operand EmitSatQ(ArmEmitterContext context, Operand value, int eSize, bool signed)
+        private static Operand EmitSatQ(ArmEmitterContext context, Operand value, int eSize, bool signedSrc, bool signedDst)
         {
             Debug.Assert(eSize <= 32);
 
-            long intMin = signed ? -(1L << (eSize - 1)) : 0;
-            long intMax = signed ? (1L << (eSize - 1)) - 1 : (1L << eSize) - 1;
+            long intMin = signedDst ? -(1L << (eSize - 1)) : 0;
+            long intMax = signedDst ? (1L << (eSize - 1)) - 1 : (1L << eSize) - 1;
 
-            Operand gt = context.ICompareGreater(value, Const(value.Type, intMax));
-            Operand lt = context.ICompareLess(value, Const(value.Type, intMin));
+            Operand gt = signedSrc
+                ? context.ICompareGreater(value, Const(value.Type, intMax))
+                : context.ICompareGreaterUI(value, Const(value.Type, intMax));
+
+            Operand lt = signedSrc
+                ? context.ICompareLess(value, Const(value.Type, intMin))
+                : context.ICompareLessUI(value, Const(value.Type, intMin));
 
             value = context.ConditionalSelect(gt, Const(value.Type, intMax), value);
             value = context.ConditionalSelect(lt, Const(value.Type, intMin), value);
diff --git a/ARMeilleure/Instructions/InstEmitSystem32.cs b/ARMeilleure/Instructions/InstEmitSystem32.cs
index 674a44382..acd17045f 100644
--- a/ARMeilleure/Instructions/InstEmitSystem32.cs
+++ b/ARMeilleure/Instructions/InstEmitSystem32.cs
@@ -16,18 +16,13 @@ namespace ARMeilleure.Instructions
         {
             OpCode32System op = (OpCode32System)context.CurrOp;
 
-            if (op.Coproc != 15)
+            if (op.Coproc != 15 || op.Opc1 != 0)
             {
                 InstEmit.Und(context);
 
                 return;
             }
 
-            if (op.Opc1 != 0)
-            {
-                throw new NotImplementedException($"Unknown MRC Opc1 0x{op.Opc1:X16} at 0x{op.Address:X16}.");
-            }
-
             MethodInfo info;
 
             switch (op.CRn)
@@ -35,7 +30,7 @@ namespace ARMeilleure.Instructions
                 case 13: // Process and Thread Info.
                     if (op.CRm != 0)
                     {
-                        throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X16} at 0x{op.Address:X16}.");
+                        throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
                     }
 
                     switch (op.Opc2)
@@ -44,7 +39,7 @@ namespace ARMeilleure.Instructions
                             info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.SetTpidrEl032)); break;
 
                         default:
-                            throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X16} at 0x{op.Address:X16}.");
+                            throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
                     }
 
                     break;
@@ -59,11 +54,11 @@ namespace ARMeilleure.Instructions
                                     return; // No-op.
 
                                 default:
-                                    throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X16} at 0x{op.Address:X16}.");
+                                    throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
                             }
 
                         default:
-                            throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X16} at 0x{op.Address:X16}.");
+                            throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
                     }
 
                 default:
@@ -77,18 +72,13 @@ namespace ARMeilleure.Instructions
         {
             OpCode32System op = (OpCode32System)context.CurrOp;
 
-            if (op.Coproc != 15)
+            if (op.Coproc != 15 || op.Opc1 != 0)
             {
                 InstEmit.Und(context);
 
                 return;
             }
 
-            if (op.Opc1 != 0)
-            {
-                throw new NotImplementedException($"Unknown MRC Opc1 0x{op.Opc1:X16} at 0x{op.Address:X16}.");
-            }
-
             MethodInfo info;
 
             switch (op.CRn)
@@ -96,7 +86,7 @@ namespace ARMeilleure.Instructions
                 case 13: // Process and Thread Info.
                     if (op.CRm != 0)
                     {
-                        throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X16} at 0x{op.Address:X16}.");
+                        throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
                     }
 
                     switch (op.Opc2)
@@ -108,13 +98,13 @@ namespace ARMeilleure.Instructions
                             info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetTpidr32)); break;
 
                         default:
-                            throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X16} at 0x{op.Address:X16}.");
+                            throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
                     }
 
                     break;
 
                 default:
-                    throw new NotImplementedException($"Unknown MRC 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+                    throw new NotImplementedException($"Unknown MRC 0x{op.RawOpCode:X} at 0x{op.Address:X}.");
             }
 
             if (op.Rt == RegisterAlias.Aarch32Pc)
@@ -154,13 +144,13 @@ namespace ARMeilleure.Instructions
                             info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntpctEl0)); break;
 
                         default:
-                            throw new NotImplementedException($"Unknown MRRC Opc1 0x{opc:X16} at 0x{op.Address:X16}.");
+                            throw new NotImplementedException($"Unknown MRRC Opc1 0x{opc:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
                     }
 
                     break;
 
                 default:
-                    throw new NotImplementedException($"Unknown MRRC 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+                    throw new NotImplementedException($"Unknown MRRC 0x{op.RawOpCode:X} at 0x{op.Address:X}.");
             }
 
             Operand result = context.Call(info);
@@ -265,7 +255,7 @@ namespace ARMeilleure.Instructions
                 case 0b1000: // FPEXC
                     throw new NotImplementedException("Supervisor Only");
                 default:
-                    throw new NotImplementedException($"Unknown VMRS 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+                    throw new NotImplementedException($"Unknown VMRS 0x{op.RawOpCode:X} at 0x{op.Address:X}.");
             }
         }
 
@@ -288,7 +278,7 @@ namespace ARMeilleure.Instructions
                 case 0b1000: // FPEXC
                     throw new NotImplementedException("Supervisor Only");
                 default:
-                    throw new NotImplementedException($"Unknown VMSR 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+                    throw new NotImplementedException($"Unknown VMSR 0x{op.RawOpCode:X} at 0x{op.Address:X}.");
             }
         }
 
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index c667ba5f2..0d63820b7 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -601,6 +601,7 @@ namespace ARMeilleure.Instructions
         Vmin,
         Vminnm,
         Vmla,
+        Vmlal,
         Vmls,
         Vmlsl,
         Vmov,
@@ -618,15 +619,24 @@ namespace ARMeilleure.Instructions
         Vorn,
         Vorr,
         Vpadd,
+        Vpaddl,
         Vpmax,
         Vpmin,
+        Vqadd,
+        Vqdmulh,
+        Vqmovn,
+        Vqmovun,
         Vqrshrn,
         Vqrshrun,
         Vqshrn,
+        Vqshrun,
+        Vqsub,
         Vrev,
+        Vrhadd,
         Vrint,
         Vrintx,
         Vrshr,
+        Vrshrn,
         Vsel,
         Vshl,
         Vshll,
@@ -643,8 +653,10 @@ namespace ARMeilleure.Instructions
         Vrecps,
         Vrsqrte,
         Vrsqrts,
+        Vrsra,
         Vsra,
         Vsub,
+        Vsubl,
         Vsubw,
         Vtbl,
         Vtrn,
diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs
index c060f3a28..dd24e3733 100644
--- a/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/ARMeilleure/Translation/PTC/Ptc.cs
@@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC
         private const string OuterHeaderMagicString = "PTCohd\0\0";
         private const string InnerHeaderMagicString = "PTCihd\0\0";
 
-        private const uint InternalVersion = 3666; //! To be incremented manually for each change to the ARMeilleure project.
+        private const uint InternalVersion = 3677; //! To be incremented manually for each change to the ARMeilleure project.
 
         private const string ActualDir = "0";
         private const string BackupDir = "1";
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd32.cs b/Ryujinx.Tests/Cpu/CpuTestSimd32.cs
index 34e94068b..34173cd7d 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimd32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimd32.cs
@@ -12,12 +12,22 @@ namespace Ryujinx.Tests.Cpu
 #if Simd32
 
 #region "ValueSource (Opcodes)"
-        private static uint[] _Vabs_Vneg_V_()
+        private static uint[] _Vabs_Vneg_Vpaddl_I_()
         {
             return new uint[]
             {
-                0xf3b10300u, // VABS.S8 D0, D0
-                0xf3b10380u  // VNEG.S8 D0, D0
+                0xf3b10300u, // VABS.S8   D0, D0
+                0xf3b10380u, // VNEG.S8   D0, D0
+                0xf3b00200u  // VPADDL.S8 D0, D0
+            };
+        }
+
+        private static uint[] _Vabs_Vneg_F_()
+        {
+            return new uint[]
+            {
+                0xf3b90700u, // VABS.F32 D0, D0
+                0xf3b90780u  // VNEG.F32 D0, D0
             };
         }
 #endregion
@@ -201,40 +211,14 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise]
-        public void Vabs_Vneg_V_S8_S16_S32([ValueSource("_Vabs_Vneg_V_")] uint opcode,
-                                           [Range(0u, 3u)] uint rd,
-                                           [Range(0u, 3u)] uint rm,
-                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
-                                           [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b,
-                                           [Values(0u, 1u, 2u)] uint size, // <S8, S16, S32>
-                                           [Values] bool q)
+        public void Vabs_Vneg_Vpaddl_V_I([ValueSource("_Vabs_Vneg_Vpaddl_I_")] uint opcode,
+                                         [Range(0u, 3u)] uint rd,
+                                         [Range(0u, 3u)] uint rm,
+                                         [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
+                                         [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b,
+                                         [Values(0u, 1u, 2u)] uint size, // <S8, S16, S32>
+                                         [Values] bool q)
         {
-            const bool f = false;
-
-            Vabs_Vneg_V(opcode, rd, rm, z, b, size, f, q);
-        }
-
-        [Test, Pairwise]
-        public void Vabs_Vneg_V_F32([ValueSource("_Vabs_Vneg_V_")] uint opcode,
-                                    [Range(0u, 3u)] uint rd,
-                                    [Range(0u, 3u)] uint rm,
-                                    [ValueSource("_2S_F_")] ulong z,
-                                    [ValueSource("_2S_F_")] ulong b,
-                                    [Values] bool q)
-        {
-            const uint size = 0b10; // <F32>
-            const bool f = true;
-
-            Vabs_Vneg_V(opcode, rd, rm, z, b, size, f, q);
-        }
-
-        private void Vabs_Vneg_V(uint opcode, uint rd, uint rm, ulong z, ulong b, uint size, bool f, bool q)
-        {
-            if (f)
-            {
-                opcode |= 1 << 10;
-            }
-
             if (q)
             {
                 opcode |= 1 << 6;
@@ -256,6 +240,33 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
+        [Test, Pairwise]
+        public void Vabs_Vneg_V_F32([ValueSource("_Vabs_Vneg_F_")] uint opcode, // Base encoding from _Vabs_Vneg_F_ (VABS.F32 / VNEG.F32).
+                                    [Range(0u, 3u)] uint rd,
+                                    [Range(0u, 3u)] uint rm,
+                                    [ValueSource("_2S_F_")] ulong z,
+                                    [ValueSource("_2S_F_")] ulong b,
+                                    [Values] bool q) // When true, bit 6 is set and both register indices are made even.
+        {
+            if (q)
+            {
+                opcode |= 1 << 6;
+
+                rd >>= 1; rd <<= 1; // Clear bit 0 (round the index down to an even number).
+                rm >>= 1; rm <<= 1;
+            }
+
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); // rd: low 4 bits -> bits [15:12], bit 4 -> bit 22.
+            opcode |= ((rm & 0xf) << 0)  | ((rm & 0x10) << 1); // rm: low 4 bits -> bits [3:0], bit 4 -> bit 5.
+
+            V128 v0 = MakeVectorE0E1(z, ~z);
+            V128 v1 = MakeVectorE0E1(b, ~b);
+
+            SingleOpcode(opcode, v0: v0, v1: v1);
+
+            CompareAgainstUnicorn();
+        }
+
         [Test, Pairwise, Description("VCNT.8 D0, D0 | VCNT.8 Q0, Q0")]
         public void Vcnt([Values(0u, 1u)] uint rd,
                          [Values(0u, 1u)] uint rm,
@@ -283,6 +294,32 @@ namespace Ryujinx.Tests.Cpu
 
             CompareAgainstUnicorn();
         }
+
+        [Test, Pairwise]
+        public void Vmovn_V([Range(0u, 3u)] uint rd,
+                            [Range(0u, 3u)] uint rm,
+                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
+                            [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong b,
+                            [Values(0u, 1u, 2u, 3u)] uint op, // Written to bits [7:6]; presumably <VMOVN, VQMOVUN, VQMOVN.S, VQMOVN.U> - confirm against the ARM ARM.
+                            [Values(0u, 1u, 2u)] uint size) // Source element width <16, 32, 64> (size 0 is the 16-bit base opcode below).
+        {
+            rm >>= 1; rm <<= 1; // Round rm down to an even index (the base opcode takes a Q source).
+
+            uint opcode = 0xf3b20200u; // VMOVN.S16 D0, Q0
+
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); // rd: low 4 bits -> bits [15:12], bit 4 -> bit 22.
+            opcode |= ((rm & 0xf) << 0)  | ((rm & 0x10) << 1); // rm: low 4 bits -> bits [3:0], bit 4 -> bit 5.
+
+            opcode |= (op & 0x3) << 6;
+            opcode |= (size & 0x3) << 18; // Bits [19:18].
+
+            V128 v0 = MakeVectorE0E1(z, ~z);
+            V128 v1 = MakeVectorE0E1(b, ~b);
+
+            SingleOpcode(opcode, v0: v0, v1: v1);
+
+            CompareAgainstUnicorn(); // NOTE(review): no fpsrMask is passed although op != 0 looks like saturating forms - confirm whether Fpsr.Qc should be compared.
+        }
 #endif
     }
 }
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
index 0635d8fc3..93fc658aa 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
@@ -12,11 +12,13 @@ namespace Ryujinx.Tests.Cpu
 #if SimdReg32
 
 #region "ValueSource (Opcodes)"
-        private static uint[] _V_Add_Sub_Wide_I_()
+        private static uint[] _V_Add_Sub_Long_Wide_I_()
         {
             return new uint[]
             {
+                0xf2800000u, // VADDL.S8 Q0, D0, D0
                 0xf2800100u, // VADDW.S8 Q0, Q0, D0
+                0xf2800200u, // VSUBL.S8 Q0, D0, D0
                 0xf2800300u  // VSUBW.S8 Q0, Q0, D0
             };
         }
@@ -74,6 +76,15 @@ namespace Ryujinx.Tests.Cpu
             };
         }
 
+        private static uint[] _Vmlal_Vmlsl_V_I_()
+        {
+            return new uint[]
+            {
+                0xf2800800u, // VMLAL.S8 Q0, D0, D0
+                0xf2800a00u  // VMLSL.S8 Q0, D0, D0
+            };
+        }
+
         private static uint[] _Vp_Add_Max_Min_F_()
         {
             return new uint[]
@@ -84,16 +95,30 @@ namespace Ryujinx.Tests.Cpu
             };
         }
 
-        // VPADD does not have an unsigned flag, so we check the opcode before setting it.
-        private static uint VpaddI8 = 0xf2000b10u; // VPADD.I8 D0, D0, D0
-
-        private static uint[] _Vp_Add_Max_Min_I_()
+        private static uint[] _Vp_Add_I_()
         {
             return new uint[]
             {
-                VpaddI8,
-                0xf2000a00u, // VPMAX.S8 D0, D0, D0
-                0xf2000a10u  // VPMIN.S8 D0, D0, D0
+                0xf2000b10u // VPADD.I8 D0, D0, D0
+            };
+        }
+
+        private static uint[] _V_Pmax_Pmin_Rhadd_I_()
+        {
+            return new uint[]
+            {
+                0xf2000a00u, // VPMAX .S8 D0, D0, D0
+                0xf2000a10u, // VPMIN .S8 D0, D0, D0
+                0xf2000100u, // VRHADD.S8 D0, D0, D0
+            };
+        }
+
+        private static uint[] _Vq_Add_Sub_I_()
+        {
+            return new uint[]
+            {
+                0xf2000050u, // VQADD.S8 Q0, Q0, Q0
+                0xf2000250u  // VQSUB.S8 Q0, Q0, Q0
             };
         }
 #endregion
@@ -350,7 +375,7 @@ namespace Ryujinx.Tests.Cpu
 
         [Explicit]
         [Test, Pairwise, Description("VADD.f32 V0, V0, V0")]
-        public void Vadd_f32([Values(0u)] uint rd,
+        public void Vadd_F32([Values(0u)] uint rd,
                              [Values(0u, 1u)] uint rn,
                              [Values(0u, 2u)] uint rm,
                              [ValueSource("_2S_F_")] ulong z0,
@@ -384,15 +409,15 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise]
-        public void V_Add_Sub_Wide_I([ValueSource("_V_Add_Sub_Wide_I_")] uint opcode,
-                                     [Range(0u, 5u)] uint rd,
-                                     [Range(0u, 5u)] uint rn,
-                                     [Range(0u, 5u)] uint rm,
-                                     [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong z,
-                                     [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong a,
-                                     [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong b,
-                                     [Values(0u, 1u, 2u)] uint size, // <SU8, SU16, SU32>
-                                     [Values] bool u) // <S, U>
+        public void V_Add_Sub_Long_Wide_I([ValueSource("_V_Add_Sub_Long_Wide_I_")] uint opcode,
+                                          [Range(0u, 5u)] uint rd,
+                                          [Range(0u, 5u)] uint rn,
+                                          [Range(0u, 5u)] uint rm,
+                                          [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong z,
+                                          [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong a,
+                                          [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong b,
+                                          [Values(0u, 1u, 2u)] uint size, // <SU8, SU16, SU32>
+                                          [Values] bool u) // <S, U>
         {
             if (u)
             {
@@ -566,18 +591,17 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
-        [Test, Pairwise, Description("VMLSL.<type><size> <Vd>, <Vn>, <Vm>")]
-        public void Vmlsl_I([Values(0u)] uint rd,
-                            [Values(1u, 0u)] uint rn,
-                            [Values(2u, 0u)] uint rm,
-                            [Values(0u, 1u, 2u)] uint size,
-                            [Random(RndCnt)] ulong z,
-                            [Random(RndCnt)] ulong a,
-                            [Random(RndCnt)] ulong b,
-                            [Values] bool u)
+        [Test, Pairwise]
+        public void Vmlal_Vmlsl_I([ValueSource(nameof(_Vmlal_Vmlsl_V_I_))] uint opcode,
+                                  [Values(0u)] uint rd,
+                                  [Values(1u, 0u)] uint rn,
+                                  [Values(2u, 0u)] uint rm,
+                                  [Values(0u, 1u, 2u)] uint size,
+                                  [Random(RndCnt)] ulong z,
+                                  [Random(RndCnt)] ulong a,
+                                  [Random(RndCnt)] ulong b,
+                                  [Values] bool u)
         {
-            uint opcode = 0xf2800a00u; // VMLSL.S8 Q0, D0, D0
-
             opcode |= ((rm & 0xf) << 0)  | ((rm & 0x10) << 1);
             opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
             opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3);
@@ -736,17 +760,42 @@ namespace Ryujinx.Tests.Cpu
         }
 
         [Test, Pairwise]
-        public void Vp_Add_Max_Min_I([ValueSource("_Vp_Add_Max_Min_I_")] uint opcode,
-                                     [Values(0u)] uint rd,
-                                     [Range(0u, 5u)] uint rn,
-                                     [Range(0u, 5u)] uint rm,
-                                     [Values(0u, 1u, 2u)] uint size,
-                                     [Random(RndCnt)] ulong z,
-                                     [Random(RndCnt)] ulong a,
-                                     [Random(RndCnt)] ulong b,
-                                     [Values] bool u)
+        public void Vp_Add_I([ValueSource("_Vp_Add_I_")] uint opcode,
+                             [Values(0u)] uint rd,
+                             [Range(0u, 5u)] uint rn,
+                             [Range(0u, 5u)] uint rm,
+                             [Values(0u, 1u, 2u)] uint size,
+                             [Random(RndCnt)] ulong z,
+                             [Random(RndCnt)] ulong a,
+                             [Random(RndCnt)] ulong b)
         {
-            if (u && opcode != VpaddI8)
+            opcode |= ((rm & 0xf) << 0)  | ((rm & 0x10) << 1);
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+            opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3);
+
+            opcode |= size << 20;
+
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, z);
+            V128 v2 = MakeVectorE0E1(b, z);
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise]
+        public void V_Pmax_Pmin_Rhadd_I([ValueSource("_V_Pmax_Pmin_Rhadd_I_")] uint opcode,
+                                        [Values(0u)] uint rd,
+                                        [Range(0u, 5u)] uint rn,
+                                        [Range(0u, 5u)] uint rm,
+                                        [Values(0u, 1u, 2u)] uint size,
+                                        [Random(RndCnt)] ulong z,
+                                        [Random(RndCnt)] ulong a,
+                                        [Random(RndCnt)] ulong b,
+                                        [Values] bool u)
+        {
+            if (u)
             {
                 opcode |= 1 << 24;
             }
@@ -765,6 +814,71 @@ namespace Ryujinx.Tests.Cpu
 
             CompareAgainstUnicorn();
         }
+
+        [Test, Pairwise]
+        public void Vq_Add_Sub_I([ValueSource("_Vq_Add_Sub_I_")] uint opcode, // Base encoding from _Vq_Add_Sub_I_ (VQADD.S8 / VQSUB.S8, Q registers).
+                                 [Range(0u, 5u)] uint rd,
+                                 [Range(0u, 5u)] uint rn,
+                                 [Range(0u, 5u)] uint rm,
+                                 [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong z,
+                                 [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong a,
+                                 [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong b,
+                                 [Values(0u, 1u, 2u)] uint size, // <SU8, SU16, SU32>
+                                 [Values] bool u) // <S, U>
+        {
+            if (u)
+            {
+                opcode |= 1 << 24; // Bit 24 switches the base (signed) encoding to the unsigned one.
+            }
+
+            rd >>= 1; rd <<= 1; // Clear bit 0 of each index (round down to an even number).
+            rn >>= 1; rn <<= 1;
+            rm >>= 1; rm <<= 1;
+
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); // rd: low 4 bits -> bits [15:12], bit 4 -> bit 22.
+            opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3); // rn: low 4 bits -> bits [19:16], bit 4 -> bit 7.
+            opcode |= ((rm & 0xf) << 0)  | ((rm & 0x10) << 1); // rm: low 4 bits -> bits [3:0], bit 4 -> bit 5.
+
+            opcode |= (size & 0x3) << 20; // Bits [21:20].
+
+            V128 v0 = MakeVectorE0E1(z, ~z);
+            V128 v1 = MakeVectorE0E1(a, ~a);
+            V128 v2 = MakeVectorE0E1(b, ~b);
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+
+            CompareAgainstUnicorn(); // NOTE(review): no fpsrMask is passed, unlike the saturating shift tests (fpsrMask: Fpsr.Qc) - confirm whether intended.
+        }
+
+        [Test, Pairwise, Description("VQDMULH.<S16, S32> <Qd>, <Qn>, <Qm>")]
+        public void Vqdmulh_I([Range(0u, 5u)] uint rd,
+                              [Range(0u, 5u)] uint rn,
+                              [Range(0u, 5u)] uint rm,
+                              [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong z,
+                              [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong a,
+                              [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong b,
+                              [Values(1u, 2u)] uint size) // <S16, S32>
+        {
+            rd >>= 1; rd <<= 1; // Clear bit 0 of each index (round down to an even number).
+            rn >>= 1; rn <<= 1;
+            rm >>= 1; rm <<= 1;
+
+            uint opcode = 0xf2100b40u & ~(3u << 20); // VQDMULH.S16 Q0, Q0, Q0 with bits [21:20] cleared; size is OR-ed back in below.
+
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); // rd: low 4 bits -> bits [15:12], bit 4 -> bit 22.
+            opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3); // rn: low 4 bits -> bits [19:16], bit 4 -> bit 7.
+            opcode |= ((rm & 0xf) << 0)  | ((rm & 0x10) << 1); // rm: low 4 bits -> bits [3:0], bit 4 -> bit 5.
+
+            opcode |= (size & 0x3) << 20; // Bits [21:20].
+
+            V128 v0 = MakeVectorE0E1(z, ~z);
+            V128 v1 = MakeVectorE0E1(a, ~a);
+            V128 v2 = MakeVectorE0E1(b, ~b);
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+
+            CompareAgainstUnicorn(); // NOTE(review): no fpsrMask is passed, unlike the saturating shift tests (fpsrMask: Fpsr.Qc) - confirm whether intended.
+        }
 #endif
     }
 }
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs
index a8c32d586..45481f854 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs
@@ -41,9 +41,10 @@ namespace Ryujinx.Tests.Cpu
         {
             return new uint[]
             {
+                0xf2880010u, // VSHR.S8  D0, D0, #8
                 0xf2880110u, // VSRA.S8  D0, D0, #8
                 0xf2880210u, // VRSHR.S8 D0, D0, #8
-                0xf2880010u  // VSHR.S8  D0, D0, #8
+                0xf2880310u  // VRSRA.S8 D0, D0, #8
             };
         }
 
@@ -51,9 +52,10 @@ namespace Ryujinx.Tests.Cpu
         {
             return new uint[]
             {
+                0xf2900010u, // VSHR.S16  D0, D0, #16
                 0xf2900110u, // VSRA.S16  D0, D0, #16
                 0xf2900210u, // VRSHR.S16 D0, D0, #16
-                0xf2900010u  // VSHR.S16  D0, D0, #16
+                0xf2900310u  // VRSRA.S16 D0, D0, #16
             };
         }
 
@@ -61,9 +63,10 @@ namespace Ryujinx.Tests.Cpu
         {
             return new uint[]
             {
+                0xf2a00010u, // VSHR.S32  D0, D0, #32
                 0xf2a00110u, // VSRA.S32  D0, D0, #32
                 0xf2a00210u, // VRSHR.S32 D0, D0, #32
-                0xf2a00010u  // VSHR.S32  D0, D0, #32
+                0xf2a00310u  // VRSRA.S32 D0, D0, #32
             };
         }
 
@@ -76,6 +79,25 @@ namespace Ryujinx.Tests.Cpu
                 0xf2800090u  // VSHR.S64  D0, D0, #64
             };
         }
+
+        private static uint[] _Vqshrn_Vqrshrn_Vrshrn_Imm_()
+        {
+            return new uint[]
+            {
+                0xf2800910u, // VORR.I16 D0, #0 (immediate value changes it into QSHRN)
+                0xf2800950u, // VORR.I16 Q0, #0 (immediate value changes it into QRSHRN)
+                0xf2800850u  // VMOV.I16 Q0, #0 (immediate value changes it into RSHRN)
+            };
+        }
+
+        private static uint[] _Vqshrun_Vqrshrun_Imm_()
+        {
+            return new uint[]
+            {
+                0xf3800810u, // VMOV.I16 D0, #0x80 (immediate value changes it into QSHRUN)
+                0xf3800850u  // VMOV.I16 Q0, #0x80 (immediate value changes it into QRSHRUN)
+            };
+        }
 #endregion
 
         private const int RndCnt = 2;
@@ -230,18 +252,17 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
-        [Test, Pairwise, Description("VQRSHRN.<type><size> <Vd>, <Vm>, #<imm>")]
-        public void Vqrshrn_Imm([Values(0u, 1u)] uint rd,
-                                [Values(2u, 0u)] uint rm,
-                                [Values(0u, 1u, 2u)] uint size,
-                                [Random(RndCntShiftImm)] [Values(0u)] uint shiftImm,
-                                [Random(RndCnt)] ulong z,
-                                [Random(RndCnt)] ulong a,
-                                [Random(RndCnt)] ulong b,
-                                [Values] bool u)
+        [Test, Pairwise]
+        public void Vqshrn_Vqrshrn_Vrshrn_Imm([ValueSource("_Vqshrn_Vqrshrn_Vrshrn_Imm_")] uint opcode,
+                                              [Values(0u, 1u)] uint rd,
+                                              [Values(2u, 0u)] uint rm,
+                                              [Values(0u, 1u, 2u)] uint size,
+                                              [Random(RndCntShiftImm)] [Values(0u)] uint shiftImm,
+                                              [Random(RndCnt)] ulong z,
+                                              [Random(RndCnt)] ulong a,
+                                              [Random(RndCnt)] ulong b,
+                                              [Values] bool u)
         {
-            uint opcode = 0xf2800950u; // VORR.I16 Q0, #0 (immediate value changes it into QRSHRN)
-
             uint imm = 1u << ((int)size + 3);
             imm |= shiftImm & (imm - 1);
 
@@ -265,17 +286,16 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn(fpsrMask: Fpsr.Qc);
         }
 
-        [Test, Pairwise, Description("VQRSHRUN.<type><size> <Vd>, <Vm>, #<imm>")]
-        public void Vqrshrun_Imm([Values(0u, 1u)] uint rd,
-                                 [Values(2u, 0u)] uint rm,
-                                 [Values(0u, 1u, 2u)] uint size,
-                                 [Random(RndCntShiftImm)] [Values(0u)] uint shiftImm,
-                                 [Random(RndCnt)] ulong z,
-                                 [Random(RndCnt)] ulong a,
-                                 [Random(RndCnt)] ulong b)
+        [Test, Pairwise]
+        public void Vqshrun_Vqrshrun_Imm([ValueSource("_Vqshrun_Vqrshrun_Imm_")] uint opcode,
+                                         [Values(0u, 1u)] uint rd,
+                                         [Values(2u, 0u)] uint rm,
+                                         [Values(0u, 1u, 2u)] uint size,
+                                         [Random(RndCntShiftImm)] [Values(0u)] uint shiftImm,
+                                         [Random(RndCnt)] ulong z,
+                                         [Random(RndCnt)] ulong a,
+                                         [Random(RndCnt)] ulong b)
         {
-            uint opcode = 0xf3800850u; // VMOV.I16 Q0, #0x80 (immediate value changes it into QRSHRUN)
-
             uint imm = 1u << ((int)size + 3);
             imm |= shiftImm & (imm - 1);