diff --git a/Ryujinx.Graphics/Gal/GalPipelineState.cs b/Ryujinx.Graphics/Gal/GalPipelineState.cs
index 8837eb8c4..1bfe2684f 100644
--- a/Ryujinx.Graphics/Gal/GalPipelineState.cs
+++ b/Ryujinx.Graphics/Gal/GalPipelineState.cs
@@ -26,6 +26,7 @@
         public GalComparisonOp DepthFunc;
 
         public bool StencilTestEnabled;
+        public bool StencilTwoSideEnabled;
 
         public GalComparisonOp StencilBackFuncFunc;
         public int StencilBackFuncRef;
@@ -52,6 +53,11 @@
         public GalBlendFactor BlendFuncSrcAlpha;
         public GalBlendFactor BlendFuncDstAlpha;
 
+        public bool ColorMaskR;
+        public bool ColorMaskG;
+        public bool ColorMaskB;
+        public bool ColorMaskA;
+
         public bool PrimitiveRestartEnabled;
         public uint PrimitiveRestartIndex;
 
diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLPipeline.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLPipeline.cs
index 96da17f8e..b7825996e 100644
--- a/Ryujinx.Graphics/Gal/OpenGL/OGLPipeline.cs
+++ b/Ryujinx.Graphics/Gal/OpenGL/OGLPipeline.cs
@@ -195,6 +195,11 @@ namespace Ryujinx.Graphics.Gal.OpenGL
                 Enable(EnableCap.StencilTest, New.StencilTestEnabled);
             }
 
+            if (New.StencilTwoSideEnabled != Old.StencilTwoSideEnabled)
+            {
+                Enable((EnableCap)All.StencilTestTwoSideExt, New.StencilTwoSideEnabled);
+            }
+
             if (New.StencilTestEnabled)
             {
                 if (New.StencilBackFuncFunc != Old.StencilBackFuncFunc ||
@@ -298,6 +303,18 @@ namespace Ryujinx.Graphics.Gal.OpenGL
                 }
             }
 
+            if (New.ColorMaskR != Old.ColorMaskR ||
+                New.ColorMaskG != Old.ColorMaskG ||
+                New.ColorMaskB != Old.ColorMaskB ||
+                New.ColorMaskA != Old.ColorMaskA)
+            {
+                GL.ColorMask(
+                    New.ColorMaskR,
+                    New.ColorMaskG,
+                    New.ColorMaskB,
+                    New.ColorMaskA);
+            }
+
             if (New.PrimitiveRestartEnabled != Old.PrimitiveRestartEnabled)
             {
                 Enable(EnableCap.PrimitiveRestart, New.PrimitiveRestartEnabled);
diff --git a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs
index 11f044494..d0f9223b9 100644
--- a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs
+++ b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs
@@ -360,7 +360,7 @@ namespace Ryujinx.Graphics.Gal.Shader
 
         private void PrintDeclSsy()
         {
-            SB.AppendLine("uint " + GlslDecl.SsyCursorName + "= 0;");
+            SB.AppendLine("uint " + GlslDecl.SsyCursorName + " = 0;");
 
             SB.AppendLine("uint " + GlslDecl.SsyStackName + "[" + GlslDecl.SsyStackSize + "];" + Environment.NewLine);
         }
diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeAlu.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeAlu.cs
index 0a3c0da98..d4a76bc93 100644
--- a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeAlu.cs
+++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeAlu.cs
@@ -585,6 +585,7 @@ namespace Ryujinx.Graphics.Gal.Shader
             bool AbsA = OpCode.Read(46);
             bool NegA = OpCode.Read(48);
             bool AbsB = OpCode.Read(49);
+            bool Sat  = OpCode.Read(50);
 
             ShaderIrNode OperA = OpCode.Gpr8(), OperB;
 
@@ -603,12 +604,13 @@ namespace Ryujinx.Graphics.Gal.Shader
 
             ShaderIrNode Op = new ShaderIrOp(ShaderIrInst.Fadd, OperA, OperB);
 
-            Block.AddNode(OpCode.PredNode(new ShaderIrAsg(OpCode.Gpr0(), Op)));
+            Block.AddNode(OpCode.PredNode(new ShaderIrAsg(OpCode.Gpr0(), GetAluFsat(Op, Sat))));
         }
 
         private static void EmitFmul(ShaderIrBlock Block, long OpCode, ShaderOper Oper)
         {
             bool NegB = OpCode.Read(48);
+            bool Sat  = OpCode.Read(50);
 
             ShaderIrNode OperA = OpCode.Gpr8(), OperB;
 
@@ -625,13 +627,14 @@ namespace Ryujinx.Graphics.Gal.Shader
 
             ShaderIrNode Op = new ShaderIrOp(ShaderIrInst.Fmul, OperA, OperB);
 
-            Block.AddNode(OpCode.PredNode(new ShaderIrAsg(OpCode.Gpr0(), Op)));
+            Block.AddNode(OpCode.PredNode(new ShaderIrAsg(OpCode.Gpr0(), GetAluFsat(Op, Sat))));
         }
 
         private static void EmitFfma(ShaderIrBlock Block, long OpCode, ShaderOper Oper)
         {
             bool NegB = OpCode.Read(48);
             bool NegC = OpCode.Read(49);
+            bool Sat  = OpCode.Read(50);
 
             ShaderIrNode OperA = OpCode.Gpr8(), OperB, OperC;
 
@@ -658,7 +661,7 @@ namespace Ryujinx.Graphics.Gal.Shader
 
             ShaderIrOp Op = new ShaderIrOp(ShaderIrInst.Ffma, OperA, OperB, OperC);
 
-            Block.AddNode(OpCode.PredNode(new ShaderIrAsg(OpCode.Gpr0(), Op)));
+            Block.AddNode(OpCode.PredNode(new ShaderIrAsg(OpCode.Gpr0(), GetAluFsat(Op, Sat))));
         }
 
         private static void EmitIadd(ShaderIrBlock Block, long OpCode, ShaderOper Oper)
diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeHelper.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeHelper.cs
index ebacd53ab..d07bcd917 100644
--- a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeHelper.cs
+++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeHelper.cs
@@ -2,6 +2,9 @@ namespace Ryujinx.Graphics.Gal.Shader
 {
     static class ShaderDecodeHelper
     {
+        private static readonly ShaderIrOperImmf ImmfZero = new ShaderIrOperImmf(0);
+        private static readonly ShaderIrOperImmf ImmfOne  = new ShaderIrOperImmf(1);
+
         public static ShaderIrNode GetAluFabsFneg(ShaderIrNode Node, bool Abs, bool Neg)
         {
             return GetAluFneg(GetAluFabs(Node, Abs), Neg);
@@ -17,6 +20,11 @@ namespace Ryujinx.Graphics.Gal.Shader
             return Neg ? new ShaderIrOp(ShaderIrInst.Fneg, Node) : Node;
         }
 
+        //Saturate helper: when Sat is set, wraps Node in an Fclamp op bounded by ImmfZero and ImmfOne.
+        public static ShaderIrNode GetAluFsat(ShaderIrNode Node, bool Sat)
+            => Sat ? new ShaderIrOp(ShaderIrInst.Fclamp, Node, ImmfZero, ImmfOne)
+                   : Node;
+
         public static ShaderIrNode GetAluIabsIneg(ShaderIrNode Node, bool Abs, bool Neg)
         {
             return GetAluIneg(GetAluIabs(Node, Abs), Neg);
diff --git a/Ryujinx.Graphics/NvGpuEngine3d.cs b/Ryujinx.Graphics/NvGpuEngine3d.cs
index 3dd7746da..a2a969280 100644
--- a/Ryujinx.Graphics/NvGpuEngine3d.cs
+++ b/Ryujinx.Graphics/NvGpuEngine3d.cs
@@ -97,7 +97,8 @@ namespace Ryujinx.Graphics
             SetCullFace(State);
             SetDepth(State);
             SetStencil(State);
-            SetAlphaBlending(State);
+            SetBlending(State);
+            SetColorMask(State);
             SetPrimitiveRestart(State);
 
             for (int FbIndex = 0; FbIndex < 8; FbIndex++)
@@ -403,7 +404,7 @@ namespace Ryujinx.Graphics
             }
         }
 
-        private void SetAlphaBlending(GalPipelineState State)
+        private void SetBlending(GalPipelineState State)
         {
             //TODO: Support independent blend properly.
             State.BlendEnabled = ReadRegisterBool(NvGpuEngine3dReg.IBlendNEnable);
@@ -421,6 +422,16 @@ namespace Ryujinx.Graphics
             }
         }
 
+        private void SetColorMask(GalPipelineState State)
+        {
+            //One nibble per channel, R/G/B/A from bit 0 upwards; a channel is on when its nibble is non-zero.
+            int Mask = ReadRegister(NvGpuEngine3dReg.ColorMask);
+            State.ColorMaskR = (Mask & 0x000f) != 0;
+            State.ColorMaskG = (Mask & 0x00f0) != 0;
+            State.ColorMaskB = (Mask & 0x0f00) != 0;
+            State.ColorMaskA = (Mask & 0xf000) != 0;
+        }
+
         private void SetPrimitiveRestart(GalPipelineState State)
         {
             State.PrimitiveRestartEnabled = ReadRegisterBool(NvGpuEngine3dReg.PrimRestartEnable);
diff --git a/Ryujinx.Graphics/NvGpuEngine3dReg.cs b/Ryujinx.Graphics/NvGpuEngine3dReg.cs
index 418e5b6b4..ba211313e 100644
--- a/Ryujinx.Graphics/NvGpuEngine3dReg.cs
+++ b/Ryujinx.Graphics/NvGpuEngine3dReg.cs
@@ -78,6 +78,7 @@ namespace Ryujinx.Graphics
         CullFaceEnable       = 0x646,
         FrontFace            = 0x647,
         CullFace             = 0x648,
+        ColorMask            = 0x680,
         QueryAddress         = 0x6c0,
         QuerySequence        = 0x6c2,
         QueryControl         = 0x6c3,
diff --git a/Ryujinx.Graphics/Texture/ImageUtils.cs b/Ryujinx.Graphics/Texture/ImageUtils.cs
index 1b043245e..e1f370cda 100644
--- a/Ryujinx.Graphics/Texture/ImageUtils.cs
+++ b/Ryujinx.Graphics/Texture/ImageUtils.cs
@@ -235,18 +235,23 @@ namespace Ryujinx.Graphics.Texture
 
             int BytesPerPixel = Desc.BytesPerPixel;
 
-            int OutOffs = 0;
+            //Note: Rows need 4-byte alignment, so the pitch is each row's byte size rounded up to a multiple of 4.
+            int Pitch = (Width * BytesPerPixel + 3) & ~3;
 
-            byte[] Data = new byte[Width * Height * BytesPerPixel];
+            byte[] Data = new byte[Height * Pitch];
 
             for (int Y = 0; Y < Height; Y++)
-            for (int X = 0; X < Width;  X++)
             {
-                long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y);
+                int OutOffs = Y * Pitch;
 
-                CpuMemory.ReadBytes(Position + Offset, Data, OutOffs, BytesPerPixel);
+                for (int X = 0; X < Width;  X++)
+                {
+                    long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y);
 
-                OutOffs += BytesPerPixel;
+                    CpuMemory.ReadBytes(Position + Offset, Data, OutOffs, BytesPerPixel);
+
+                    OutOffs += BytesPerPixel;
+                }
             }
 
             return Data;