From fc2d5086e7cbf9463195381f686c2ba57bcc09e8 Mon Sep 17 00:00:00 2001 From: riperiperi Date: Sun, 17 May 2020 12:41:45 +0100 Subject: [PATCH] Improve performance when converting texture formats. Still more work to do. --- Ryujinx.Graphics.Gpu/Image/Texture.cs | 5 +- Ryujinx.Graphics.Texture/BlockLinearLayout.cs | 49 ++++ Ryujinx.Graphics.Texture/LayoutConverter.cs | 226 ++++++++++++++---- 3 files changed, 231 insertions(+), 49 deletions(-) diff --git a/Ryujinx.Graphics.Gpu/Image/Texture.cs b/Ryujinx.Graphics.Gpu/Image/Texture.cs index 0f952ffd4..b484a266b 100644 --- a/Ryujinx.Graphics.Gpu/Image/Texture.cs +++ b/Ryujinx.Graphics.Gpu/Image/Texture.cs @@ -84,6 +84,8 @@ namespace Ryujinx.Graphics.Gpu.Image private int _sequenceNumber; + private bool _noSync; + /// <summary> /// Constructs a new instance of the cached GPU texture. /// </summary> @@ -301,7 +303,7 @@ namespace Ryujinx.Graphics.Gpu.Image { // Texture buffers are not handled here, instead they are invalidated (if modified) // when the texture is bound. This is handled by the buffer manager. - if ((_sequenceNumber == _context.SequenceNumber && _hasData) || Info.Target == Target.TextureBuffer) + if ((_sequenceNumber == _context.SequenceNumber && _hasData) || _noSync) { return; } @@ -999,6 +1001,7 @@ namespace Ryujinx.Graphics.Gpu.Image _depth = info.GetDepth(); _layers = info.GetLayers(); + _noSync = Info.Target == Target.TextureBuffer; } /// <summary> diff --git a/Ryujinx.Graphics.Texture/BlockLinearLayout.cs b/Ryujinx.Graphics.Texture/BlockLinearLayout.cs index b95db7029..7a5ac75f3 100644 --- a/Ryujinx.Graphics.Texture/BlockLinearLayout.cs +++ b/Ryujinx.Graphics.Texture/BlockLinearLayout.cs @@ -33,6 +33,10 @@ namespace Ryujinx.Graphics.Texture private int _robSize; private int _sliceSize; + // Variables for built in iteration. + private int _yPart; + private int _zPart; + public BlockLinearLayout( int width, int height, @@ -97,5 +101,50 @@ namespace Ryujinx.Graphics.Texture return offset; } + + // Functions for built in iteration.
+ // Components of the offset can be updated separately, and combined to save some time. + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void SetY(int y) + { + int yh = y / GobHeight; + int offset = (yh >> _bhShift) * _robSize; + + offset += (yh & _bhMask) * GobSize; + + offset += ((y & 0x07) >> 1) << 6; + offset += ((y & 0x01) >> 0) << 4; + + _yPart = offset; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void SetZ(int z) + { + int offset = (z >> _bdShift) * _sliceSize; + + offset += ((z & _bdMask) * GobSize) << _bhShift; + + _zPart = offset; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetOffsetWithLineOffset(int x) + { + int offset = (x / GobStride) << _xShift; + + offset += ((x & 0x3f) >> 5) << 8; + offset += ((x & 0x1f) >> 4) << 5; + offset += (x & 0x0f); + + return offset + _yPart + _zPart; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetOffset(int x) + { + return GetOffsetWithLineOffset(x << _bppShift); + } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Texture/LayoutConverter.cs b/Ryujinx.Graphics.Texture/LayoutConverter.cs index ce2b37b54..f4c5cd441 100644 --- a/Ryujinx.Graphics.Texture/LayoutConverter.cs +++ b/Ryujinx.Graphics.Texture/LayoutConverter.cs @@ -1,12 +1,20 @@ using Ryujinx.Common; using System; - +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; using static Ryujinx.Graphics.Texture.BlockLinearConstants; namespace Ryujinx.Graphics.Texture { public static class LayoutConverter { + [StructLayout(LayoutKind.Sequential, Pack = 1, Size = 12)] + private struct Bpp12Pixel + { + private ulong _elem1; + private uint _elem2; + } + private const int HostStrideAlignment = 4; public static Span<byte> ConvertBlockLinearToLinear( @@ -41,14 +49,14 @@ namespace Ryujinx.Graphics.Texture int mipGobBlocksInY = gobBlocksInY; int mipGobBlocksInZ = gobBlocksInZ; - int gobWidth =
(GobStride / bytesPerPixel) * gobBlocksInTileX; + int gobWidth = (GobStride / bytesPerPixel) * gobBlocksInTileX; int gobHeight = gobBlocksInY * GobHeight; for (int level = 0; level < levels; level++) { - int w = Math.Max(1, width >> level); + int w = Math.Max(1, width >> level); int h = Math.Max(1, height >> level); - int d = Math.Max(1, depth >> level); + int d = Math.Max(1, depth >> level); w = BitUtils.DivRoundUp(w, blockWidth); h = BitUtils.DivRoundUp(h, blockHeight); @@ -86,36 +94,66 @@ namespace Ryujinx.Graphics.Texture mipGobBlocksInZ, bytesPerPixel); - for (int layer = 0; layer < layers; layer++) + unsafe void Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged { - int inBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level); - - for (int z = 0; z < d; z++) - for (int y = 0; y < h; y++) + fixed (byte* outputBPtr = output, dataBPtr = data) { - for (int x = 0; x < strideTrunc; x += 16) + for (int layer = 0; layer < layers; layer++) { - int offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset(x, y, z); + int inBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level); - Span<byte> dest = output.Slice(outOffs + x, 16); + for (int z = 0; z < d; z++) + { + layoutConverter.SetZ(z); + for (int y = 0; y < h; y++) + { + layoutConverter.SetY(y); + for (int x = 0; x < strideTrunc; x += 16) + { + int offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset(x); - data.Slice(offset, 16).CopyTo(dest); + *(Vector128<byte>*)(outputBPtr + outOffs + x) = *(Vector128<byte>*)(dataBPtr + offset); + } + + for (int x = xStart; x < w; x++) + { + int offset = inBaseOffset + layoutConverter.GetOffset(x); + + ((T*)(outputBPtr + outOffs))[x] = *(T*)(dataBPtr + offset); + } + + outOffs += stride; + } + } } - - for (int x = xStart; x < w; x++) - { - int offset = inBaseOffset + layoutConverter.GetOffset(x, y, z); - - Span<byte> dest = output.Slice(outOffs + x * bytesPerPixel, bytesPerPixel); - - data.Slice(offset, bytesPerPixel).CopyTo(dest); - } - - outOffs += stride; } } - } + switch (bytesPerPixel) + { + case 1: +
Convert<byte>(output, data); + break; + case 2: + Convert<ushort>(output, data); + break; + case 4: + Convert<uint>(output, data); + break; + case 8: + Convert<ulong>(output, data); + break; + case 12: + Convert<Bpp12Pixel>(output, data); + break; + case 16: + Convert<Vector128<byte>>(output, data); + break; + + default: + throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format."); + } + } return output; @@ -137,18 +175,47 @@ namespace Ryujinx.Graphics.Texture int outOffs = 0; - for (int y = 0; y < h; y++) + unsafe void Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged { - for (int x = 0; x < w; x++) + fixed (byte* outputBPtr = output, dataBPtr = data) { - int offset = y * stride + x * bytesPerPixel; + for (int y = 0; y < h; y++) + { + for (int x = 0; x < w; x++) + { + int offset = y * stride + x * bytesPerPixel; - Span<byte> dest = output.Slice(outOffs + x * bytesPerPixel, bytesPerPixel); + ((T*)(outputBPtr + outOffs))[x] = *(T*)(dataBPtr + offset); + } - data.Slice(offset, bytesPerPixel).CopyTo(dest); + outOffs += outStride; + } } + } - outOffs += outStride; + switch (bytesPerPixel) + { + case 1: + Convert<byte>(output, data); + break; + case 2: + Convert<ushort>(output, data); + break; + case 4: + Convert<uint>(output, data); + break; + case 8: + Convert<ulong>(output, data); + break; + case 12: + Convert<Bpp12Pixel>(output, data); + break; + case 16: + Convert<Vector128<byte>>(output, data); + break; + + default: + throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format."); } return output; @@ -217,25 +284,59 @@ namespace Ryujinx.Graphics.Texture mipGobBlocksInZ, bytesPerPixel); - for (int layer = 0; layer < layers; layer++) + unsafe void Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged { - int outBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level); - - for (int z = 0; z < d; z++) - for (int y = 0; y < h; y++) + fixed (byte* outputBPtr = output, dataBPtr = data) { - for (int x = 0; x < w; x++) + T* outputPtr = (T*)outputBPtr, dataPtr = (T*)dataBPtr; + for (int layer = 0;
layer < layers; layer++) { - int offset = outBaseOffset + layoutConverter.GetOffset(x, y, z); + int outBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level); - Span<byte> dest = output.Slice(offset, bytesPerPixel); + for (int z = 0; z < d; z++) + { + layoutConverter.SetZ(z); + for (int y = 0; y < h; y++) + { + layoutConverter.SetY(y); + for (int x = 0; x < w; x++) + { + int offset = outBaseOffset + layoutConverter.GetOffset(x); - data.Slice(inOffs + x * bytesPerPixel, bytesPerPixel).CopyTo(dest); + *(T*)(outputBPtr + offset) = ((T*)(dataBPtr + inOffs))[x]; + } + + inOffs += stride; + } + } } - - inOffs += stride; } } + + switch (bytesPerPixel) + { + case 1: + Convert<byte>(output, data); + break; + case 2: + Convert<ushort>(output, data); + break; + case 4: + Convert<uint>(output, data); + break; + case 8: + Convert<ulong>(output, data); + break; + case 12: + Convert<Bpp12Pixel>(output, data); + break; + case 16: + Convert<Vector128<byte>>(output, data); + break; + + default: + throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format."); + } } return output; @@ -259,18 +360,47 @@ namespace Ryujinx.Graphics.Texture int inOffs = 0; - for (int y = 0; y < h; y++) + unsafe void Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged { - for (int x = 0; x < w; x++) + fixed (byte* outputBPtr = output, dataBPtr = data) { - int offset = y * stride + x * bytesPerPixel; + for (int y = 0; y < h; y++) + { + for (int x = 0; x < w; x++) + { + int offset = y * stride + x * bytesPerPixel; - Span<byte> dest = output.Slice(offset, bytesPerPixel); + *(T*)(outputBPtr + offset) = ((T*)(dataBPtr + inOffs))[x]; + } - data.Slice(inOffs + x * bytesPerPixel, bytesPerPixel).CopyTo(dest); + inOffs += inStride; + } } + } - inOffs += inStride; + switch (bytesPerPixel) + { + case 1: + Convert<byte>(output, data); + break; + case 2: + Convert<ushort>(output, data); + break; + case 4: + Convert<uint>(output, data); + break; + case 8: + Convert<ulong>(output, data); + break; + case 12: + Convert<Bpp12Pixel>(output, data); + break; + case 16: +
Convert<Vector128<byte>>(output, data); + break; + + default: + throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format."); } return output;