mirror of
https://github.com/Ryujinx/Ryujinx.git
synced 2024-12-29 16:56:05 +00:00
1402d8391d
* Support NVDEC H264 interlaced video decoding and VIC deinterlacing * Remove unused code
124 lines
5 KiB
C#
124 lines
5 KiB
C#
using System;
|
|
using System.Runtime.Intrinsics;
|
|
using System.Runtime.Intrinsics.X86;
|
|
|
|
namespace Ryujinx.Graphics.Vic
{
    /// <summary>
    /// In-place deinterlacing routines (Weave, Bob and a simple motion adaptive blend).
    /// </summary>
    /// <remarks>
    /// Every method walks <c>data</c> in steps of <c>fieldSize * 2</c> bytes, treating each step as a
    /// pair of rows that are <c>fieldSize</c> bytes apart: the first row of the pair belongs to the top
    /// field and the second one to the bottom field. Only the first <c>width</c> bytes of a row are
    /// touched. Reference buffers (<c>prevData</c>, <c>nextData</c>) are read as single-field buffers
    /// whose rows are <c>fieldSize</c> bytes apart.
    /// </remarks>
    static class Scaler
    {
        /// <summary>
        /// Weave deinterlacing: fills the rows of the missing field with the matching rows of <paramref name="prevData"/>.
        /// </summary>
        /// <param name="data">Frame to complete in place; the rows of the current field are left untouched</param>
        /// <param name="prevData">Single-field buffer supplying the rows of the missing field</param>
        /// <param name="width">Number of bytes copied per row</param>
        /// <param name="fieldSize">Distance in bytes between two consecutive rows of <paramref name="data"/>; must be positive</param>
        /// <param name="isTopField">True if the current field is the top one (the bottom rows are filled), false otherwise</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="fieldSize"/> is not positive while <paramref name="data"/> is not empty,
        /// or a row does not fit inside <paramref name="data"/> or <paramref name="prevData"/>
        /// </exception>
        public static void DeinterlaceWeave(Span<byte> data, ReadOnlySpan<byte> prevData, int width, int fieldSize, bool isTopField)
        {
            // Prev I    Curr I    Curr P
            // TTTTTTTT  BBBBBBBB  TTTTTTTT
            // --------  --------  BBBBBBBB

            if (data.IsEmpty)
            {
                return;
            }

            ThrowIfInvalidFieldSize(fieldSize);

            // Offset, inside each row pair, of the row that must be reconstructed.
            int missingRow = isTopField ? fieldSize : 0;

            for (int offset = 0; offset < data.Length; offset += fieldSize * 2)
            {
                // The reference buffer only has one row per row pair, hence the halved offset.
                prevData.Slice(offset >> 1, width).CopyTo(data.Slice(offset + missingRow, width));
            }
        }

        /// <summary>
        /// Bob deinterlacing: fills the rows of the missing field by duplicating the adjacent row of the current field.
        /// </summary>
        /// <param name="data">Frame to complete in place</param>
        /// <param name="width">Number of bytes copied per row</param>
        /// <param name="fieldSize">Distance in bytes between two consecutive rows of <paramref name="data"/>; must be positive</param>
        /// <param name="isTopField">True to duplicate the top rows into the bottom rows, false for the opposite</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="fieldSize"/> is not positive while <paramref name="data"/> is not empty,
        /// or a row does not fit inside <paramref name="data"/>
        /// </exception>
        public static void DeinterlaceBob(Span<byte> data, int width, int fieldSize, bool isTopField)
        {
            // Curr I    Curr P
            // TTTTTTTT  TTTTTTTT
            // --------  TTTTTTTT

            if (data.IsEmpty)
            {
                return;
            }

            ThrowIfInvalidFieldSize(fieldSize);

            // Offsets, inside each row pair, of the row that is kept and of the row that is overwritten.
            int keptRow = isTopField ? 0 : fieldSize;
            int missingRow = isTopField ? fieldSize : 0;

            for (int offset = 0; offset < data.Length; offset += fieldSize * 2)
            {
                data.Slice(offset + keptRow, width).CopyTo(data.Slice(offset + missingRow, width));
            }
        }

        /// <summary>
        /// Motion adaptive deinterlacing: for every byte of the missing field, uses the previous field value
        /// when it is identical in the previous and next fields (Weave), and the adjacent row of the current
        /// field otherwise (Bob).
        /// </summary>
        /// <param name="data">Frame to complete in place</param>
        /// <param name="prevData">Single-field buffer of the previous frame</param>
        /// <param name="nextData">Single-field buffer of the next frame</param>
        /// <param name="width">Number of bytes processed per row</param>
        /// <param name="fieldSize">Distance in bytes between two consecutive rows of <paramref name="data"/>; must be positive</param>
        /// <param name="isTopField">
        /// Selects which row of each pair is rebuilt: the second one when true, the first one when false.
        /// This mirrors the start offset used by the original implementation.
        /// </param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="fieldSize"/> is not positive while <paramref name="data"/> is not empty,
        /// or a row does not fit inside one of the buffers
        /// </exception>
        public static void DeinterlaceMotionAdaptive(
            Span<byte> data,
            ReadOnlySpan<byte> prevData,
            ReadOnlySpan<byte> nextData,
            int width,
            int fieldSize,
            bool isTopField)
        {
            // Very simple motion adaptive algorithm.
            // If the pixel changed between previous and next frame, use Bob, otherwise use Weave.
            //
            // Example pseudo code:
            // C_even = (P_even == N_even) ? P_even : C_odd
            // Where: C is current frame, P is previous frame and N is next frame, and even/odd are the fields.
            //
            // Note: This does not fully match the hardware algorithm.
            // The motion adaptive deinterlacing implemented on hardware is considerably more complex,
            // and hard to implement accurately without proper documentation as for example, the
            // method used for motion estimation is unknown.

            if (data.IsEmpty)
            {
                return;
            }

            ThrowIfInvalidFieldSize(fieldSize);

            int start = isTopField ? fieldSize : 0;
            int otherFieldOffset = isTopField ? -fieldSize : fieldSize;

            for (int offset = start; offset < data.Length; offset += fieldSize * 2)
            {
                int refOffset = (offset - start) >> 1;

                // Slicing validates every row against its buffer before any pointer is formed,
                // so the vectorized code below can never touch memory outside of the spans.
                Span<byte> target = data.Slice(offset, width);
                ReadOnlySpan<byte> bob = data.Slice(offset + otherFieldOffset, width);
                ReadOnlySpan<byte> prev = prevData.Slice(refOffset, width);
                ReadOnlySpan<byte> next = nextData.Slice(refOffset, width);

                BlendRow(target, bob, prev, next);
            }
        }

        /// <summary>
        /// Writes one reconstructed row: <c>target[x] = (prev[x] == next[x]) ? prev[x] : bob[x]</c>.
        /// </summary>
        /// <remarks>All spans must have the same length as <paramref name="target"/>.</remarks>
        /// <param name="target">Row being reconstructed</param>
        /// <param name="bob">Adjacent row of the current field, used where the pixel changed</param>
        /// <param name="prev">Matching row of the previous field, used where the pixel did not change</param>
        /// <param name="next">Matching row of the next field, only used for the comparison</param>
        private static unsafe void BlendRow(
            Span<byte> target,
            ReadOnlySpan<byte> bob,
            ReadOnlySpan<byte> prev,
            ReadOnlySpan<byte> next)
        {
            int width = target.Length;
            int x = 0;

            fixed (byte* pTarget = target, pBob = bob, pPrev = prev, pNext = next)
            {
                if (Avx2.IsSupported)
                {
                    for (; x < (width & ~0x1f); x += 32)
                    {
                        Vector256<byte> prevPixels = Avx.LoadVector256(pPrev + x);
                        Vector256<byte> nextPixels = Avx.LoadVector256(pNext + x);
                        Vector256<byte> bobPixels = Avx.LoadVector256(pBob + x);

                        // All bits set on the bytes that are equal, which selects prevPixels on the blend.
                        Vector256<byte> unchanged = Avx2.CompareEqual(prevPixels, nextPixels);

                        Avx.Store(pTarget + x, Avx2.BlendVariable(bobPixels, prevPixels, unchanged));
                    }
                }

                // Not an "else": after the 32 bytes loop this also picks up a remaining 16 bytes chunk.
                if (Sse41.IsSupported)
                {
                    for (; x < (width & ~0xf); x += 16)
                    {
                        Vector128<byte> prevPixels = Sse2.LoadVector128(pPrev + x);
                        Vector128<byte> nextPixels = Sse2.LoadVector128(pNext + x);
                        Vector128<byte> bobPixels = Sse2.LoadVector128(pBob + x);
                        Vector128<byte> unchanged = Sse2.CompareEqual(prevPixels, nextPixels);

                        Sse2.Store(pTarget + x, Sse41.BlendVariable(bobPixels, prevPixels, unchanged));
                    }
                }

                // Scalar path for the remaining bytes (or everything, without SIMD support).
                for (; x < width; x++)
                {
                    byte prevPixel = prev[x];

                    target[x] = next[x] != prevPixel ? bob[x] : prevPixel;
                }
            }
        }

        /// <summary>
        /// Rejects row strides that would make the row pair loops spin forever (zero) or walk backwards (negative).
        /// </summary>
        /// <param name="fieldSize">Row stride to validate</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="fieldSize"/> is zero or negative</exception>
        private static void ThrowIfInvalidFieldSize(int fieldSize)
        {
            if (fieldSize <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(fieldSize), fieldSize, "The field row stride must be positive.");
            }
        }
    }
}