mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-27 00:41:42 +00:00
video_core: Add basic command list processing (#117)
This commit is contained in:
parent
2696733cad
commit
b94efcba5a
|
@ -189,6 +189,7 @@ set(COMMON src/common/logging/backend.cpp
|
|||
src/common/alignment.h
|
||||
src/common/assert.cpp
|
||||
src/common/assert.h
|
||||
src/common/bit_field.h
|
||||
src/common/bounded_threadsafe_queue.h
|
||||
src/common/concepts.h
|
||||
src/common/config.cpp
|
||||
|
@ -262,7 +263,13 @@ set(CORE src/core/aerolib/stubs.cpp
|
|||
src/core/virtual_memory.h
|
||||
)
|
||||
|
||||
set(VIDEO_CORE src/video_core/pixel_format.h
|
||||
set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
|
||||
src/video_core/amdgpu/liverpool.h
|
||||
src/video_core/amdgpu/pixel_format.cpp
|
||||
src/video_core/amdgpu/pixel_format.h
|
||||
src/video_core/amdgpu/pm4_cmds.h
|
||||
src/video_core/amdgpu/pm4_opcodes.h
|
||||
src/video_core/amdgpu/resource.h
|
||||
src/video_core/renderer_vulkan/renderer_vulkan.cpp
|
||||
src/video_core/renderer_vulkan/renderer_vulkan.h
|
||||
src/video_core/renderer_vulkan/vk_common.cpp
|
||||
|
|
11
LICENSES/BSD-3-Clause.txt
Normal file
11
LICENSES/BSD-3-Clause.txt
Normal file
|
@ -0,0 +1,11 @@
|
|||
Copyright (c) <year> <owner>.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
166
src/common/bit_field.h
Normal file
166
src/common/bit_field.h
Normal file
|
@ -0,0 +1,166 @@
|
|||
// SPDX-FileCopyrightText: 2014 Tony Wasserka
|
||||
// SPDX-FileCopyrightText: 2014 Dolphin Emulator Project
|
||||
// SPDX-License-Identifier: BSD-3-Clause AND GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <limits>
|
||||
#include <type_traits>
|
||||
|
||||
/*
|
||||
* Abstract bitfield class
|
||||
*
|
||||
* Allows endianness-independent access to individual bitfields within some raw
|
||||
* integer value. The assembly generated by this class is identical to the
|
||||
* usage of raw bitfields, so it's a perfectly fine replacement.
|
||||
*
|
||||
* For BitField<X,Y,Z>, X is the distance of the bitfield to the LSB of the
|
||||
* raw value, Y is the length in bits of the bitfield. Z is an integer type
|
||||
* which determines the sign of the bitfield. Z must have the same size as the
|
||||
* raw integer.
|
||||
*
|
||||
*
|
||||
* General usage:
|
||||
*
|
||||
* Create a new union with the raw integer value as a member.
|
||||
* Then for each bitfield you want to expose, add a BitField member
|
||||
* in the union. The template parameters are the bit offset and the number
|
||||
* of desired bits.
|
||||
*
|
||||
* Changes in the bitfield members will then get reflected in the raw integer
|
||||
* value and vice-versa.
|
||||
*
|
||||
*
|
||||
* Sample usage:
|
||||
*
|
||||
* union SomeRegister
|
||||
* {
|
||||
* u32 hex;
|
||||
*
|
||||
* BitField<0,7,u32> first_seven_bits; // unsigned
|
||||
* BitField<7,8,u32> next_eight_bits; // unsigned
|
||||
* BitField<3,15,s32> some_signed_fields; // signed
|
||||
* };
|
||||
*
|
||||
* This is equivalent to the little-endian specific code:
|
||||
*
|
||||
* union SomeRegister
|
||||
* {
|
||||
* u32 hex;
|
||||
*
|
||||
* struct
|
||||
* {
|
||||
* u32 first_seven_bits : 7;
|
||||
* u32 next_eight_bits : 8;
|
||||
* };
|
||||
* struct
|
||||
* {
|
||||
* u32 : 3; // padding
|
||||
* s32 some_signed_fields : 15;
|
||||
* };
|
||||
* };
|
||||
*
|
||||
*
|
||||
* Caveats:
|
||||
*
|
||||
* 1)
|
||||
* BitField provides automatic casting from and to the storage type where
|
||||
* appropriate. However, when using non-typesafe functions like printf, an
|
||||
* explicit cast must be performed on the BitField object to make sure it gets
|
||||
* passed correctly, e.g.:
|
||||
* printf("Value: %d", (s32)some_register.some_signed_fields);
|
||||
*
|
||||
* 2)
|
||||
* Not really a caveat, but potentially irritating: This class is used in some
|
||||
* packed structures that do not guarantee proper alignment. Therefore we have
|
||||
* to use #pragma pack here not to pack the members of the class, but instead
|
||||
* to break GCC's assumption that the members of the class are aligned on
|
||||
* sizeof(StorageType).
|
||||
*/
|
||||
#pragma pack(1)
|
||||
/**
 * Typed accessor for a run of `Bits` bits that starts `Position` bits above the least
 * significant bit of a raw integer. `T` is the type the field is read and written as; it may
 * be an integer or an enumeration. Instances are meant to be placed in a union next to the raw
 * integer they alias.
 */
template <std::size_t Position, std::size_t Bits, typename T>
struct BitField {
private:
    // Reject template arguments that cannot describe a field inside T. The separate range
    // checks also catch absurd values such as Bits == Position == 0x80000000, whose sum wraps.
    static_assert(Bits + Position <= 8 * sizeof(T), "Bitfield out of range");
    static_assert(Position < 8 * sizeof(T), "Invalid position");
    static_assert(Bits <= 8 * sizeof(T), "Invalid number of bits");
    static_assert(Bits > 0, "Invalid number of bits");
    static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable in a BitField");

    // For enumerations this is the enum's underlying type, otherwise T itself. The non-enum
    // case goes through enable_if so that std::underlying_type<T>::type is never named for a
    // non-enum T, which would not compile.
    using UnderlyingType =
        typename std::conditional_t<std::is_enum_v<T>, std::underlying_type<T>,
                                    std::enable_if<true, T>>::type;

    // The value is kept in unsigned form so that shifting it never invokes undefined behaviour.
    using StorageType = std::make_unsigned_t<UnderlyingType>;

public:
    /// Field geometry, exposed for limited introspection.
    static constexpr std::size_t position = Position;
    static constexpr std::size_t bits = Bits;
    /// All-ones pattern covering exactly the bits occupied by this field.
    static constexpr StorageType mask =
        (std::numeric_limits<StorageType>::max() >> (8 * sizeof(T) - Bits)) << Position;

    /**
     * Moves `value` into the field's bit position and discards anything that does not fit.
     * Results for several fields of one register can be ORed together to build the raw value.
     */
    [[nodiscard]] static constexpr StorageType FormatValue(const T& value) {
        const auto shifted = static_cast<StorageType>(value) << Position;
        return shifted & mask;
    }

    /**
     * Reads this field out of a raw storage word. Prefer Value() or the conversion operator on
     * an instance; this static form exists so a field can be decoded in a constexpr context.
     * Signed fields are sign-extended from their top bit.
     */
    [[nodiscard]] static constexpr T ExtractValue(const StorageType& storage) {
        if constexpr (!std::numeric_limits<UnderlyingType>::is_signed) {
            return static_cast<T>((storage & mask) >> Position);
        } else {
            // Push the field's top bit into the sign bit, then shift back arithmetically.
            constexpr std::size_t sign_shift = 8 * sizeof(T) - Bits;
            const auto aligned =
                static_cast<UnderlyingType>(storage << (sign_shift - Position));
            return static_cast<T>(aligned >> sign_shift);
        }
    }

    // Constructing or assigning from a T would be ambiguous (whole storage, or just the
    // field?), so both are removed. Use Assign() to write the field.
    BitField(T val) = delete;
    BitField& operator=(T val) = delete;

    constexpr BitField() noexcept = default;

    constexpr BitField(const BitField&) noexcept = default;
    constexpr BitField& operator=(const BitField&) noexcept = default;

    constexpr BitField(BitField&&) noexcept = default;
    constexpr BitField& operator=(BitField&&) noexcept = default;

    /// Implicit read of the field as T.
    [[nodiscard]] constexpr operator T() const {
        return ExtractValue(storage);
    }

    /// Overwrites only this field's bits with `value`; all other storage bits are preserved.
    constexpr void Assign(const T& value) {
        storage = static_cast<StorageType>((storage & ~mask) | FormatValue(value));
    }

    /// Explicit read of the field as T.
    [[nodiscard]] constexpr T Value() const {
        return ExtractValue(storage);
    }

    /// True when the field holds a non-zero value.
    [[nodiscard]] constexpr explicit operator bool() const {
        return ExtractValue(storage) != 0;
    }

private:
    StorageType storage;
};
|
||||
#pragma pack()
|
|
@ -13,8 +13,8 @@
|
|||
namespace Common {
|
||||
|
||||
// Captures the estimated RDTSC frequency, then derives one fixed-point factor per time base
// (nanoseconds, microseconds, milliseconds) from it via GetFixedPoint64Factor.
NativeClock::NativeClock()
    : rdtsc_frequency{EstimateRDTSCFrequency()},
      ns_rdtsc_factor{GetFixedPoint64Factor(std::nano::den, rdtsc_frequency)},
      us_rdtsc_factor{GetFixedPoint64Factor(std::micro::den, rdtsc_frequency)},
      ms_rdtsc_factor{GetFixedPoint64Factor(std::milli::den, rdtsc_frequency)} {}
|
||||
|
||||
|
|
|
@ -1,13 +1,21 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/libraries/error_codes.h"
|
||||
#include "core/libraries/gnmdriver/gnmdriver.h"
|
||||
#include "core/libraries/libs.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/amdgpu/pm4_cmds.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
|
||||
namespace Libraries::GnmDriver {
|
||||
|
||||
using namespace AmdGpu;
|
||||
|
||||
static std::unique_ptr<AmdGpu::Liverpool> liverpool;
|
||||
|
||||
int PS4_SYSV_ABI sceGnmAddEqEvent() {
|
||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
|
@ -133,19 +141,45 @@ int PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState() {
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
/**
 * Writes an indexed draw (PM4CmdDrawIndex2) into `cmdbuf`, followed by a two-dword trailer.
 *
 * @param cmdbuf      Destination command buffer; must not be null.
 * @param size        Size of the reserved space; only the value 10 is accepted.
 * @param index_count Stored as both the packet's maxSize and its indexCount.
 * @param index_addr  Address of the index data; must be non-zero and have bit 0 clear.
 * @param flags       Only bit 0 may be set; it enables predication on the packet header.
 * @param type        Logged only; not otherwise used here.
 * @return ORBIS_OK when the packet was written, -1 when any argument is rejected.
 */
s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u64 size, u32 index_count, uintptr_t index_addr,
                                 u32 flags, u32 type) {
    LOG_INFO(Lib_GnmDriver,
             "(STUBBED) called cmd_buffer = 0x{:x} size = {} index_count = {} index_addr = 0x{:x} "
             "flags = 0x{:x} type = {}",
             reinterpret_cast<uint64_t>(cmdbuf), size, index_count, index_addr, flags, type);

    const bool args_valid = cmdbuf && (size == 10) && (index_addr != 0) &&
                            (index_addr & 1) == 0 && (flags & 0x1ffffffe) == 0;
    if (!args_valid) {
        return -1;
    }

    const auto predicate = (flags & 1) ? PM4Predicate::PredEnable : PM4Predicate::PredDisable;

    auto* packet = reinterpret_cast<PM4CmdDrawIndex2*>(cmdbuf);
    packet->header =
        PM4Type3Header{PM4ItOpcode::DrawIndex2, 4, PM4ShaderType::ShaderGraphics, predicate};
    packet->maxSize = index_count;
    packet->indexBaseLo = u32(index_addr);
    packet->indexBaseHi = u32(index_addr >> 32);
    packet->indexCount = index_count;
    packet->drawInitiator = 0;

    // Trailer after the draw packet; presumably a NOP header padding out the reserved space.
    cmdbuf[6] = 0xc0021000;
    cmdbuf[7] = 0;
    return ORBIS_OK;
}
|
||||
|
||||
/**
 * Writes a five-dword auto-indexed draw sequence into `cmdbuf`.
 *
 * @param cmdbuf      Destination command buffer; must not be null.
 * @param size        Size of the reserved space; only the value 7 is accepted.
 * @param index_count Written as the second dword of the sequence.
 * @param flags       Only bit 0 may be set; it is ORed into the first header dword.
 * @return ORBIS_OK when the sequence was written, -1 when any argument is rejected.
 */
int PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32 flags) {
    LOG_INFO(Lib_GnmDriver, "called");

    const bool args_valid = cmdbuf && (size == 7) && (flags & 0x1ffffffe) == 0;
    if (!args_valid) {
        return -1;
    }

    cmdbuf[0] = (flags & 1) | 0xc0012d00;
    cmdbuf[1] = index_count;
    cmdbuf[2] = 2;
    // Trailer after the draw; presumably a NOP header padding out the reserved space.
    cmdbuf[3] = 0xc0021000;
    cmdbuf[4] = 0;
    return ORBIS_OK;
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmDrawIndexIndirect() {
|
||||
|
@ -653,8 +687,39 @@ int PS4_SYSV_ABI sceGnmSetPsShader() {
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
/**
 * Emits the register writes that bind (or unbind) a pixel shader into `cmdBuffer`.
 *
 * @param cmdBuffer Destination command buffer; must not be null.
 * @param numDwords Reserved space in dwords; must be at least 0x28.
 * @param psRegs    Shader register values (indices 0..11 are read). When null, the shader
 *                  address is cleared and fixed DB_SHADER_CONTROL / CB_SHADER_MASK values are
 *                  written instead.
 * @return ORBIS_OK on success, -1 on a rejected argument or when psRegs[1] is non-zero.
 */
int PS4_SYSV_ABI sceGnmSetPsShader350(u32* cmdBuffer, u32 numDwords, const u32* psRegs) {
    if (!cmdBuffer || numDwords <= 0x27) {
        return -1;
    }

    if (!psRegs) {
        cmdBuffer = PM4CmdSetData::SetShReg(cmdBuffer, 8u, 0u,
                                            0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS
        cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x203u, 0u);  // DB_SHADER_CONTROL
        cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x8fu, 0xfu); // CB_SHADER_MASK
        // Trailer header followed by a zero dword. Written as two 32-bit stores instead of one
        // 64-bit store through a u32*, which would violate aliasing rules and may be misaligned.
        cmdBuffer[0] = 0xc01c1000;
        cmdBuffer[1] = 0;
        return ORBIS_OK;
    }

    if (psRegs[1] != 0) {
        LOG_ERROR(Lib_GnmDriver, "Invalid shader address.");
        return -1;
    }

    cmdBuffer = PM4CmdSetData::SetShReg(cmdBuffer, 8u, psRegs[0],
                                        0u); // SPI_SHADER_PGM_LO_PS/SPI_SHADER_PGM_HI_PS
    cmdBuffer = PM4CmdSetData::SetShReg(
        cmdBuffer, 10u, psRegs[2],
        psRegs[3]); // SPI_SHADER_USER_DATA_PS_4/SPI_SHADER_USER_DATA_PS_5
    cmdBuffer = PM4CmdSetData::SetContextReg(
        cmdBuffer, 0x1c4u, psRegs[4], psRegs[5]); // SPI_SHADER_Z_FORMAT/SPI_SHADER_COL_FORMAT
    cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x1b3u, psRegs[6],
                                             psRegs[7]); // SPI_PS_INPUT_ENA
    cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x1b6u, psRegs[8]); // SPI_PS_IN_CONTROL
    cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x1b8u, psRegs[9]); // SPI_BARYC_CNTL
    cmdBuffer =
        PM4CmdSetData::SetContextReg(cmdBuffer, 0x203u, psRegs[10]); // DB_SHADER_CONTROL
    cmdBuffer = PM4CmdSetData::SetContextReg(cmdBuffer, 0x8fu, psRegs[11]); // CB_SHADER_MASK
    // Trailer header followed by a zero dword (see the note on the null-psRegs path).
    cmdBuffer[0] = 0xc00a1000;
    cmdBuffer[1] = 0;
    return ORBIS_OK;
}
|
||||
|
||||
|
@ -688,14 +753,42 @@ int PS4_SYSV_ABI sceGnmSetVgtControl() {
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
/**
 * Emits the register writes that bind a vertex shader into `cmdbuf`.
 *
 * @param cmdbuf          Destination command buffer; must not be null.
 * @param size            Reserved space; must be at least 0x1d.
 * @param vs_regs         Shader register values (indices 0..6 are read); must not be null.
 * @param shader_modifier Bits merged into vs_regs[2]; only bits outside the mask 0xfcfffc3f
 *                        may be set.
 * @return ORBIS_OK on success, -1 when an argument is rejected or vs_regs[1] is non-zero.
 */
int PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u64 size, const u32* vs_regs, u32 shader_modifier) {
    if (!cmdbuf || size <= 0x1c) {
        return -1;
    }

    if (!vs_regs) {
        LOG_ERROR(Lib_GnmDriver, "Null pointer passed as argument");
        return -1;
    }

    // Logged only after the null check above: the message reads vs_regs[0..6].
    LOG_INFO(Lib_GnmDriver,
             "(STUBBED) called cmd_buffer = 0x{:x} size = {} shader_modifier = {} vs_reg0 = "
             "0x{:x} vs_reg1 = 0x{:x} vs_reg2 = 0x{:x} vs_reg3 = 0x{:x} vs_reg4 = 0x{:x} vs_reg5 = "
             "0x{:x} vs_reg6 = 0x{:x}",
             reinterpret_cast<uint64_t>(cmdbuf), size, shader_modifier, vs_regs[0], vs_regs[1],
             vs_regs[2], vs_regs[3], vs_regs[4], vs_regs[5], vs_regs[6]);

    if (shader_modifier & 0xfcfffc3f) {
        LOG_ERROR(Lib_GnmDriver, "Invalid modifier mask");
        return -1;
    }

    if (vs_regs[1] != 0) {
        LOG_ERROR(Lib_GnmDriver, "Invalid shader address");
        return -1;
    }

    // With a modifier, the modifiable bits of vs_regs[2] are cleared and replaced by it.
    const u32 rsrc1 =
        shader_modifier == 0 ? vs_regs[2] : ((vs_regs[2] & 0xfcfffc3f) | shader_modifier);
    cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x48u, vs_regs[0], 0u);      // SPI_SHADER_PGM_LO_VS
    cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x4au, rsrc1, vs_regs[3]);   // SPI_SHADER_PGM_RSRC1_VS
    cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x207u, vs_regs[6]);    // PA_CL_VS_OUT_CNTL
    cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1b1u, vs_regs[4]);    // SPI_VS_OUT_CONFIG
    cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x1c3u, vs_regs[5]);    // SPI_SHADER_POS_FORMAT
    // Trailer header followed by a zero dword. Written as two 32-bit stores instead of one
    // 64-bit store through a u32*, which would violate aliasing rules and may be misaligned.
    cmdbuf[0] = 0xc00a1000;
    cmdbuf[1] = 0;

    return ORBIS_OK;
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmSetWaveLimitMultiplier() {
|
||||
|
@ -903,8 +996,35 @@ int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload() {
|
|||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
/**
 * Validates a command buffer submission and hands the first draw command buffer to the
 * Liverpool command processor.
 *
 * @param count           Number of buffers submitted; anything other than 1 trips an assert.
 * @param dcbGpuAddrs     Draw command buffer addresses; must not be null.
 * @param dcbSizesInBytes Draw command buffer sizes; must not be null, each in (0, 0x3ffffc].
 * @param ccbGpuAddrs     Constant command buffer addresses; not read here.
 * @param ccbSizesInBytes Optional constant command buffer sizes; each at most 0x3ffffc.
 * @return ORBIS_OK on success, 0x80d11000 when validation fails.
 */
int PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcbGpuAddrs[], u32* dcbSizesInBytes,
                                            void* ccbGpuAddrs[], u32* ccbSizesInBytes) {
    // 0x3ffffc bytes is (2^20)-1 dwords, the limit quoted in the log messages below.
    constexpr u32 max_size_in_bytes = 0x3ffffc;

    LOG_INFO(Lib_GnmDriver, "called");
    ASSERT_MSG(count == 1, "Multiple command buffer submission is unsupported!");

    if (!dcbGpuAddrs || !dcbSizesInBytes) {
        LOG_ERROR(Lib_GnmDriver, "dcbGpuAddrs and dcbSizesInBytes must not be NULL");
        return 0x80d11000;
    }

    for (u32 i = 0; i < count; i++) {
        if (dcbSizesInBytes[i] == 0) {
            LOG_ERROR(Lib_GnmDriver, "Submitting a null DCB {}", i);
            return 0x80d11000;
        }
        if (dcbSizesInBytes[i] > max_size_in_bytes) {
            LOG_ERROR(Lib_GnmDriver, "dcbSizesInBytes[{}] ({}) is limited to (2^20)-1 DWORDS", i,
                      dcbSizesInBytes[i]);
            return 0x80d11000;
        }
        if (ccbSizesInBytes && ccbSizesInBytes[i] > max_size_in_bytes) {
            LOG_ERROR(Lib_GnmDriver, "ccbSizesInBytes[{}] ({}) is limited to (2^20)-1 DWORDS", i,
                      ccbSizesInBytes[i]);
            return 0x80d11000;
        }
    }

    // Only the first draw command buffer is processed (count is asserted to be 1 above).
    liverpool->ProcessCmdList(reinterpret_cast<u32*>(dcbGpuAddrs[0]), dcbSizesInBytes[0]);

    return ORBIS_OK;
}
|
||||
|
||||
|
@ -1279,6 +1399,8 @@ int PS4_SYSV_ABI Func_F916890425496553() {
|
|||
}
|
||||
|
||||
void RegisterlibSceGnmDriver(Core::Loader::SymbolsResolver* sym) {
|
||||
liverpool = std::make_unique<AmdGpu::Liverpool>();
|
||||
|
||||
LIB_FUNCTION("b0xyllnVY-I", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1, sceGnmAddEqEvent);
|
||||
LIB_FUNCTION("b08AgtPlHPg", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1,
|
||||
sceGnmAreSubmitsAllowed);
|
||||
|
|
|
@ -36,9 +36,9 @@ int PS4_SYSV_ABI sceGnmDispatchDirect();
|
|||
int PS4_SYSV_ABI sceGnmDispatchIndirect();
|
||||
int PS4_SYSV_ABI sceGnmDispatchIndirectOnMec();
|
||||
int PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState();
|
||||
s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmd, u64 size, u32 index_count, const void* index_addr,
|
||||
s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmd, u64 size, u32 index_count, uintptr_t index_addr,
|
||||
u32 flags, u32 type);
|
||||
int PS4_SYSV_ABI sceGnmDrawIndexAuto();
|
||||
int PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32 flags);
|
||||
int PS4_SYSV_ABI sceGnmDrawIndexIndirect();
|
||||
int PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti();
|
||||
int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti();
|
||||
|
@ -140,7 +140,7 @@ int PS4_SYSV_ABI sceGnmSetGsShader();
|
|||
int PS4_SYSV_ABI sceGnmSetHsShader();
|
||||
int PS4_SYSV_ABI sceGnmSetLsShader();
|
||||
int PS4_SYSV_ABI sceGnmSetPsShader();
|
||||
int PS4_SYSV_ABI sceGnmSetPsShader350();
|
||||
int PS4_SYSV_ABI sceGnmSetPsShader350(u32* cmdBuffer, u32 numDwords, const u32* psRegs);
|
||||
int PS4_SYSV_ABI sceGnmSetResourceRegistrationUserMemory();
|
||||
int PS4_SYSV_ABI sceGnmSetResourceUserData();
|
||||
int PS4_SYSV_ABI sceGnmSetSpiEnableSqCounters();
|
||||
|
@ -189,7 +189,8 @@ int PS4_SYSV_ABI sceGnmSqttSwitchTraceBuffer2();
|
|||
int PS4_SYSV_ABI sceGnmSqttWaitForEvent();
|
||||
int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers();
|
||||
int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload();
|
||||
int PS4_SYSV_ABI sceGnmSubmitCommandBuffers();
|
||||
int PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, void* dcbGpuAddrs[], u32* dcbSizesInBytes,
|
||||
void* ccbGpuAddrs[], u32* ccbSizesInBytes);
|
||||
int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload();
|
||||
int PS4_SYSV_ABI sceGnmSubmitDone();
|
||||
int PS4_SYSV_ABI sceGnmUnmapComputeQueue();
|
||||
|
|
95
src/video_core/amdgpu/liverpool.cpp
Normal file
95
src/video_core/amdgpu/liverpool.cpp
Normal file
|
@ -0,0 +1,95 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/io_file.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/amdgpu/pm4_cmds.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
Liverpool::Liverpool() = default;
|
||||
|
||||
/**
 * Walks a PM4 command list packet by packet and applies each packet to the register state.
 *
 * @param cmdbuf        Pointer to the first packet header of the list.
 * @param size_in_bytes Length of the list; processing stops once at least this many bytes have
 *                      been consumed.
 *
 * Only type-3 packets are handled. Any other packet type, or a type-3 opcode without a case
 * below, ends in UNREACHABLE_MSG.
 */
void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
    auto* header = reinterpret_cast<PM4Header*>(cmdbuf);
    u32 processed_cmd_size = 0;

    while (processed_cmd_size < size_in_bytes) {
        PM4Header* next_header{};
        const u32 type = header->type;
        switch (type) {
        case 3: {
            const PM4ItOpcode opcode = header->type3.opcode;
            const u32 count = header->type3.NumWords();
            switch (opcode) {
            case PM4ItOpcode::Nop:
                break;
            // The three SET_*_REG packets share one layout: the payload begins two dwords past
            // the header and holds count - 1 register values, copied into the register file at
            // the block's base offset plus the packet's regOffset.
            case PM4ItOpcode::SetContextReg: {
                auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
                std::memcpy(&regs.reg_array[ContextRegWordOffset + set_data->regOffset],
                            header + 2, (count - 1) * sizeof(u32));
                break;
            }
            case PM4ItOpcode::SetShReg: {
                auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
                std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->regOffset], header + 2,
                            (count - 1) * sizeof(u32));
                break;
            }
            case PM4ItOpcode::SetUconfigReg: {
                auto* set_data = reinterpret_cast<PM4CmdSetData*>(header);
                std::memcpy(&regs.reg_array[UconfigRegWordOffset + set_data->regOffset],
                            header + 2, (count - 1) * sizeof(u32));
                break;
            }
            case PM4ItOpcode::IndexType: {
                auto* index_type = reinterpret_cast<PM4CmdDrawIndexType*>(header);
                regs.index_buffer_type.raw = index_type->raw;
                break;
            }
            case PM4ItOpcode::DrawIndex2: {
                auto* draw_index = reinterpret_cast<PM4CmdDrawIndex2*>(header);
                regs.max_index_size = draw_index->maxSize;
                regs.index_base_address.base_addr_lo = draw_index->indexBaseLo;
                regs.index_base_address.base_addr_hi.Assign(draw_index->indexBaseHi);
                regs.num_indices = draw_index->indexCount;
                regs.draw_initiator = draw_index->drawInitiator;
                // rasterizer->DrawIndex();
                break;
            }
            case PM4ItOpcode::DrawIndexAuto: {
                auto* draw_index = reinterpret_cast<PM4CmdDrawIndexAuto*>(header);
                regs.num_indices = draw_index->index_count;
                regs.draw_initiator = draw_index->draw_initiator;
                // rasterizer->DrawIndex();
                break;
            }
            case PM4ItOpcode::EventWriteEop: {
                auto* event_write = reinterpret_cast<PM4CmdEventWriteEop*>(header);
                const InterruptSelect irq_sel = event_write->intSel;
                const DataSelect data_sel = event_write->dataSel;
                // Only the "no interrupt, write 64-bit data" form is supported.
                ASSERT(irq_sel == InterruptSelect::None && data_sel == DataSelect::Data64);
                *event_write->Address() = event_write->DataQWord();
                break;
            }
            case PM4ItOpcode::DmaData:
                // Recognised but has no effect here; the packet is simply stepped over.
                break;
            default:
                UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
                                static_cast<u32>(opcode), count);
            }
            // A packet spans its header dword plus NumWords() further dwords.
            next_header = header + count + 1;
            break;
        }
        default:
            UNREACHABLE_MSG("Invalid PM4 type {}", type);
        }

        processed_cmd_size += uintptr_t(next_header) - uintptr_t(header);
        header = next_header;
    }
}
|
||||
|
||||
} // namespace AmdGpu
|
647
src/video_core/amdgpu/liverpool.h
Normal file
647
src/video_core/amdgpu/liverpool.h
Normal file
|
@ -0,0 +1,647 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/types.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
#define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32))
|
||||
|
||||
#define CONCAT2(x, y) DO_CONCAT2(x, y)
|
||||
#define DO_CONCAT2(x, y) x##y
|
||||
#define INSERT_PADDING_WORDS(num_words) \
|
||||
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__)
|
||||
|
||||
struct Liverpool {
|
||||
static constexpr u32 NumColorBuffers = 8;
|
||||
static constexpr u32 NumViewports = 16;
|
||||
static constexpr u32 NumClipPlanes = 6;
|
||||
static constexpr u32 NumWordsShaderUserData = 16;
|
||||
static constexpr u32 UconfigRegWordOffset = 0xC000;
|
||||
static constexpr u32 ContextRegWordOffset = 0xA000;
|
||||
static constexpr u32 ShRegWordOffset = 0x2C00;
|
||||
static constexpr u32 NumRegs = 0xD000;
|
||||
|
||||
using UserData = std::array<u32, NumWordsShaderUserData>;
|
||||
|
||||
struct ShaderProgram {
|
||||
u32 address_lo;
|
||||
u32 address_hi;
|
||||
union {
|
||||
BitField<0, 6, u64> num_vgprs;
|
||||
BitField<6, 4, u64> num_sgprs;
|
||||
BitField<33, 5, u64> num_user_regs;
|
||||
} settings;
|
||||
UserData user_data;
|
||||
|
||||
const u8* Address() const {
|
||||
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
|
||||
return reinterpret_cast<const u8*>(addr);
|
||||
}
|
||||
};
|
||||
|
||||
/// Encoding of the 4-bit pos0..pos3 fields of ShaderPosFormat.
enum class ShaderExportComp : u32 {
    None = 0,
    OneComp = 1,
    TwoComp = 2,
    FourCompCompressed = 3,
    FourComp = 4,
};
|
||||
|
||||
union ShaderPosFormat {
|
||||
u32 raw;
|
||||
BitField<0, 4, ShaderExportComp> pos0;
|
||||
BitField<4, 4, ShaderExportComp> pos1;
|
||||
BitField<8, 4, ShaderExportComp> pos2;
|
||||
BitField<12, 4, ShaderExportComp> pos3;
|
||||
};
|
||||
|
||||
/// Encoding of the 4-bit col0..col7 fields of ColorExportFormat.
enum class ShaderExportFormat : u32 {
    Zero = 0,
    R_32 = 1,
    GR_32 = 2,
    AR_32 = 3,
    ABGR_FP16 = 4,
    ABGR_UNORM16 = 5,
    ABGR_SNORM16 = 6,
    ABGR_UINT16 = 7,
    ABGR_SINT16 = 8,
    ABGR_32 = 9,
};
|
||||
|
||||
union ColorExportFormat {
|
||||
u32 raw;
|
||||
BitField<0, 4, ShaderExportFormat> col0;
|
||||
BitField<4, 4, ShaderExportFormat> col1;
|
||||
BitField<8, 4, ShaderExportFormat> col2;
|
||||
BitField<12, 4, ShaderExportFormat> col3;
|
||||
BitField<16, 4, ShaderExportFormat> col4;
|
||||
BitField<20, 4, ShaderExportFormat> col5;
|
||||
BitField<24, 4, ShaderExportFormat> col6;
|
||||
BitField<28, 4, ShaderExportFormat> col7;
|
||||
};
|
||||
|
||||
union VsOutputControl {
|
||||
u32 raw;
|
||||
BitField<0, 8, u32> clip_distance_enable;
|
||||
BitField<8, 8, u32> cull_distance_enable;
|
||||
BitField<16, 1, u32> use_vtx_point_size;
|
||||
BitField<17, 1, u32> use_vtx_edge_flag;
|
||||
BitField<18, 1, u32> use_vtx_render_target_idx;
|
||||
BitField<19, 1, u32> use_vtx_viewport_idx;
|
||||
BitField<20, 1, u32> use_vtx_kill_flag;
|
||||
|
||||
bool IsClipDistEnabled(u32 index) const {
|
||||
return (clip_distance_enable.Value() >> index) & 1;
|
||||
}
|
||||
|
||||
bool IsCullDistEnabled(u32 index) const {
|
||||
return (cull_distance_enable.Value() >> index) & 1;
|
||||
}
|
||||
};
|
||||
|
||||
/// Encoding of the 2-bit z_order field of DepthBufferControl.
enum class ZOrder : u32 {
    LateZ = 0,
    EarlyZLateZ = 1,
    ReZ = 2,
    EarlyZReZ = 3,
};
|
||||
|
||||
/// Encoding of the 2-bit conservative_z_export field of DepthBufferControl.
enum class ConservativeDepth : u32 {
    Any = 0,
    LessThanZ = 1,
    GreaterThanZ = 2,
};
|
||||
|
||||
union DepthBufferControl {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> z_export_enable;
|
||||
BitField<1, 1, u32> stencil_test_val_export_enable;
|
||||
BitField<2, 1, u32> stencil_op_val_export_enable;
|
||||
BitField<4, 2, ZOrder> z_order;
|
||||
BitField<6, 1, u32> kill_enable;
|
||||
BitField<7, 1, u32> coverage_to_mask_enable;
|
||||
BitField<8, 1, u32> mask_export_enable;
|
||||
BitField<9, 1, u32> exec_on_hier_fail;
|
||||
BitField<10, 1, u32> exec_on_noop;
|
||||
BitField<11, 1, u32> alpha_to_mask_disable;
|
||||
BitField<12, 1, u32> depth_before_shader;
|
||||
BitField<13, 2, ConservativeDepth> conservative_z_export;
|
||||
};
|
||||
|
||||
/// 3-bit comparison selector used by the depth and stencil function fields of DepthControl.
enum class CompareFunc : u32 {
    Never = 0,
    Less = 1,
    Equal = 2,
    LessEqual = 3,
    Greater = 4,
    NotEqual = 5,
    GreaterEqual = 6,
    Always = 7,
};
|
||||
|
||||
union DepthControl {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> stencil_enable;
|
||||
BitField<1, 1, u32> depth_enable;
|
||||
BitField<2, 1, u32> depth_write_enable;
|
||||
BitField<3, 1, u32> depth_bounds_enable;
|
||||
BitField<4, 3, CompareFunc> depth_func;
|
||||
BitField<7, 1, u32> backface_enable;
|
||||
BitField<8, 3, CompareFunc> stencil_ref_func;
|
||||
BitField<20, 3, CompareFunc> stencil_bf_func;
|
||||
BitField<30, 1, u32> enable_color_writes_on_depth_fail;
|
||||
BitField<31, 1, u32> disable_color_writes_on_depth_pass;
|
||||
};
|
||||
|
||||
union DepthSize {
|
||||
u32 raw;
|
||||
BitField<0, 11, u32> pitch_tile_max;
|
||||
BitField<11, 11, u32> height_tile_max;
|
||||
|
||||
u32 Pitch() const {
|
||||
return (pitch_tile_max + 1) << 3;
|
||||
}
|
||||
|
||||
u32 Height() const {
|
||||
return (height_tile_max + 1) << 3;
|
||||
}
|
||||
};
|
||||
|
||||
union DepthSlice {
|
||||
u32 raw;
|
||||
BitField<0, 22, u32> slice_tile_max;
|
||||
};
|
||||
|
||||
/// 4-bit stencil operation selector used by every field of StencilControl.
enum class StencilFunc : u32 {
    Keep = 0,
    Zero = 1,
    Ones = 2,
    ReplaceTest = 3,
    ReplaceOp = 4,
    AddClamp = 5,
    SubClamp = 6,
    Invert = 7,
    AddWrap = 8,
    SubWrap = 9,
    And = 10,
    Or = 11,
    Xor = 12,
    Nand = 13,
    Nor = 14,
    Xnor = 15,
};
|
||||
|
||||
union StencilControl {
|
||||
u32 raw;
|
||||
BitField<0, 4, StencilFunc> stencil_fail_front;
|
||||
BitField<4, 4, StencilFunc> stencil_zpass_front;
|
||||
BitField<8, 4, StencilFunc> stencil_zfail_front;
|
||||
BitField<12, 4, StencilFunc> stencil_fail_back;
|
||||
BitField<16, 4, StencilFunc> stencil_zpass_back;
|
||||
BitField<20, 4, StencilFunc> stencil_zfail_back;
|
||||
};
|
||||
|
||||
union StencilRefMask {
|
||||
u32 raw;
|
||||
BitField<0, 8, u32> stencil_test_val;
|
||||
BitField<8, 8, u32> stencil_mask;
|
||||
BitField<16, 8, u32> stencil_write_mask;
|
||||
BitField<24, 8, u32> stencil_op_val;
|
||||
};
|
||||
|
||||
union StencilInfo {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> format;
|
||||
};
|
||||
|
||||
/// Encoding of the 1-bit clip_space field of ClipperControl.
enum class ClipSpace : u32 {
    MinusWToW = 0,
    ZeroToW = 1,
};
|
||||
|
||||
/// Encoding of the 1-bit vtx_kill_or field of ClipperControl.
enum class PrimKillCond : u32 {
    AllVtx = 0,
    AnyVtx = 1,
};
|
||||
|
||||
union ClipperControl {
|
||||
u32 raw;
|
||||
BitField<0, 6, u32> user_clip_plane_enable;
|
||||
BitField<16, 1, u32> clip_disable;
|
||||
BitField<19, 1, ClipSpace> clip_space;
|
||||
BitField<21, 1, PrimKillCond> vtx_kill_or;
|
||||
BitField<22, 1, u32> dx_rasterization_kill;
|
||||
BitField<23, 1, u32> dx_linear_attr_clip_enable;
|
||||
BitField<26, 1, u32> zclip_near_disable;
|
||||
BitField<26, 1, u32> zclip_far_disable;
|
||||
};
|
||||
|
||||
/// Encoding of the polygon_mode_front / polygon_mode_back fields of PolygonControl.
enum class PolygonMode : u32 {
    Point = 0,
    Line = 1,
    Fill = 2,
};
|
||||
|
||||
enum class ProvokingVtxLast : u32 {
|
||||
First = 0,
|
||||
Last = 1,
|
||||
};
|
||||
|
||||
enum class CullMode : u32 {
|
||||
None = 0,
|
||||
Front = 1,
|
||||
Back = 2,
|
||||
FrontAndBack = 3,
|
||||
};
|
||||
|
||||
union PolygonControl {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> cull_front;
|
||||
BitField<1, 1, u32> cull_back;
|
||||
BitField<3, 2, u32> enable_polygon_mode;
|
||||
BitField<5, 3, PolygonMode> polygon_mode_front;
|
||||
BitField<8, 3, PolygonMode> polygon_mode_back;
|
||||
BitField<11, 1, u32> enable_polygon_offset_front;
|
||||
BitField<12, 1, u32> enable_polygon_offset_back;
|
||||
BitField<13, 1, u32> enable_polygon_offset_para;
|
||||
BitField<13, 1, u32> enable_window_offset;
|
||||
BitField<19, 1, ProvokingVtxLast> provoking_vtx_last;
|
||||
|
||||
PolygonMode PolyMode() const {
|
||||
return enable_polygon_mode ? polygon_mode_front.Value() : PolygonMode::Fill;
|
||||
}
|
||||
|
||||
CullMode CullingMode() const {
|
||||
return static_cast<CullMode>(cull_front | cull_back << 1);
|
||||
}
|
||||
};
|
||||
|
||||
union VsOutputConfig {
|
||||
u32 raw;
|
||||
BitField<1, 5, u32> export_count_min_one;
|
||||
BitField<6, 1, u32> half_pack;
|
||||
|
||||
u32 NumExports() const {
|
||||
return export_count_min_one.Value() + 1;
|
||||
}
|
||||
};
|
||||
|
||||
union ColorBufferMask {
|
||||
u32 raw;
|
||||
BitField<0, 4, u32> output0_mask;
|
||||
BitField<4, 4, u32> output1_mask;
|
||||
BitField<8, 4, u32> output2_mask;
|
||||
BitField<12, 4, u32> output3_mask;
|
||||
BitField<16, 4, u32> output4_mask;
|
||||
BitField<20, 4, u32> output5_mask;
|
||||
BitField<24, 4, u32> output6_mask;
|
||||
BitField<28, 4, u32> output7_mask;
|
||||
};
|
||||
|
||||
struct IndexBufferBase {
|
||||
BitField<0, 8, u32> base_addr_hi;
|
||||
u32 base_addr_lo;
|
||||
|
||||
VAddr Address() const {
|
||||
return base_addr_lo | u64(base_addr_hi) << 32;
|
||||
}
|
||||
};
|
||||
|
||||
enum class IndexType : u32 {
|
||||
Index16 = 0,
|
||||
Index32 = 1,
|
||||
};
|
||||
|
||||
enum class IndexSwapMode : u32 {
|
||||
None = 0,
|
||||
Swap16 = 1,
|
||||
Swap32 = 2,
|
||||
SwapWord = 3,
|
||||
};
|
||||
|
||||
union IndexBufferType {
|
||||
u32 raw;
|
||||
BitField<0, 2, IndexType> index_type;
|
||||
BitField<2, 2, IndexSwapMode> swap_mode;
|
||||
};
|
||||
|
||||
union VgtNumInstances {
|
||||
u32 num_instances;
|
||||
|
||||
u32 NumInstances() const {
|
||||
return num_instances == 0 ? 1 : num_instances;
|
||||
}
|
||||
};
|
||||
|
||||
struct Scissor {
|
||||
union {
|
||||
BitField<0, 16, s32> top_left_x;
|
||||
BitField<16, 16, s32> top_left_y;
|
||||
};
|
||||
union {
|
||||
BitField<0, 15, u32> bottom_right_x;
|
||||
BitField<16, 15, u32> bottom_right_y;
|
||||
};
|
||||
|
||||
u32 GetWidth() const {
|
||||
return static_cast<u32>(bottom_right_x - top_left_x);
|
||||
}
|
||||
|
||||
u32 GetHeight() const {
|
||||
return static_cast<u32>(bottom_right_y - top_left_y);
|
||||
}
|
||||
};
|
||||
|
||||
struct ViewportScissor {
|
||||
union {
|
||||
BitField<0, 15, s32> top_left_x;
|
||||
BitField<15, 15, s32> top_left_y;
|
||||
BitField<30, 1, s32> window_offset_disble;
|
||||
};
|
||||
union {
|
||||
BitField<0, 15, s32> bottom_right_x;
|
||||
BitField<15, 15, s32> bottom_right_y;
|
||||
};
|
||||
};
|
||||
|
||||
struct ViewportDepth {
|
||||
float zmin;
|
||||
float zmax;
|
||||
};
|
||||
|
||||
/// Viewport transform for one viewport: six consecutive float registers.
/// Each axis is laid out as scale followed by offset, the same order as the
/// per-axis enable bits in ViewportControl.
struct ViewportBounds {
    float xscale;
    float xoffset;
    float yscale;
    float yoffset;
    float zscale;
    float zoffset;
};
|
||||
|
||||
union ViewportControl {
|
||||
BitField<0, 1, u32> xscale_enable;
|
||||
BitField<1, 1, u32> xoffset_enable;
|
||||
BitField<2, 1, u32> yscale_enable;
|
||||
BitField<3, 1, u32> yoffset_enable;
|
||||
BitField<4, 1, u32> zscale_enable;
|
||||
BitField<5, 1, u32> zoffset_enable;
|
||||
BitField<8, 1, u32> xy_transformed;
|
||||
BitField<9, 1, u32> z_transformed;
|
||||
BitField<10, 1, u32> w_transformed;
|
||||
};
|
||||
|
||||
struct ClipUserData {
|
||||
u32 data_x;
|
||||
u32 data_y;
|
||||
u32 data_z;
|
||||
u32 data_w;
|
||||
};
|
||||
|
||||
struct ColorBuffer {
|
||||
enum class EndianSwap : u32 {
|
||||
None = 0,
|
||||
Swap8In16 = 1,
|
||||
Swap8In32 = 2,
|
||||
Swap8In64 = 3,
|
||||
};
|
||||
|
||||
enum class Format : u32 {
|
||||
Invalid = 0,
|
||||
Color_8 = 1,
|
||||
Color_16 = 2,
|
||||
Color_8_8 = 3,
|
||||
Color_32 = 4,
|
||||
Color_16_16 = 5,
|
||||
Color_10_11_11 = 6,
|
||||
Color_11_11_10 = 7,
|
||||
Color_10_10_10_2 = 8,
|
||||
Color_2_10_10_10 = 9,
|
||||
Color_8_8_8_8 = 10,
|
||||
Color_32_32 = 11,
|
||||
Color_16_16_16_16 = 12,
|
||||
Color_32_32_32_32 = 14,
|
||||
Color_5_6_5 = 16,
|
||||
Color_1_5_5_5 = 17,
|
||||
Color_5_5_5_1 = 18,
|
||||
Color_4_4_4_4 = 19,
|
||||
Color_8_24 = 20,
|
||||
Color_24_8 = 21,
|
||||
Color_X24_8_32_FL = 22,
|
||||
};
|
||||
|
||||
enum class NumberType : u32 {
|
||||
Unorm = 0,
|
||||
Snorm = 1,
|
||||
Uint = 4,
|
||||
Sint = 5,
|
||||
Srgb = 6,
|
||||
Float = 7,
|
||||
};
|
||||
|
||||
enum class SwapMode : u32 {
|
||||
Standard = 0,
|
||||
Alternate = 1,
|
||||
StandardReverse = 2,
|
||||
AlternateReverse = 3,
|
||||
};
|
||||
|
||||
enum class RoundMode : u32 {
|
||||
ByHalf = 0,
|
||||
Truncate = 1,
|
||||
};
|
||||
|
||||
u32 base_address;
|
||||
union {
|
||||
BitField<0, 11, u32> tile_max;
|
||||
BitField<20, 11, u32> fmask_tile_max;
|
||||
} pitch;
|
||||
union {
|
||||
BitField<0, 22, u32> tile_max;
|
||||
} slice;
|
||||
union {
|
||||
BitField<0, 11, u32> slice_start;
|
||||
BitField<13, 11, u32> slice_max;
|
||||
} view;
|
||||
union {
|
||||
BitField<0, 2, EndianSwap> endian;
|
||||
BitField<2, 5, Format> format;
|
||||
BitField<7, 1, u32> linear_general;
|
||||
BitField<8, 2, NumberType> number_type;
|
||||
BitField<11, 2, SwapMode> comp_swap;
|
||||
BitField<13, 1, u32> fast_clear;
|
||||
BitField<14, 1, u32> compression;
|
||||
BitField<15, 1, u32> blend_clamp;
|
||||
BitField<16, 1, u32> blend_bypass;
|
||||
BitField<17, 1, u32> simple_float;
|
||||
BitField<18, 1, RoundMode> round_mode;
|
||||
BitField<19, 1, u32> cmask_is_linear;
|
||||
} info;
|
||||
union {
|
||||
BitField<0, 5, u32> tile_mode_index;
|
||||
BitField<5, 5, u32> fmask_tile_mode_index;
|
||||
BitField<12, 3, u32> num_samples_log2;
|
||||
BitField<15, 3, u32> num_fragments_log2;
|
||||
BitField<18, 1, u32> force_dst_alpha_1;
|
||||
} attrib;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
u32 cmask_base_address;
|
||||
union {
|
||||
BitField<0, 14, u32> tile_max;
|
||||
} cmask_slice;
|
||||
u32 fmask_base_address;
|
||||
union {
|
||||
BitField<0, 14, u32> tile_max;
|
||||
} fmask_slice;
|
||||
u32 clear_word0;
|
||||
u32 clear_word1;
|
||||
INSERT_PADDING_WORDS(2);
|
||||
|
||||
u32 Pitch() const {
|
||||
return (pitch.tile_max + 1) << 3;
|
||||
}
|
||||
|
||||
u32 Height() const {
|
||||
return (slice.tile_max + 1) * 64 / Pitch();
|
||||
}
|
||||
|
||||
u64 Address() const {
|
||||
return u64(base_address) << 8;
|
||||
}
|
||||
|
||||
u64 CmaskAddress() const {
|
||||
return u64(cmask_base_address) << 8;
|
||||
}
|
||||
};
|
||||
|
||||
enum class PrimitiveType : u32 {
|
||||
None = 0,
|
||||
PointList = 1,
|
||||
LineList = 2,
|
||||
LineStrip = 3,
|
||||
TriangleList = 4,
|
||||
TriangleFan = 5,
|
||||
TriangleStrip = 6,
|
||||
PatchPrimitive = 9,
|
||||
AdjLineList = 10,
|
||||
AdjLineStrip = 11,
|
||||
AdjTriangleList = 12,
|
||||
AdjTriangleStrip = 13,
|
||||
RectList = 17,
|
||||
LineLoop = 18,
|
||||
QuadList = 19,
|
||||
QuadStrip = 20,
|
||||
Polygon = 21,
|
||||
};
|
||||
|
||||
union Regs {
|
||||
struct {
|
||||
INSERT_PADDING_WORDS(0x2C08);
|
||||
ShaderProgram ps_program;
|
||||
INSERT_PADDING_WORDS(0x2C);
|
||||
ShaderProgram vs_program;
|
||||
INSERT_PADDING_WORDS(0xA008 - 0x2C4C - 16);
|
||||
u32 depth_bounds_min;
|
||||
u32 depth_bounds_max;
|
||||
u32 stencil_clear;
|
||||
u32 depth_clear;
|
||||
Scissor screen_scissor;
|
||||
INSERT_PADDING_WORDS(0xA011 - 0xA00C - 2);
|
||||
StencilInfo stencil_info;
|
||||
u32 z_read_base;
|
||||
u32 stencil_read_base;
|
||||
u32 z_write_base;
|
||||
u32 stencil_write_base;
|
||||
DepthSize depth_size;
|
||||
DepthSlice depth_slice;
|
||||
INSERT_PADDING_WORDS(0xA08E - 0xA018);
|
||||
ColorBufferMask color_target_mask;
|
||||
ColorBufferMask color_shader_mask;
|
||||
INSERT_PADDING_WORDS(0xA094 - 0xA08E - 2);
|
||||
std::array<ViewportScissor, NumViewports> viewport_scissors;
|
||||
std::array<ViewportDepth, NumViewports> viewport_depths;
|
||||
INSERT_PADDING_WORDS(0xA10B - 0xA0D4);
|
||||
StencilControl stencil_control;
|
||||
StencilRefMask stencil_ref_front;
|
||||
StencilRefMask stencil_ref_back;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
std::array<ViewportBounds, NumViewports> viewports;
|
||||
std::array<ClipUserData, NumClipPlanes> clip_user_data;
|
||||
INSERT_PADDING_WORDS(0xA1B1 - 0xA187);
|
||||
VsOutputConfig vs_output_config;
|
||||
INSERT_PADDING_WORDS(0xA1C3 - 0xA1B1 - 1);
|
||||
ShaderPosFormat shader_pos_format;
|
||||
ShaderExportFormat z_export_format;
|
||||
ColorExportFormat color_export_format;
|
||||
INSERT_PADDING_WORDS(0xA1F9 - 0xA1C3 - 3);
|
||||
IndexBufferBase index_base_address;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
u32 draw_initiator;
|
||||
INSERT_PADDING_WORDS(0xA200 - 0xA1F9 - 4);
|
||||
DepthControl depth_control;
|
||||
INSERT_PADDING_WORDS(2);
|
||||
DepthBufferControl depth_buffer_control;
|
||||
ClipperControl clipper_control;
|
||||
PolygonControl polygon_control;
|
||||
ViewportControl viewport_control;
|
||||
VsOutputControl vs_output_control;
|
||||
INSERT_PADDING_WORDS(0xA29E - 0xA207 - 1);
|
||||
u32 max_index_size;
|
||||
IndexBufferType index_buffer_type;
|
||||
INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
|
||||
u32 enable_primitive_id;
|
||||
INSERT_PADDING_WORDS(0xA318 - 0xA2A1 - 1);
|
||||
ColorBuffer color_buffers[NumColorBuffers];
|
||||
INSERT_PADDING_WORDS(0xC242 - 0xA390);
|
||||
PrimitiveType primitive_type;
|
||||
INSERT_PADDING_WORDS(0xC24C - 0xC243);
|
||||
u32 num_indices;
|
||||
VgtNumInstances num_instances;
|
||||
};
|
||||
std::array<u32, NumRegs> reg_array{};
|
||||
};
|
||||
|
||||
Regs regs{};
|
||||
|
||||
public:
|
||||
Liverpool();
|
||||
|
||||
void ProcessCmdList(u32* cmdbuf, u32 size_in_bytes);
|
||||
};
|
||||
|
||||
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
|
||||
static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48);
|
||||
static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C);
|
||||
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
|
||||
static_assert(GFX6_3D_REG_INDEX(depth_slice) == 0xA017);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);
|
||||
static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094);
|
||||
static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B);
|
||||
static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F);
|
||||
static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F);
|
||||
static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1);
|
||||
static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3);
|
||||
static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5);
|
||||
static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9);
|
||||
static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC);
|
||||
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
|
||||
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
|
||||
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
|
||||
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
|
||||
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381);
|
||||
static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242);
|
||||
static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D);
|
||||
|
||||
#undef GFX6_3D_REG_INDEX
|
||||
|
||||
} // namespace AmdGpu
|
22
src/video_core/amdgpu/pixel_format.cpp
Normal file
22
src/video_core/amdgpu/pixel_format.cpp
Normal file
|
@ -0,0 +1,22 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <array>
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
/// Looks up the number of components for a data format.
/// The table is indexed by the numeric value of DataFormat. Slots holding -1
/// are returned as-is, converted to u32; values past the end of the table
/// yield 0.
u32 getNumComponents(DataFormat format) {
    constexpr std::array kComponentCounts = {
        0,  1,  1,  2,  1,  2,  3,  3,  4,  4,  4,  2,  4,  3,  4,  -1, 3,  4,  4,  4,  2,
        2,  2,  -1, -1, -1, -1, -1, -1, -1, -1, -1, 3,  3,  3,  4,  4,  4,  1,  2,  3,  4,
        -1, -1, 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  1,  1};

    const auto slot = static_cast<u32>(format);
    return slot < kComponentCounts.size() ? kComponentCounts[slot] : 0;
}
|
||||
|
||||
} // namespace AmdGpu
|
64
src/video_core/amdgpu/pixel_format.h
Normal file
64
src/video_core/amdgpu/pixel_format.h
Normal file
|
@ -0,0 +1,64 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/types.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
// Table 8.13 Data and Image Formats [Sea Islands Series Instruction Set Architecture]
|
||||
enum class DataFormat : u32 {
|
||||
FormatInvalid = 0,
|
||||
Format8 = 1,
|
||||
Format16 = 2,
|
||||
Format8_8 = 3,
|
||||
Format32 = 4,
|
||||
Format16_16 = 5,
|
||||
Format10_11_11 = 6,
|
||||
Format11_11_10 = 7,
|
||||
Format10_10_10_2 = 8,
|
||||
Format2_10_10_10 = 9,
|
||||
Format8_8_8_8 = 10,
|
||||
Format32_32 = 11,
|
||||
Format16_16_16_16 = 12,
|
||||
Format32_32_32 = 13,
|
||||
Format32_32_32_32 = 14,
|
||||
Format5_6_5 = 16,
|
||||
Format1_5_5_5 = 17,
|
||||
Format5_5_5_1 = 18,
|
||||
Format4_4_4_4 = 19,
|
||||
Format8_24 = 20,
|
||||
Format24_8 = 21,
|
||||
FormatX24_8_32 = 22,
|
||||
FormatGB_GR = 32,
|
||||
FormatBG_RG = 33,
|
||||
Format5_9_9_9 = 34,
|
||||
FormatBc1 = 35,
|
||||
FormatBc2 = 36,
|
||||
FormatBc3 = 37,
|
||||
FormatBc4 = 38,
|
||||
FormatBc5 = 39,
|
||||
FormatBc6 = 40,
|
||||
FormatBc7 = 41,
|
||||
};
|
||||
|
||||
enum class NumberFormat : u32 {
|
||||
Unorm = 0,
|
||||
Snorm = 1,
|
||||
Uscaled = 2,
|
||||
Sscaled = 3,
|
||||
Uint = 4,
|
||||
Sint = 5,
|
||||
SnormNz = 6,
|
||||
Float = 7,
|
||||
Srgb = 9,
|
||||
Ubnorm = 10,
|
||||
UbnromNz = 11,
|
||||
Ubint = 12,
|
||||
Ubscaled = 13,
|
||||
};
|
||||
|
||||
u32 getNumComponents(DataFormat format);
|
||||
|
||||
} // namespace AmdGpu
|
290
src/video_core/amdgpu/pm4_cmds.h
Normal file
290
src/video_core/amdgpu/pm4_cmds.h
Normal file
|
@ -0,0 +1,290 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstring>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/types.h"
|
||||
#include "video_core/amdgpu/pm4_opcodes.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
/// This enum defines the Shader types supported in PM4 type 3 header
|
||||
enum class PM4ShaderType : u32 {
|
||||
ShaderGraphics = 0, ///< Graphics shader
|
||||
ShaderCompute = 1 ///< Compute shader
|
||||
};
|
||||
|
||||
/// This enum defines the predicate value supported in PM4 type 3 header
|
||||
enum class PM4Predicate : u32 {
|
||||
PredDisable = 0, ///< Predicate disabled
|
||||
PredEnable = 1 ///< Predicate enabled
|
||||
};
|
||||
|
||||
union PM4Type0Header {
|
||||
u32 raw;
|
||||
BitField<0, 16, u32> base; ///< DWORD Memory-mapped address
|
||||
BitField<16, 14, u32> count; ///< Count of DWORDs in the *information* body (N - 1 for N dwords)
|
||||
BitField<30, 2, u32> type; ///< Packet identifier. It should be 0 for type 0 packets.
|
||||
|
||||
u32 NumWords() const {
|
||||
return count + 1;
|
||||
}
|
||||
};
|
||||
|
||||
union PM4Type3Header {
|
||||
constexpr PM4Type3Header(PM4ItOpcode code, u32 num_words_min_one,
|
||||
PM4ShaderType stype = PM4ShaderType::ShaderGraphics,
|
||||
PM4Predicate pred = PM4Predicate::PredDisable) {
|
||||
raw = 0;
|
||||
predicate.Assign(pred);
|
||||
shaderType.Assign(stype);
|
||||
opcode.Assign(code);
|
||||
count.Assign(num_words_min_one);
|
||||
type.Assign(3);
|
||||
}
|
||||
|
||||
u32 NumWords() const {
|
||||
return count + 1;
|
||||
}
|
||||
|
||||
u32 raw;
|
||||
BitField<0, 1, PM4Predicate> predicate; ///< Predicated version of packet when set
|
||||
BitField<1, 1, PM4ShaderType> shaderType; ///< 0: Graphics, 1: Compute Shader
|
||||
BitField<8, 8, PM4ItOpcode> opcode; ///< IT opcode
|
||||
BitField<16, 14, u32> count; ///< Number of DWORDs - 1 in the information body.
|
||||
BitField<30, 2, u32> type; ///< Packet identifier. It should be 3 for type 3 packets
|
||||
};
|
||||
|
||||
union PM4Header {
|
||||
u32 raw;
|
||||
PM4Type0Header type0;
|
||||
PM4Type3Header type3;
|
||||
BitField<30, 2, u32> type;
|
||||
};
|
||||
|
||||
template <PM4ItOpcode opcode, typename... Args>
|
||||
constexpr u32* Write(u32* cmdbuf, PM4ShaderType type, Args... data) {
|
||||
// Write the PM4 header.
|
||||
PM4Type3Header header{opcode, sizeof...(Args) - 1, type};
|
||||
std::memcpy(cmdbuf, &header, sizeof(header));
|
||||
|
||||
// Write arguments
|
||||
const std::array<u32, sizeof...(Args)> args{data...};
|
||||
std::memcpy(++cmdbuf, args.data(), sizeof(args));
|
||||
cmdbuf += args.size();
|
||||
return cmdbuf;
|
||||
}
|
||||
|
||||
union ContextControlEnable {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> enableSingleCntxConfigReg; ///< single context config reg
|
||||
BitField<1, 1, u32> enableMultiCntxRenderReg; ///< multi context render state reg
|
||||
BitField<15, 1, u32> enableUserConfigReg__CI; ///< User Config Reg on CI(reserved for SI)
|
||||
BitField<16, 1, u32> enableGfxSHReg; ///< Gfx SH Registers
|
||||
BitField<24, 1, u32> enableCSSHReg; ///< CS SH Registers
|
||||
BitField<31, 1, u32> enableDw; ///< DW enable
|
||||
};
|
||||
|
||||
struct PM4CmdContextControl {
|
||||
PM4Type3Header header;
|
||||
ContextControlEnable loadControl; ///< Enable bits for loading
|
||||
ContextControlEnable shadowEnable; ///< Enable bits for shadowing
|
||||
};
|
||||
|
||||
union LoadAddressHigh {
|
||||
u32 raw;
|
||||
BitField<0, 16, u32>
|
||||
addrHi; ///< bits for the block in Memory from where the CP will fetch the state
|
||||
BitField<31, 1, u32>
|
||||
waitIdle; ///< if set the CP will wait for the graphics pipe to be idle by writing
|
||||
///< to the GRBM Wait Until register with "Wait for 3D idle"
|
||||
};
|
||||
|
||||
/**
|
||||
* PM4CMDLOADDATA can be used with the following opcodes
|
||||
* - IT_LOAD_CONFIG_REG
|
||||
* - IT_LOAD_CONTEXT_REG
|
||||
* - IT_LOAD_SH_REG
|
||||
*/
|
||||
struct PM4CmdLoadData {
|
||||
PM4Type3Header header;
|
||||
u32 addrLo; ///< low 32 address bits for the block in memory from where the CP will fetch the
|
||||
///< state
|
||||
LoadAddressHigh addrHi;
|
||||
u32 regOffset; ///< offset in DWords from the register base address
|
||||
u32 numDwords; ///< number of DWords that the CP will fetch and write into the chip. A value of
|
||||
///< zero will fetch nothing
|
||||
};
|
||||
|
||||
enum class LoadDataIndex : u32 {
|
||||
DirectAddress = 0, /// ADDR_LO is direct address
|
||||
Offset = 1, /// ADDR_LO is ignored and memory offset is in addrOffset
|
||||
};
|
||||
|
||||
enum class LoadDataFormat : u32 {
|
||||
OffsetAndSize = 0, /// Data is consecutive DWORDs
|
||||
OffsetAndData = 1, /// Register offset and data is interleaved
|
||||
};
|
||||
|
||||
union LoadAddressLow {
|
||||
u32 raw;
|
||||
BitField<0, 1, LoadDataIndex> index;
|
||||
BitField<2, 30, u32> addrLo; ///< bits for the block in Memory from where the CP will fetch the
|
||||
///< state. DWORD aligned
|
||||
};
|
||||
|
||||
/**
|
||||
* PM4CMDLOADDATAINDEX can be used with the following opcodes (VI+)
|
||||
* - IT_LOAD_CONTEXT_REG_INDEX
|
||||
* - IT_LOAD_SH_REG_INDEX
|
||||
*/
|
||||
struct PM4CmdLoadDataIndex {
|
||||
PM4Type3Header header;
|
||||
LoadAddressLow addrLo; ///< low 32 address bits for the block in memory from where the CP will
|
||||
///< fetch the state
|
||||
u32 addrOffset; ///< addrLo.index = 1 Indexed mode
|
||||
union {
|
||||
BitField<0, 16, u32> regOffset; ///< offset in DWords from the register base address
|
||||
BitField<31, 1, LoadDataFormat> dataFormat;
|
||||
u32 raw;
|
||||
};
|
||||
u32 numDwords; ///< Number of DWords that the CP will fetch and write
|
||||
///< into the chip. A value of zero will fetch nothing
|
||||
};
|
||||
|
||||
/**
|
||||
* PM4CMDSETDATA can be used with the following opcodes:
|
||||
*
|
||||
* - IT_SET_CONFIG_REG
|
||||
* - IT_SET_CONTEXT_REG
|
||||
* - IT_SET_CONTEXT_REG_INDIRECT
|
||||
* - IT_SET_SH_REG
|
||||
* - IT_SET_SH_REG_INDEX
|
||||
* - IT_SET_UCONFIG_REG
|
||||
*/
|
||||
struct PM4CmdSetData {
|
||||
PM4Type3Header header;
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 16, u32> regOffset; ///< Offset in DWords from the register base address
|
||||
BitField<28, 4, u32> index; ///< Index for UCONFIG/CONTEXT on CI+
|
||||
///< Program to zero for other opcodes and on SI
|
||||
};
|
||||
|
||||
template <PM4ShaderType type = PM4ShaderType::ShaderGraphics, typename... Args>
|
||||
static constexpr u32* SetContextReg(u32* cmdbuf, Args... data) {
|
||||
return Write<PM4ItOpcode::SetContextReg>(cmdbuf, type, data...);
|
||||
}
|
||||
|
||||
template <PM4ShaderType type = PM4ShaderType::ShaderGraphics, typename... Args>
|
||||
static constexpr u32* SetShReg(u32* cmdbuf, Args... data) {
|
||||
return Write<PM4ItOpcode::SetShReg>(cmdbuf, type, data...);
|
||||
}
|
||||
};
|
||||
|
||||
struct PM4CmdNop {
|
||||
PM4Type3Header header;
|
||||
};
|
||||
|
||||
struct PM4CmdDrawIndexOffset2 {
|
||||
PM4Type3Header header;
|
||||
u32 maxSize; ///< Maximum number of indices
|
||||
u32 indexOffset; ///< Zero based starting index number in the index buffer
|
||||
u32 indexCount; ///< number of indices in the Index Buffer
|
||||
u32 drawInitiator; ///< draw Initiator Register
|
||||
};
|
||||
|
||||
struct PM4CmdDrawIndex2 {
|
||||
PM4Type3Header header;
|
||||
u32 maxSize; ///< maximum number of indices
|
||||
u32 indexBaseLo; ///< base Address Lo [31:1] of Index Buffer
|
||||
///< (Word-Aligned). Written to the VGT_DMA_BASE register.
|
||||
u32 indexBaseHi; ///< base Address Hi [39:32] of Index Buffer.
|
||||
///< Written to the VGT_DMA_BASE_HI register
|
||||
u32 indexCount; ///< number of indices in the Index Buffer.
|
||||
///< Written to the VGT_NUM_INDICES register.
|
||||
u32 drawInitiator; ///< written to the VGT_DRAW_INITIATOR register
|
||||
};
|
||||
|
||||
struct PM4CmdDrawIndexType {
|
||||
PM4Type3Header header;
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 2, u32> indexType; ///< Select 16 Vs 32bit index
|
||||
BitField<2, 2, u32> swapMode; ///< DMA swap mode
|
||||
};
|
||||
};
|
||||
|
||||
struct PM4CmdDrawIndexAuto {
|
||||
PM4Type3Header header;
|
||||
u32 index_count;
|
||||
u32 draw_initiator;
|
||||
};
|
||||
|
||||
enum class DataSelect : u32 {
|
||||
None = 0,
|
||||
Data32Low = 1,
|
||||
Data64 = 2,
|
||||
GpuClock64 = 3,
|
||||
PerfCounter = 4,
|
||||
};
|
||||
|
||||
enum class InterruptSelect : u32 {
|
||||
None = 0,
|
||||
IrqOnly = 1,
|
||||
IrqWhenWriteConfirm = 2,
|
||||
};
|
||||
|
||||
struct PM4CmdEventWriteEop {
|
||||
PM4Type3Header header;
|
||||
union {
|
||||
u32 event_control;
|
||||
BitField<0, 6, u32> eventType; ///< Event type written to VGT_EVENT_INITIATOR
|
||||
BitField<8, 4, u32> eventIndex; ///< Event index
|
||||
};
|
||||
u32 addressLo;
|
||||
union {
|
||||
u32 data_control;
|
||||
BitField<0, 16, u32> addressHi; ///< High bits of address
|
||||
BitField<24, 2, InterruptSelect> intSel; ///< Selects interrupt action for end-of-pipe
|
||||
BitField<29, 3, DataSelect> dataSel; ///< Selects source of data
|
||||
};
|
||||
u32 dataLo; ///< Value that will be written to memory when event occurs
|
||||
u32 dataHi; ///< Value that will be written to memory when event occurs
|
||||
|
||||
u64* Address() const {
|
||||
return reinterpret_cast<u64*>(addressLo | u64(addressHi) << 32);
|
||||
}
|
||||
|
||||
u64 DataQWord() const {
|
||||
return dataLo | u64(dataHi) << 32;
|
||||
}
|
||||
};
|
||||
|
||||
struct PM4DmaData {
|
||||
PM4Type3Header header;
|
||||
union {
|
||||
BitField<0, 1, u32> engine;
|
||||
BitField<12, 1, u32> src_atc;
|
||||
BitField<13, 2, u32> src_cache_policy;
|
||||
BitField<15, 1, u32> src_volatile;
|
||||
BitField<20, 2, u32> dst_sel;
|
||||
BitField<24, 1, u32> dst_atc;
|
||||
BitField<25, 2, u32> dst_cache_policy;
|
||||
BitField<27, 1, u32> dst_volatile;
|
||||
BitField<29, 2, u32> src_sel;
|
||||
BitField<31, 1, u32> cp_sync;
|
||||
};
|
||||
union {
|
||||
u32 src_addr_lo;
|
||||
u32 data;
|
||||
};
|
||||
u32 src_addr_hi;
|
||||
u32 dst_addr_lo;
|
||||
u32 dst_addr_hi;
|
||||
u32 command;
|
||||
};
|
||||
|
||||
} // namespace AmdGpu
|
62
src/video_core/amdgpu/pm4_opcodes.h
Normal file
62
src/video_core/amdgpu/pm4_opcodes.h
Normal file
|
@ -0,0 +1,62 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/types.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
enum class PM4ItOpcode : u32 {
|
||||
Nop = 0x10,
|
||||
SetBase = 0x11,
|
||||
ClearState = 0x12,
|
||||
IndexBufferSize = 0x13,
|
||||
DispatchDirect = 0x15,
|
||||
DispatchIndirect = 0x16,
|
||||
AtomicGds = 0x1D,
|
||||
Atomic = 0x1E,
|
||||
OcclusionQuery = 0x1F,
|
||||
SetPredication = 0x20,
|
||||
RegRmw = 0x21,
|
||||
CondExec = 0x22,
|
||||
PredExec = 0x23,
|
||||
DrawIndirect = 0x24,
|
||||
DrawIndexIndirect = 0x25,
|
||||
IndexBase = 0x26,
|
||||
DrawIndex2 = 0x27,
|
||||
ContextControl = 0x28,
|
||||
IndexType = 0x2A,
|
||||
DrawIndirectMulti = 0x2C,
|
||||
DrawIndexAuto = 0x2D,
|
||||
NumInstances = 0x2F,
|
||||
DrawIndexMultiAuto = 0x30,
|
||||
IndirectBufferConst = 0x33,
|
||||
DrawIndexOffset2 = 0x35,
|
||||
WriteData = 0x37,
|
||||
DrawIndexIndirectMulti = 0x38,
|
||||
MemSemaphore = 0x39,
|
||||
IndirectBuffer = 0x3F,
|
||||
CondIndirectBuffer = 0x3F,
|
||||
CopyData = 0x40,
|
||||
CommandProcessorDma = 0x41,
|
||||
SurfaceSync = 0x43,
|
||||
CondWrite = 0x45,
|
||||
EventWrite = 0x46,
|
||||
EventWriteEop = 0x47,
|
||||
EventWriteEos = 0x48,
|
||||
PremableCntl = 0x4A,
|
||||
DmaData = 0x50,
|
||||
ContextRegRmw = 0x51,
|
||||
LoadShReg = 0x5F,
|
||||
LoadConfigReg = 0x60,
|
||||
LoadContextReg = 0x61,
|
||||
SetConfigReg = 0x68,
|
||||
SetContextReg = 0x69,
|
||||
SetContextRegIndirect = 0x73,
|
||||
SetShReg = 0x76,
|
||||
SetShRegOffset = 0x77,
|
||||
SetUconfigReg = 0x79
|
||||
};
|
||||
|
||||
} // namespace AmdGpu
|
33
src/video_core/amdgpu/resource.h
Normal file
33
src/video_core/amdgpu/resource.h
Normal file
|
@ -0,0 +1,33 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/types.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
// Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture]
|
||||
struct Buffer {
|
||||
union {
|
||||
BitField<0, 44, u64> base_address;
|
||||
BitField<48, 14, u64> stride;
|
||||
BitField<62, 1, u64> cache_swizzle;
|
||||
BitField<63, 1, u64> swizzle_enable;
|
||||
};
|
||||
u32 num_records;
|
||||
union {
|
||||
BitField<0, 3, u32> dst_sel_x;
|
||||
BitField<3, 3, u32> dst_sel_y;
|
||||
BitField<6, 3, u32> dst_sel_z;
|
||||
BitField<9, 3, u32> dst_sel_w;
|
||||
BitField<12, 3, NumberFormat> num_format;
|
||||
BitField<15, 4, DataFormat> data_format;
|
||||
BitField<19, 2, u32> element_size;
|
||||
BitField<21, 2, u32> index_stride;
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace AmdGpu
|
|
@ -1,86 +0,0 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <climits>
|
||||
#include <utility>
|
||||
#include "common/types.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
// Based on Table 8.13 Data and Image Formats in Sea Islands Series Instruction Set Architecture
|
||||
enum class PixelFormat : u32 {
|
||||
Invalid,
|
||||
R32G32B32A32_Float,
|
||||
B32G32R32A32_Float,
|
||||
R32G32B32X32_Float,
|
||||
B32G32R32X32_Float,
|
||||
R32G32B32A32_Uint,
|
||||
R32G32B32A32_Sint,
|
||||
R32G32B32_Float,
|
||||
R32G32B32_Uint,
|
||||
R32G32B32_Sint,
|
||||
R16G16B16A16_Float,
|
||||
R16G16B16X16_Float,
|
||||
B16G16R16X16_Float,
|
||||
R16G16B16A16_Uint,
|
||||
R16G16B16A16_Sint,
|
||||
R16G16B16A16_Unorm,
|
||||
B16G16R16A16_Unorm,
|
||||
R16G16B16X16_Unorm,
|
||||
B16G16R16X16_Unorm,
|
||||
R16G16B16A16_Snorm,
|
||||
L32A32_Float,
|
||||
R32G32_Float,
|
||||
R32G32_Uint,
|
||||
R32G32_Sint,
|
||||
R11G11B10_Float,
|
||||
R8G8B8A8_Unorm,
|
||||
R8G8B8X8_Unorm,
|
||||
R8G8B8A8_UnormSrgb,
|
||||
R8G8B8X8_UnormSrgb,
|
||||
R8G8B8A8_Uint,
|
||||
R8G8B8A8_Snorm,
|
||||
R8G8B8A8_Sint,
|
||||
L16A16_Float,
|
||||
R16G16_Float,
|
||||
L16A16_Unorm,
|
||||
R16G16_Unorm,
|
||||
R16G16_Uint,
|
||||
R16G16_Snorm,
|
||||
R16G16_Sint,
|
||||
R32_Float,
|
||||
L32_Float,
|
||||
A32_Float,
|
||||
R32_Uint,
|
||||
R32_Sint,
|
||||
R8G8_Unorm,
|
||||
R8G8_Uint,
|
||||
R8G8_Snorm,
|
||||
R8G8_Sint,
|
||||
L8A8_Unorm,
|
||||
L8A8_UnormSrgb,
|
||||
R16_Float,
|
||||
L16_Float,
|
||||
A16_Float,
|
||||
R16_Unorm,
|
||||
L16_Unorm,
|
||||
A16_Unorm,
|
||||
R16_Uint,
|
||||
R16_Snorm,
|
||||
R16_Sint,
|
||||
R8_Unorm,
|
||||
L8_Unorm,
|
||||
L8_UnormSrgb,
|
||||
R8_Uint,
|
||||
R8_Snorm,
|
||||
R8_Sint,
|
||||
A8_Unorm,
|
||||
};
|
||||
|
||||
constexpr bool IsDepthStencilFormat(PixelFormat format) {
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
|
@ -95,9 +95,9 @@ void UniqueImage::Create(const vk::ImageCreateInfo& image_ci) {
|
|||
|
||||
Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||
const ImageInfo& info_, VAddr cpu_addr)
|
||||
: instance{&instance_}, scheduler{&scheduler_}, info{info_}, image{instance->GetDevice(),
|
||||
instance->GetAllocator()},
|
||||
cpu_addr{cpu_addr}, cpu_addr_end{cpu_addr + info.guest_size_bytes} {
|
||||
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
|
||||
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr},
|
||||
cpu_addr_end{cpu_addr + info.guest_size_bytes} {
|
||||
vk::ImageCreateFlags flags{};
|
||||
if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 &&
|
||||
info.size.width == info.size.height) {
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
#include "common/enum.h"
|
||||
#include "common/types.h"
|
||||
#include "core/libraries/videoout/buffer.h"
|
||||
#include "video_core/pixel_format.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
|
||||
|
|
|
@ -6,45 +6,14 @@
|
|||
|
||||
namespace VideoCore {
|
||||
|
||||
[[nodiscard]] vk::ImageViewType ConvertImageViewType(const ImageViewType type) {
|
||||
switch (type) {
|
||||
case ImageViewType::e1D:
|
||||
return vk::ImageViewType::e1D;
|
||||
case ImageViewType::e2D:
|
||||
return vk::ImageViewType::e2D;
|
||||
case ImageViewType::e3D:
|
||||
return vk::ImageViewType::e3D;
|
||||
case ImageViewType::Buffer:
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
UNREACHABLE_MSG("Invalid image type={}", static_cast<u32>(type));
|
||||
return {};
|
||||
}
|
||||
|
||||
[[nodiscard]] vk::Format ConvertPixelFormat(const PixelFormat format) {
|
||||
switch (format) {
|
||||
default:
|
||||
break;
|
||||
}
|
||||
UNREACHABLE_MSG("Unknown format={}", static_cast<u32>(format));
|
||||
return {};
|
||||
}
|
||||
|
||||
ImageView::ImageView(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
|
||||
const ImageViewInfo& info_, vk::Image image)
|
||||
: info{info_} {
|
||||
const vk::ImageViewCreateInfo image_view_ci = {
|
||||
.image = image,
|
||||
.viewType = ConvertImageViewType(info.type),
|
||||
.format = ConvertPixelFormat(info.format),
|
||||
.components{
|
||||
.r = vk::ComponentSwizzle::eIdentity,
|
||||
.g = vk::ComponentSwizzle::eIdentity,
|
||||
.b = vk::ComponentSwizzle::eIdentity,
|
||||
.a = vk::ComponentSwizzle::eIdentity,
|
||||
},
|
||||
.viewType = info.type,
|
||||
.format = info.format,
|
||||
.components = info.mapping,
|
||||
.subresourceRange{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.baseMipLevel = 0U,
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/pixel_format.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
|
||||
|
@ -25,30 +24,26 @@ enum class ImageViewType : u32 {
|
|||
Buffer,
|
||||
};
|
||||
|
||||
/// Selector for the source of a single swizzled component.
///
/// The numeric values are fixed explicitly (0 through 5) and the underlying type is u32.
/// NOTE(review): Zero/One presumably select constant 0 and 1, and R/G/B/A the
/// corresponding source channel — confirm against the code that consumes these values.
enum class SwizzleSource : u32 {
    Zero = 0,
    One = 1,
    R = 2,
    G = 3,
    B = 4,
    A = 5,
};
|
||||
|
||||
struct ImageViewInfo {
|
||||
ImageViewType type{};
|
||||
PixelFormat format{};
|
||||
vk::ImageViewType type{};
|
||||
vk::Format format{};
|
||||
SubresourceRange range;
|
||||
u8 x_source = static_cast<u8>(SwizzleSource::R);
|
||||
u8 y_source = static_cast<u8>(SwizzleSource::G);
|
||||
u8 z_source = static_cast<u8>(SwizzleSource::B);
|
||||
u8 w_source = static_cast<u8>(SwizzleSource::A);
|
||||
vk::ComponentMapping mapping{};
|
||||
|
||||
auto operator<=>(const ImageViewInfo&) const = default;
|
||||
};
|
||||
|
||||
class ImageView {
|
||||
struct ImageView {
|
||||
explicit ImageView(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
|
||||
const ImageViewInfo& info, vk::Image image);
|
||||
~ImageView();
|
||||
|
||||
ImageView(const ImageView&) = delete;
|
||||
ImageView& operator=(const ImageView&) = delete;
|
||||
|
||||
ImageView(ImageView&&) = default;
|
||||
ImageView& operator=(ImageView&&) = default;
|
||||
|
||||
ImageId image_id{};
|
||||
Extent3D size{0, 0, 0};
|
||||
ImageViewInfo info{};
|
||||
|
|
Loading…
Reference in a new issue