diff --git a/CMakeLists.txt b/CMakeLists.txt
index d0c27c50..3f5832d2 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -630,6 +630,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
     src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
     src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
     src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
+    src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.cpp
+    src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.h
     src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
     src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
     src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
diff --git a/src/common/io_file.h b/src/common/io_file.h
index feb2110a..45787a09 100644
--- a/src/common/io_file.h
+++ b/src/common/io_file.h
@@ -207,7 +207,7 @@ public:
         return WriteSpan(string);
     }
 
-    static size_t WriteBytes(const std::filesystem::path path, std::span<const u8> data) {
+    static size_t WriteBytes(const std::filesystem::path path, const auto& data) {
         IOFile out(path, FileAccessMode::Write);
         return out.Write(data);
     }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index e545e8e3..0ce9eea7 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -1,5 +1,6 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
+
 #include
 #include
 #include
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.cpp
new file mode 100644
index 00000000..74a807c5
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.cpp
@@ -0,0 +1,329 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <sirit/sirit.h>
+#include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h"
+#include "shader_recompiler/runtime_info.h"
+
+namespace Shader::Backend::SPIRV {
+
+using Sirit::Id;
+
+constexpr u32 SPIRV_VERSION_1_5 = 0x00010500;
+
+struct QuadRectListEmitter : public Sirit::Module {
+    explicit QuadRectListEmitter(const FragmentRuntimeInfo& fs_info_)
+        : Sirit::Module{SPIRV_VERSION_1_5}, fs_info{fs_info_}, inputs{fs_info_.num_inputs},
+          outputs{fs_info_.num_inputs} {
+        void_id = TypeVoid();
+        bool_id = TypeBool();
+        float_id = TypeFloat(32);
+        uint_id = TypeUInt(32U);
+        int_id = TypeInt(32U, true);
+        bvec2_id = TypeVector(bool_id, 2);
+        vec2_id = TypeVector(float_id, 2);
+        vec3_id = TypeVector(float_id, 3);
+        vec4_id = TypeVector(float_id, 4);
+
+        float_one = Constant(float_id, 1.0f);
+        float_min_one = Constant(float_id, -1.0f);
+        int_zero = Constant(int_id, 0);
+
+        const Id float_arr{TypeArray(float_id, Constant(uint_id, 1U))};
+        gl_per_vertex_type = TypeStruct(vec4_id, float_id, float_arr, float_arr);
+        Decorate(gl_per_vertex_type, spv::Decoration::Block);
+        MemberDecorate(gl_per_vertex_type, 0U, spv::Decoration::BuiltIn,
+                       static_cast<u32>(spv::BuiltIn::Position));
+        MemberDecorate(gl_per_vertex_type, 1U, spv::Decoration::BuiltIn,
+                       static_cast<u32>(spv::BuiltIn::PointSize));
+        MemberDecorate(gl_per_vertex_type, 2U, spv::Decoration::BuiltIn,
+                       static_cast<u32>(spv::BuiltIn::ClipDistance));
+        MemberDecorate(gl_per_vertex_type, 3U, spv::Decoration::BuiltIn,
+                       static_cast<u32>(spv::BuiltIn::CullDistance));
+    }
+
+    /// Emits a tessellation control shader that interpolates the 4th vertex of a rectangle
+    /// primitive.
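+    /// The three incoming vertices span an axis-aligned rectangle, so the missing corner is
+    /// reconstructed with the parallelogram rule: the vertex that shares a coordinate with both
+    /// other vertices is weighted with -1 and the remaining two vertices with +1.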
+    void EmitRectListTCS() {
+        DefineEntry(spv::ExecutionModel::TessellationControl);
+
+        // Set passthrough tessellation factors
+        const Id output_float_id{TypePointer(spv::StorageClass::Output, float_id)};
+        for (int i = 0; i < 4; i++) {
+            const Id ptr{OpAccessChain(output_float_id, gl_tess_level_outer, Int(i))};
+            OpStore(ptr, float_one);
+        }
+        for (int i = 0; i < 2; i++) {
+            const Id ptr{OpAccessChain(output_float_id, gl_tess_level_inner, Int(i))};
+            OpStore(ptr, float_one);
+        }
+
+        const Id input_vec4{TypePointer(spv::StorageClass::Input, vec4_id)};
+        const Id output_vec4{TypePointer(spv::StorageClass::Output, vec4_id)};
+
+        // Emit interpolation block of the 4th vertex in rect.
+        // Load positions
+        std::array<Id, 3> pos;
+        for (int i = 0; i < 3; i++) {
+            pos[i] = OpLoad(vec4_id, OpAccessChain(input_vec4, gl_in, Int(i), int_zero));
+        }
+
+        std::array<Id, 3> point_coord_equal;
+        for (int i = 0; i < 3; i++) {
+            // point_coord_equal[i] = equal(gl_in[i].gl_Position.xy, gl_in[(i + 1) %
+            // 3].gl_Position.xy);
+            const Id pos_l_xy{OpVectorShuffle(vec2_id, pos[i], pos[i], 0, 1)};
+            const Id pos_r_xy{OpVectorShuffle(vec2_id, pos[(i + 1) % 3], pos[(i + 1) % 3], 0, 1)};
+            point_coord_equal[i] = OpFOrdEqual(bvec2_id, pos_l_xy, pos_r_xy);
+        }
+
+        std::array<Id, 3> bary_coord;
+        std::array<Id, 3> is_edge_vertex;
+        for (int i = 0; i < 3; i++) {
+            // bool xy_equal = point_coord_equal[i].x && point_coord_equal[(i + 2) % 3].y;
+            const Id xy_equal{
+                OpLogicalAnd(bool_id, OpCompositeExtract(bool_id, point_coord_equal[i], 0),
+                             OpCompositeExtract(bool_id, point_coord_equal[(i + 2) % 3], 1))};
+            // bool yx_equal = point_coord_equal[i].y && point_coord_equal[(i + 2) % 3].x;
+            const Id yx_equal{
+                OpLogicalAnd(bool_id, OpCompositeExtract(bool_id, point_coord_equal[i], 1),
+                             OpCompositeExtract(bool_id, point_coord_equal[(i + 2) % 3], 0))};
+            // bary_coord[i] = (xy_equal || yx_equal) ? -1.f : 1.f;
+            is_edge_vertex[i] = OpLogicalOr(bool_id, xy_equal, yx_equal);
+            bary_coord[i] = OpSelect(float_id, is_edge_vertex[i], float_min_one, float_one);
+        }
+
+        const auto interpolate = [&](Id v0, Id v1, Id v2) {
+            // return v0 * bary_coord.x + v1 * bary_coord.y + v2 * bary_coord.z;
+            const Id p0{OpVectorTimesScalar(vec4_id, v0, bary_coord[0])};
+            const Id p1{OpVectorTimesScalar(vec4_id, v1, bary_coord[1])};
+            const Id p2{OpVectorTimesScalar(vec4_id, v2, bary_coord[2])};
+            return OpFAdd(vec4_id, p0, OpFAdd(vec4_id, p1, p2));
+        };
+
+        // int vertex_index_id = is_edge_vertex[1] ? 1 : (is_edge_vertex[2] ? 2 : 0);
+        Id vertex_index{OpSelect(int_id, is_edge_vertex[2], Int(2), Int(0))};
+        vertex_index = OpSelect(int_id, is_edge_vertex[1], Int(1), vertex_index);
+
+        // int index = (vertex_index_id + gl_InvocationID) % 3;
+        const Id invocation_id{OpLoad(int_id, gl_invocation_id)};
+        const Id invocation_3{OpIEqual(bool_id, invocation_id, Int(3))};
+        const Id index{OpSMod(int_id, OpIAdd(int_id, vertex_index, invocation_id), Int(3))};
+
+        // gl_out[gl_InvocationID].gl_Position = gl_InvocationID == 3 ? pos3 :
+        // gl_in[index].gl_Position;
+        const Id pos3{interpolate(pos[0], pos[1], pos[2])};
+        const Id in_ptr{OpAccessChain(input_vec4, gl_in, index, Int(0))};
+        const Id position{OpSelect(vec4_id, invocation_3, pos3, OpLoad(vec4_id, in_ptr))};
+        OpStore(OpAccessChain(output_vec4, gl_out, invocation_id, Int(0)), position);
+
+        // Set attributes
+        for (int i = 0; i < inputs.size(); i++) {
+            // vec4 in_paramN3 = interpolate(bary_coord, in_paramN[0], in_paramN[1], in_paramN[2]);
+            const Id v0{OpLoad(vec4_id, OpAccessChain(input_vec4, inputs[i], Int(0)))};
+            const Id v1{OpLoad(vec4_id, OpAccessChain(input_vec4, inputs[i], Int(1)))};
+            const Id v2{OpLoad(vec4_id, OpAccessChain(input_vec4, inputs[i], Int(2)))};
+            const Id in_param3{interpolate(v0, v1, v2)};
+            // out_paramN[gl_InvocationID] = gl_InvocationID == 3 ? in_paramN3 : in_paramN[index];
+            const Id in_param{OpLoad(vec4_id, OpAccessChain(input_vec4, inputs[i], index))};
+            const Id out_param{OpSelect(vec4_id, invocation_3, in_param3, in_param)};
+            OpStore(OpAccessChain(output_vec4, outputs[i], invocation_id), out_param);
+        }
+
+        OpReturn();
+        OpFunctionEnd();
+    }
+
+    /// Emits a passthrough quad tessellation control shader that outputs 4 control points.
+    void EmitQuadListTCS() {
+        DefineEntry(spv::ExecutionModel::TessellationControl);
+        const Id array_type{TypeArray(int_id, Int(4))};
+        const Id values{ConstantComposite(array_type, Int(1), Int(2), Int(0), Int(3))};
+        const Id indices{AddLocalVariable(TypePointer(spv::StorageClass::Function, array_type),
+                                          spv::StorageClass::Function, values)};
+
+        // Set passthrough tessellation factors
+        const Id output_float{TypePointer(spv::StorageClass::Output, float_id)};
+        for (int i = 0; i < 4; i++) {
+            const Id ptr{OpAccessChain(output_float, gl_tess_level_outer, Int(i))};
+            OpStore(ptr, float_one);
+        }
+        for (int i = 0; i < 2; i++) {
+            const Id ptr{OpAccessChain(output_float, gl_tess_level_inner, Int(i))};
+            OpStore(ptr, float_one);
+        }
+
+        const Id input_vec4{TypePointer(spv::StorageClass::Input, vec4_id)};
+        const Id output_vec4{TypePointer(spv::StorageClass::Output, vec4_id)};
+        const Id func_int{TypePointer(spv::StorageClass::Function, int_id)};
+        const Id invocation_id{OpLoad(int_id, gl_invocation_id)};
+        const Id index{OpLoad(int_id, OpAccessChain(func_int, indices, invocation_id))};
+
+        // gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;
+        const Id in_position{OpLoad(vec4_id, OpAccessChain(input_vec4, gl_in, index, Int(0)))};
+        OpStore(OpAccessChain(output_vec4, gl_out, invocation_id, Int(0)), in_position);
+
+        for (int i = 0; i < inputs.size(); i++) {
+            // out_paramN[gl_InvocationID] = in_paramN[gl_InvocationID];
+            const Id in_param{OpLoad(vec4_id, OpAccessChain(input_vec4, inputs[i], index))};
+            OpStore(OpAccessChain(output_vec4, outputs[i], invocation_id), in_param);
+        }
+
+        OpReturn();
+        OpFunctionEnd();
+    }
+
+    /// Emits a passthrough tessellation evaluation shader that forwards the 4 control points.
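+    /// With all tessellation levels set to 1 and equal spacing, the quad domain yields exactly
+    /// the four corner vertices, so the evaluation shader only remaps gl_TessCoord to a control
+    /// point index and forwards that vertex.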
+    void EmitPassthroughTES() {
+        DefineEntry(spv::ExecutionModel::TessellationEvaluation);
+
+        // const int index = int(gl_TessCoord.y) * 2 + int(gl_TessCoord.x);
+        const Id input_float{TypePointer(spv::StorageClass::Input, float_id)};
+        const Id tess_coord_x{OpLoad(float_id, OpAccessChain(input_float, gl_tess_coord, Int(0)))};
+        const Id tess_coord_y{OpLoad(float_id, OpAccessChain(input_float, gl_tess_coord, Int(1)))};
+        const Id index{OpIAdd(int_id, OpIMul(int_id, OpConvertFToS(int_id, tess_coord_y), Int(2)),
+                              OpConvertFToS(int_id, tess_coord_x))};
+
+        // gl_Position = gl_in[index].gl_Position;
+        const Id input_vec4{TypePointer(spv::StorageClass::Input, vec4_id)};
+        const Id output_vec4{TypePointer(spv::StorageClass::Output, vec4_id)};
+        const Id position{OpLoad(vec4_id, OpAccessChain(input_vec4, gl_in, index, Int(0)))};
+        OpStore(OpAccessChain(output_vec4, gl_per_vertex, Int(0)), position);
+
+        // out_paramN = in_paramN[index];
+        for (int i = 0; i < inputs.size(); i++) {
+            const Id param{OpLoad(vec4_id, OpAccessChain(input_vec4, inputs[i], index))};
+            OpStore(outputs[i], param);
+        }
+
+        OpReturn();
+        OpFunctionEnd();
+    }
+
+private:
+    Id Int(s32 value) {
+        return Constant(int_id, value);
+    }
+
+    Id AddInput(Id type) {
+        const Id input{AddGlobalVariable(TypePointer(spv::StorageClass::Input, type),
+                                         spv::StorageClass::Input)};
+        interfaces.push_back(input);
+        return input;
+    }
+
+    Id AddOutput(Id type) {
+        const Id output{AddGlobalVariable(TypePointer(spv::StorageClass::Output, type),
+                                          spv::StorageClass::Output)};
+        interfaces.push_back(output);
+        return output;
+    }
+
+    void DefineEntry(spv::ExecutionModel model) {
+        AddCapability(spv::Capability::Shader);
+        AddCapability(spv::Capability::Tessellation);
+        const Id void_function{TypeFunction(void_id)};
+        main = OpFunction(void_id, spv::FunctionControlMask::MaskNone, void_function);
+        if (model == spv::ExecutionModel::TessellationControl) {
+            AddExecutionMode(main, spv::ExecutionMode::OutputVertices, 4U);
+        } else {
+            AddExecutionMode(main, spv::ExecutionMode::Quads);
+            AddExecutionMode(main, spv::ExecutionMode::SpacingEqual);
+            AddExecutionMode(main, spv::ExecutionMode::VertexOrderCw);
+        }
+        DefineInputs(model);
+        DefineOutputs(model);
+        AddEntryPoint(model, main, "main", interfaces);
+        AddLabel(OpLabel());
+    }
+
+    void DefineOutputs(spv::ExecutionModel model) {
+        if (model == spv::ExecutionModel::TessellationControl) {
+            const Id gl_per_vertex_array{TypeArray(gl_per_vertex_type, Constant(uint_id, 4U))};
+            gl_out = AddOutput(gl_per_vertex_array);
+
+            const Id arr2_id{TypeArray(float_id, Constant(uint_id, 2U))};
+            gl_tess_level_inner = AddOutput(arr2_id);
+            Decorate(gl_tess_level_inner, spv::Decoration::BuiltIn, spv::BuiltIn::TessLevelInner);
+            Decorate(gl_tess_level_inner, spv::Decoration::Patch);
+
+            const Id arr4_id{TypeArray(float_id, Constant(uint_id, 4U))};
+            gl_tess_level_outer = AddOutput(arr4_id);
+            Decorate(gl_tess_level_outer, spv::Decoration::BuiltIn, spv::BuiltIn::TessLevelOuter);
+            Decorate(gl_tess_level_outer, spv::Decoration::Patch);
+        } else {
+            gl_per_vertex = AddOutput(gl_per_vertex_type);
+        }
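+        // TCS outputs are arrayed over the 4 output control points, while the TES writes a
+        // single vec4 per parameter.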
+        for (int i = 0; i < fs_info.num_inputs; i++) {
+            outputs[i] = AddOutput(model == spv::ExecutionModel::TessellationControl
+                                       ? TypeArray(vec4_id, Int(4))
+                                       : vec4_id);
+            Decorate(outputs[i], spv::Decoration::Location, fs_info.inputs[i].param_index);
+        }
+    }
+
+    void DefineInputs(spv::ExecutionModel model) {
+        if (model == spv::ExecutionModel::TessellationEvaluation) {
+            gl_tess_coord = AddInput(vec3_id);
+            Decorate(gl_tess_coord, spv::Decoration::BuiltIn, spv::BuiltIn::TessCoord);
+        } else {
+            gl_invocation_id = AddInput(int_id);
+            Decorate(gl_invocation_id, spv::Decoration::BuiltIn, spv::BuiltIn::InvocationId);
+        }
+        const Id gl_per_vertex_array{TypeArray(gl_per_vertex_type, Constant(uint_id, 32U))};
+        gl_in = AddInput(gl_per_vertex_array);
+        const Id float_arr{TypeArray(vec4_id, Int(32))};
+        for (int i = 0; i < fs_info.num_inputs; i++) {
+            inputs[i] = AddInput(float_arr);
+            Decorate(inputs[i], spv::Decoration::Location, fs_info.inputs[i].param_index);
+        }
+    }
+
+private:
+    FragmentRuntimeInfo fs_info;
+    Id main;
+    Id void_id;
+    Id bool_id;
+    Id float_id;
+    Id uint_id;
+    Id int_id;
+    Id bvec2_id;
+    Id vec2_id;
+    Id vec3_id;
+    Id vec4_id;
+    Id float_one;
+    Id float_min_one;
+    Id int_zero;
+    Id gl_per_vertex_type;
+    Id gl_in;
+    union {
+        Id gl_out;
+        Id gl_per_vertex;
+    };
+    Id gl_tess_level_inner;
+    Id gl_tess_level_outer;
+    union {
+        Id gl_tess_coord;
+        Id gl_invocation_id;
+    };
+    std::vector<Id> inputs;
+    std::vector<Id> outputs;
+    std::vector<Id> interfaces;
+};
+
+std::vector<u32> EmitAuxilaryTessShader(AuxShaderType type, const FragmentRuntimeInfo& fs_info) {
+    QuadRectListEmitter ctx{fs_info};
+    switch (type) {
+    case AuxShaderType::RectListTCS:
+        ctx.EmitRectListTCS();
+        break;
+    case AuxShaderType::QuadListTCS:
+        ctx.EmitQuadListTCS();
+        break;
+    case AuxShaderType::PassthroughTES:
+        ctx.EmitPassthroughTES();
+        break;
+    }
+    return ctx.Assemble();
+}
+
+} // namespace Shader::Backend::SPIRV
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.h b/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.h
new file mode 100644
index 00000000..c6c970ec
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.h
@@ -0,0 +1,24 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <vector>
+#include "common/types.h"
+
+namespace Shader {
+struct FragmentRuntimeInfo;
+}
+
+namespace Shader::Backend::SPIRV {
+
+enum class AuxShaderType : u32 {
+    RectListTCS,
+    QuadListTCS,
+    PassthroughTES,
+};
+
+[[nodiscard]] std::vector<u32> EmitAuxilaryTessShader(AuxShaderType type,
+                                                      const FragmentRuntimeInfo& fs_info);
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index 23e23c11..bbf74f5d 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -227,7 +227,7 @@ struct RuntimeInfo {
         ComputeRuntimeInfo cs_info;
     };
 
-    RuntimeInfo(Stage stage_) {
+    void Initialize(Stage stage_) {
         memset(this, 0, sizeof(*this));
         stage = stage_;
     }
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index f265fb68..75f06365 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -238,32 +238,14 @@ u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) {
     // Emulate QuadList and Polygon primitive types with CPU made index buffer.
     const auto& regs = liverpool->regs;
     if (!is_indexed) {
-        bool needs_index_buffer = false;
-        if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList ||
-            regs.primitive_type == AmdGpu::PrimitiveType::Polygon) {
-            needs_index_buffer = true;
-        }
-
-        if (!needs_index_buffer) {
+        if (regs.primitive_type != AmdGpu::PrimitiveType::Polygon) {
             return regs.num_indices;
         }
 
         // Emit indices.
         const u32 index_size = 3 * regs.num_indices;
         const auto [data, offset] = stream_buffer.Map(index_size);
-
-        switch (regs.primitive_type) {
-        case AmdGpu::PrimitiveType::QuadList:
-            Vulkan::LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices);
-            break;
-        case AmdGpu::PrimitiveType::Polygon:
-            Vulkan::LiverpoolToVK::EmitPolygonToTriangleListIndices(data, regs.num_indices);
-            break;
-        default:
-            UNREACHABLE();
-            break;
-        }
-
+        Vulkan::LiverpoolToVK::EmitPolygonToTriangleListIndices(data, regs.num_indices);
         stream_buffer.Commit();
 
         // Bind index buffer.
@@ -282,31 +264,6 @@ u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) {
     VAddr index_address = regs.index_base_address.Address();
     index_address += index_offset * index_size;
 
-    if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList) {
-        // Convert indices.
-        const u32 new_index_size = regs.num_indices * index_size * 6 / 4;
-        const auto [data, offset] = stream_buffer.Map(new_index_size);
-        const auto index_ptr = reinterpret_cast<const u8*>(index_address);
-        switch (index_type) {
-        case vk::IndexType::eUint16:
-            Vulkan::LiverpoolToVK::ConvertQuadToTriangleListIndices<u16>(data, index_ptr,
-                                                                         regs.num_indices);
-            break;
-        case vk::IndexType::eUint32:
-            Vulkan::LiverpoolToVK::ConvertQuadToTriangleListIndices<u32>(data, index_ptr,
-                                                                         regs.num_indices);
-            break;
-        default:
-            UNREACHABLE_MSG("Unsupported QuadList index type {}", vk::to_string(index_type));
-            break;
-        }
-        stream_buffer.Commit();
-
-        // Bind index buffer.
-        const auto cmdbuf = scheduler.CommandBuffer();
-        cmdbuf.bindIndexBuffer(stream_buffer.Handle(), offset, index_type);
-        return new_index_size / index_size;
-    }
     if (regs.primitive_type == AmdGpu::PrimitiveType::Polygon) {
         UNREACHABLE();
     }
diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
index 6df89dba..25ff88b9 100644
--- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
@@ -116,12 +116,12 @@ vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) {
         return vk::PrimitiveTopology::eTriangleStripWithAdjacency;
     case AmdGpu::PrimitiveType::PatchPrimitive:
         return vk::PrimitiveTopology::ePatchList;
-    case AmdGpu::PrimitiveType::QuadList:
     case AmdGpu::PrimitiveType::Polygon:
         // Needs to generate index buffer on the fly.
         return vk::PrimitiveTopology::eTriangleList;
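+    // QuadList and RectList are emulated with an auxiliary tessellation pipeline
+    // (see emit_spirv_quad_rect.cpp), so both are rasterized as patch lists.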
+    case AmdGpu::PrimitiveType::QuadList:
     case AmdGpu::PrimitiveType::RectList:
-        return vk::PrimitiveTopology::eTriangleStrip;
+        return vk::PrimitiveTopology::ePatchList;
     default:
         UNREACHABLE();
         return vk::PrimitiveTopology::eTriangleList;
diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h
index 72bddc6b..d5f8e693 100644
--- a/src/video_core/renderer_vulkan/liverpool_to_vk.h
+++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h
@@ -70,34 +70,6 @@ vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color
 
 vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags supported_flags);
 
-static constexpr u16 NumVerticesPerQuad = 4;
-
-inline void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
-    u16* out_data = reinterpret_cast<u16*>(out_ptr);
-    for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) {
-        *out_data++ = i;
-        *out_data++ = i + 1;
-        *out_data++ = i + 2;
-        *out_data++ = i;
-        *out_data++ = i + 2;
-        *out_data++ = i + 3;
-    }
-}
-
-template <typename T>
-void ConvertQuadToTriangleListIndices(u8* out_ptr, const u8* in_ptr, u32 num_vertices) {
-    T* out_data = reinterpret_cast<T*>(out_ptr);
-    const T* in_data = reinterpret_cast<const T*>(in_ptr);
-    for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) {
-        *out_data++ = in_data[i];
-        *out_data++ = in_data[i + 1];
-        *out_data++ = in_data[i + 2];
-        *out_data++ = in_data[i];
-        *out_data++ = in_data[i + 2];
-        *out_data++ = in_data[i + 3];
-    }
-}
-
 inline void EmitPolygonToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
     u16* out_data = reinterpret_cast<u16*>(out_ptr);
     for (u16 i = 1; i < num_vertices - 1; i++) {
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 222ffb5a..3cba7a7e 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -7,25 +7,30 @@
 #include
 #include "common/assert.h"
+#include "common/io_file.h"
 #include "common/scope_exit.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h"
+#include "shader_recompiler/frontend/fetch_shader.h"
 #include "shader_recompiler/runtime_info.h"
 #include "video_core/amdgpu/resource.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
-
-#include "shader_recompiler/frontend/fetch_shader.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/texture_cache/texture_cache.h"
 
 namespace Vulkan {
 
-GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
-                                   DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_,
-                                   vk::PipelineCache pipeline_cache,
-                                   std::span infos,
-                                   std::optional fetch_shader_,
-                                   std::span modules)
+using Shader::Backend::SPIRV::AuxShaderType;
+
+GraphicsPipeline::GraphicsPipeline(
+    const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_,
+    const GraphicsPipelineKey& key_, vk::PipelineCache pipeline_cache,
+    std::span infos,
+    std::span runtime_infos,
+    std::optional fetch_shader_,
+    std::span modules)
     : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_},
       fetch_shader{std::move(fetch_shader_)} {
     const vk::Device device = instance.GetDevice();
@@ -88,11 +93,6 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
         .pVertexAttributeDescriptions = vertex_attributes.data(),
     };
 
-    if (key.prim_type == AmdGpu::PrimitiveType::RectList && !IsEmbeddedVs()) {
-        LOG_WARNING(Render_Vulkan,
-                    "Rectangle List primitive type is only supported for embedded VS");
-    }
-
     auto prim_restart = key.enable_primitive_restart != 0;
     if (prim_restart && IsPrimitiveListTopology() && !instance.IsListRestartSupported()) {
         LOG_WARNING(Render_Vulkan,
@@ -106,9 +106,11 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
     ASSERT_MSG(!prim_restart || key.primitive_restart_index == 0xFFFF ||
                    key.primitive_restart_index == 0xFFFFFFFF,
                "Primitive restart index other than -1 is not supported yet");
-
+    const bool is_rect_list = key.prim_type == AmdGpu::PrimitiveType::RectList;
+    const bool is_quad_list = key.prim_type == AmdGpu::PrimitiveType::QuadList;
+    const auto& fs_info = runtime_infos[u32(Shader::LogicalStage::Fragment)].fs_info;
     const vk::PipelineTessellationStateCreateInfo tessellation_state = {
-        .patchControlPoints = key.patch_control_points,
+        .patchControlPoints = is_rect_list ? 3U : (is_quad_list ? 4U : key.patch_control_points),
     };
 
     const vk::PipelineRasterizationStateCreateInfo raster_state = {
@@ -232,6 +234,14 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
             .module = modules[stage],
             .pName = "main",
         });
+    } else if (is_rect_list || is_quad_list) {
+        const auto type = is_quad_list ? AuxShaderType::QuadListTCS : AuxShaderType::RectListTCS;
+        auto tcs = Shader::Backend::SPIRV::EmitAuxilaryTessShader(type, fs_info);
+        shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
+            .stage = vk::ShaderStageFlagBits::eTessellationControl,
+            .module = CompileSPV(tcs, instance.GetDevice()),
+            .pName = "main",
+        });
     }
     stage = u32(Shader::LogicalStage::TessellationEval);
     if (infos[stage]) {
@@ -240,6 +250,14 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
             .module = modules[stage],
             .pName = "main",
         });
+    } else if (is_rect_list || is_quad_list) {
+        auto tes =
+            Shader::Backend::SPIRV::EmitAuxilaryTessShader(AuxShaderType::PassthroughTES, fs_info);
+        shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
+            .stage = vk::ShaderStageFlagBits::eTessellationEvaluation,
+            .module = CompileSPV(tes, instance.GetDevice()),
+            .pName = "main",
+        });
     }
     stage = u32(Shader::LogicalStage::Fragment);
     if (infos[stage]) {
@@ -322,8 +340,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
         .pStages = shader_stages.data(),
         .pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr,
        .pInputAssemblyState = &input_assembly,
-        .pTessellationState =
-            stages[u32(Shader::LogicalStage::TessellationControl)] ? &tessellation_state : nullptr,
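+        // Always supplied now: RectList/QuadList draws inject the auxiliary tessellation stages
+        // above even when the guest pipeline has no tessellation shaders of its own.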
+        .pTessellationState = &tessellation_state,
         .pViewportState = &viewport_info,
         .pRasterizationState = &raster_state,
         .pMultisampleState = &multisampling,
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index f25341bb..5a1e3c27 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -18,7 +18,7 @@ class TextureCache;
 
 namespace Vulkan {
 
-static constexpr u32 MaxShaderStages = 5;
+static constexpr u32 MaxShaderStages = static_cast<u32>(Shader::LogicalStage::NumLogicalStages);
 static constexpr u32 MaxVertexBufferCount = 32;
 
 class Instance;
@@ -64,6 +64,7 @@ public:
     GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
                      const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache,
                      std::span stages,
+                     std::span runtime_infos,
                      std::optional fetch_shader,
                      std::span modules);
     ~GraphicsPipeline();
@@ -72,11 +73,6 @@ public:
         return fetch_shader;
     }
 
-    bool IsEmbeddedVs() const noexcept {
-        static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
-        return key.stage_hashes[u32(Shader::LogicalStage::Vertex)] == EmbeddedVsHash;
-    }
-
     auto GetWriteMasks() const {
         return key.write_masks;
     }
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 43e02dd9..a7d78668 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -80,8 +80,8 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
                        : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
 }
 
-Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) {
-    auto info = Shader::RuntimeInfo{stage};
+const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) {
+    auto& info = runtime_infos[u32(l_stage)];
     const auto& regs = liverpool->regs;
     const auto BuildCommon = [&](const auto& program) {
         info.num_user_data = program.settings.num_user_regs;
@@ -90,6 +90,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_
         info.fp_denorm_mode32 = program.settings.fp_denorm_mode32;
         info.fp_round_mode32 = program.settings.fp_round_mode32;
     };
+    info.Initialize(stage);
     switch (stage) {
     case Stage::Local: {
         BuildCommon(regs.ls_program);
@@ -220,9 +221,9 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
     }
     const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
     if (is_new) {
-        it.value() =
-            std::make_unique<GraphicsPipeline>(instance, scheduler, desc_heap, graphics_key,
-                                               *pipeline_cache, infos, fetch_shader, modules);
+        it.value() = std::make_unique<GraphicsPipeline>(instance, scheduler, desc_heap,
+                                                        graphics_key, *pipeline_cache, infos,
+                                                        runtime_infos, fetch_shader, modules);
         if (Config::collectShadersForDebug()) {
             for (auto stage = 0; stage < MaxShaderStages; ++stage) {
                 if (infos[stage]) {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index ec440644..d4a51efd 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -76,7 +76,7 @@ private:
     vk::ShaderModule CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info,
                                    std::span code, size_t perm_idx,
                                    Shader::Backend::Bindings& binding);
-    Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage);
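+    // Re-initializes runtime_infos[l_stage] for the given hardware stage and returns a reference
+    // to it.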
+    const Shader::RuntimeInfo& BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage);
 
 private:
     const Instance& instance;
@@ -90,6 +90,7 @@ private:
     tsl::robin_map> program_cache;
     tsl::robin_map> compute_pipelines;
     tsl::robin_map> graphics_pipelines;
+    std::array<Shader::RuntimeInfo, MaxShaderStages> runtime_infos{};
     std::array infos{};
     std::array modules{};
     std::optional fetch_shader{};
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 417b6d4b..44772b4c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -245,7 +245,6 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
     }
 
     auto state = PrepareRenderState(pipeline->GetMrtMask());
-
     if (!BindResources(pipeline)) {
         return;
     }
@@ -267,10 +266,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
         cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, s32(vertex_offset),
                            instance_offset);
     } else {
-        const u32 num_vertices =
-            regs.primitive_type == AmdGpu::PrimitiveType::RectList ? 4 : regs.num_indices;
-        cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), vertex_offset,
-                    instance_offset);
+        cmdbuf.draw(num_indices, regs.num_instances.NumInstances(), vertex_offset, instance_offset);
     }
 
     ResetBindings();
@@ -285,18 +281,14 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
     }
 
     const auto& regs = liverpool->regs;
-    if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList ||
-        regs.primitive_type == AmdGpu::PrimitiveType::Polygon) {
-        // We use a generated index buffer to convert quad lists and polygons to triangles. Since it
+    if (regs.primitive_type == AmdGpu::PrimitiveType::Polygon) {
+        // We use a generated index buffer to convert polygons to triangles. Since it
         // changes type of the draw, arguments are not valid for this case. We need to run a
         // conversion pass to repack the indirect arguments buffer first.
         LOG_WARNING(Render_Vulkan, "Primitive type is not supported for indirect draw");
         return;
     }
 
-    ASSERT_MSG(regs.primitive_type != AmdGpu::PrimitiveType::RectList,
-               "Unsupported primitive type for indirect draw");
-
     const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline();
     if (!pipeline) {
         return;
@@ -1009,19 +1001,26 @@ void Rasterizer::UpdateViewportScissorState() {
         regs.clipper_control.clip_space == AmdGpu::Liverpool::ClipSpace::MinusWToW ? 1.0f : 0.0f;
 
+    const auto vp_ctl = regs.viewport_control;
     for (u32 i = 0; i < Liverpool::NumViewports; i++) {
         const auto& vp = regs.viewports[i];
         const auto& vp_d = regs.viewport_depths[i];
         if (vp.xscale == 0) {
             continue;
         }
+        const auto xoffset = vp_ctl.xoffset_enable ? vp.xoffset : 0.f;
+        const auto xscale = vp_ctl.xscale_enable ? vp.xscale : 1.f;
+        const auto yoffset = vp_ctl.yoffset_enable ? vp.yoffset : 0.f;
+        const auto yscale = vp_ctl.yscale_enable ? vp.yscale : 1.f;
+        const auto zoffset = vp_ctl.zoffset_enable ? vp.zoffset : 0.f;
+        const auto zscale = vp_ctl.zscale_enable ? vp.zscale : 1.f;
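+        // Components disabled in viewport_control fall back to an identity transform
+        // (offset 0, scale 1).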
         viewports.push_back({
-            .x = vp.xoffset - vp.xscale,
-            .y = vp.yoffset - vp.yscale,
-            .width = vp.xscale * 2.0f,
-            .height = vp.yscale * 2.0f,
-            .minDepth = vp.zoffset - vp.zscale * reduce_z,
-            .maxDepth = vp.zscale + vp.zoffset,
+            .x = xoffset - xscale,
+            .y = yoffset - yscale,
+            .width = xscale * 2.0f,
+            .height = yscale * 2.0f,
+            .minDepth = zoffset - zscale * reduce_z,
+            .maxDepth = zscale + zoffset,
         });
     }
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp
index f9347d6e..08703c3d 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp
@@ -126,6 +126,10 @@ EShLanguage ToEshShaderStage(vk::ShaderStageFlagBits stage) {
         return EShLanguage::EShLangVertex;
     case vk::ShaderStageFlagBits::eGeometry:
         return EShLanguage::EShLangGeometry;
+    case vk::ShaderStageFlagBits::eTessellationControl:
+        return EShLanguage::EShLangTessControl;
+    case vk::ShaderStageFlagBits::eTessellationEvaluation:
+        return EShLanguage::EShLangTessEvaluation;
     case vk::ShaderStageFlagBits::eFragment:
         return EShLanguage::EShLangFragment;
     case vk::ShaderStageFlagBits::eCompute: