From fa198e228d88b7a77100ddb2c58fc755be32cdc8 Mon Sep 17 00:00:00 2001 From: psucien Date: Mon, 10 Jun 2024 23:20:32 +0200 Subject: [PATCH 1/3] renderer_vulkan: another fix for vertex buffer offsets --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 6 ++++-- src/video_core/renderer_vulkan/vk_stream_buffer.cpp | 3 +-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 3f2195d7e..8f438020a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -429,8 +429,10 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const { for (u32 i = 0; i < num_buffers; ++i) { const auto& buffer = guest_buffers[i]; const auto& host_buffer = std::ranges::find_if( - ranges_merged.cbegin(), ranges_merged.cend(), - [&](const BufferRange& range) { return (buffer.base_address >= range.base_address); }); + ranges_merged.cbegin(), ranges_merged.cend(), [&](const BufferRange& range) { + return (buffer.base_address >= range.base_address && + buffer.base_address < range.end_address); + }); assert(host_buffer != ranges_merged.cend()); host_buffers[i] = staging.Handle(); diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 86a03a034..116f7896d 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -232,8 +232,7 @@ void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) { } u64 StreamBuffer::Copy(VAddr src, size_t size, size_t alignment /*= 0*/) { - static const u64 MinUniformAlignment = instance.UniformMinAlignment(); - const auto [data, offset, _] = Map(size, MinUniformAlignment); + const auto [data, offset, _] = Map(size, alignment); std::memcpy(data, reinterpret_cast(src), size); Commit(size); return offset; From 
1dd9f7a99e897b479891cb7fcca9d7380b04ccb1 Mon Sep 17 00:00:00 2001 From: psucien Date: Mon, 10 Jun 2024 23:48:06 +0200 Subject: [PATCH 2/3] timer_management: `sceKernelUsleep` on <1ms delays --- src/core/libraries/kernel/time_management.cpp | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/core/libraries/kernel/time_management.cpp b/src/core/libraries/kernel/time_management.cpp index 8c31c550c..3f5f92eef 100644 --- a/src/core/libraries/kernel/time_management.cpp +++ b/src/core/libraries/kernel/time_management.cpp @@ -9,9 +9,16 @@ #include "core/libraries/libs.h" #ifdef _WIN64 +#include #include + +// http://stackoverflow.com/a/31411628/4725495 +static u32(__stdcall* NtDelayExecution)(BOOL Alertable, PLARGE_INTEGER DelayInterval) = + (u32(__stdcall*)(BOOL, PLARGE_INTEGER))GetProcAddress(GetModuleHandleA("ntdll.dll"), + "NtDelayExecution"); #else #include +#include #endif namespace Libraries::Kernel { @@ -40,8 +47,18 @@ u64 PS4_SYSV_ABI sceKernelReadTsc() { } int PS4_SYSV_ABI sceKernelUsleep(u32 microseconds) { - ASSERT(microseconds >= 1000); - std::this_thread::sleep_for(std::chrono::microseconds(microseconds)); +#ifdef _WIN64 /* platform split wraps the whole if/else so both branches have balanced braces */ + if (microseconds < 1000u) { /* sub-millisecond requests go through NtDelayExecution; presumably sleep_for is too coarse for them on Windows - TODO confirm */ + LARGE_INTEGER interval{ + .QuadPart = -10LL * microseconds, /* 1 us = 10 ticks, assuming 100 ns ticks and negative = relative delay as for KeDelayExecutionThread - TODO confirm; 64-bit signed math so the value cannot wrap to a positive number */ + }; + NtDelayExecution(FALSE, &interval); + } else { + std::this_thread::sleep_for(std::chrono::microseconds(microseconds)); + } +#else + usleep(microseconds); /* non-Windows: one call for every duration */ +#endif return 0; } From b110523d90fd86ba7b5d48d447f72cb1a32fa396 Mon Sep 17 00:00:00 2001 From: psucien Date: Mon, 10 Jun 2024 23:49:23 +0200 Subject: [PATCH 3/3] recompiler: trivial missing ops (VALU OR and SALU LE, GE) added --- src/shader_recompiler/frontend/translate/translate.cpp | 9 +++++++++ src/shader_recompiler/frontend/translate/translate.h | 1 + src/shader_recompiler/frontend/translate/vector_alu.cpp | 7 +++++++ 3 files changed, 17 insertions(+) diff --git a/src/shader_recompiler/frontend/translate/translate.cpp 
b/src/shader_recompiler/frontend/translate/translate.cpp index c0ddf4ae9..a32cde39b 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -228,6 +228,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_AND_B32: translator.V_AND_B32(inst); break; + case Opcode::V_OR_B32: + translator.V_OR_B32(inst); + break; case Opcode::V_LSHLREV_B32: translator.V_LSHLREV_B32(inst); break; @@ -318,6 +321,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_CMP_EQ_I32: translator.V_CMP_U32(ConditionOp::EQ, true, false, inst); break; + case Opcode::V_CMP_LE_I32: + translator.V_CMP_U32(ConditionOp::LE, true, false, inst); + break; case Opcode::V_CMP_NE_U32: translator.V_CMP_U32(ConditionOp::LG, false, false, inst); break; @@ -378,6 +384,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::S_CMP_GT_I32: translator.S_CMP(ConditionOp::GT, true, inst); break; + case Opcode::S_CMP_GE_I32: + translator.S_CMP(ConditionOp::GE, true, inst); + break; case Opcode::S_CMP_EQ_I32: translator.S_CMP(ConditionOp::EQ, true, inst); break; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index a8964fc9e..64d6d7f08 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -62,6 +62,7 @@ public: void V_CVT_PKRTZ_F16_F32(const GcnInst& inst); void V_MUL_F32(const GcnInst& inst); void V_CNDMASK_B32(const GcnInst& inst); + void V_OR_B32(const GcnInst& inst); void V_AND_B32(const GcnInst& inst); void V_LSHLREV_B32(const GcnInst& inst); void V_ADD_I32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index dbd9471f1..2281a038c 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ 
b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -50,6 +50,13 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) { ir.SetVectorReg(dst_reg, IR::U32F32{result}); } +void Translator::V_OR_B32(const GcnInst& inst) { /* Emits IR that writes the bitwise OR of the instruction's two source operands to its destination vector register. */ + const IR::U32 src0{GetSrc(inst.src[0])}; /* first operand is resolved through GetSrc */ + const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; /* second operand is read directly as a vector register */ + const IR::VectorReg dst_reg{inst.dst[0].code}; + ir.SetVectorReg(dst_reg, ir.BitwiseOr(src0, src1)); +} + void Translator::V_AND_B32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};