// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/assert.h"
#include "common/io_file.h"
#include "common/thread.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pm4_cmds.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"

namespace AmdGpu {

// Backing store for the constant engine RAM. The exact type is fixed by the declaration in
// liverpool.h; a 48 KiB byte array is assumed here.
std::array<u8, 48_KB> Liverpool::ConstantEngine::constants_heap;

Liverpool::Liverpool() {
    process_thread = std::jthread{std::bind_front(&Liverpool::Process, this)};
}

Liverpool::~Liverpool() {
    process_thread.request_stop();
    cv_submit.notify_one();
    process_thread.join();
}

void Liverpool::Process(std::stop_token stoken) {
    Common::SetCurrentThreadName("GPU_CommandProcessor");

    while (!stoken.stop_requested()) {
        {
            std::unique_lock lock{m_submit};
            cv_submit.wait(lock, stoken, [this]() { return num_submits != 0; });
        }

        if (stoken.stop_requested()) {
            break;
        }

        // Round-robin over all mapped queues, resuming one pending task at a time.
        int qid = -1;
        while (num_submits) {
            qid = (qid + 1) % NumTotalQueues;

            auto& queue = mapped_queues[qid];

            Task::Handle task{};
            {
                std::scoped_lock lock{queue.m_access};
                if (queue.submits.empty()) {
                    continue;
                }
                task = queue.submits.front();
            }
            task.resume();

            if (task.done()) {
                task.destroy();

                std::scoped_lock lock{queue.m_access};
                queue.submits.pop();

                --num_submits;
            }
        }
        cv_complete.notify_all(); // Notify GPU idle
    }
}

void Liverpool::WaitGpuIdle() {
    std::unique_lock lock{m_submit};
    cv_complete.wait(lock, [this]() { return num_submits == 0; });
}

Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
    while (!ccb.empty()) {
        const auto* header = reinterpret_cast<const PM4Header*>(ccb.data());
        const u32 type = header->type;
        if (type != 3) {
            // No other types of packets were spotted so far
            UNREACHABLE_MSG("Invalid PM4 type {}", type);
        }

        const PM4ItOpcode opcode = header->type3.opcode;
        const auto* it_body = reinterpret_cast<const u32*>(header) + 1;
        switch (opcode) {
        case PM4ItOpcode::Nop: {
            const auto* nop = reinterpret_cast<const PM4CmdNop*>(header);
            break;
        }
        case PM4ItOpcode::WriteConstRam: {
            const auto* write_const = reinterpret_cast<const PM4WriteConstRam*>(header);
            memcpy(cblock.constants_heap.data() + write_const->Offset(), &write_const->data,
                   write_const->Size());
            break;
        }
        case PM4ItOpcode::DumpConstRam: {
            const auto* dump_const = reinterpret_cast<const PM4DumpConstRam*>(header);
            memcpy(dump_const->Address(), cblock.constants_heap.data() + dump_const->Offset(),
                   dump_const->Size());
            break;
        }
        case PM4ItOpcode::IncrementCeCounter: {
            ++cblock.ce_count;
            break;
        }
        case PM4ItOpcode::WaitOnDeCounterDiff: {
            const auto diff = it_body[0];
            while ((cblock.de_count - cblock.ce_count) >= diff) {
                co_yield {};
            }
            break;
        }
        default:
            const u32 count = header->type3.NumWords();
            UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
                            static_cast<u32>(opcode), count);
        }
        ccb = ccb.subspan(header->type3.NumWords() + 1);
    }
}
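
// The constant engine (CE) runs ahead of the draw engine (DE), filling the constants heap that
// later draw packets read from. The two streams rendezvous through a counter pair: the CCB bumps
// ce_count (IncrementCeCounter) and yields in WaitOnDeCounterDiff when it runs too far ahead of
// the DE, while the DCB bumps de_count (IncrementDeCounter) and, in WaitOnCeCounter, keeps
// resuming the CE task until ce_count has overtaken de_count. Both waits are modeled here with
// cooperative coroutines rather than blocking, so a single processing thread can interleave the
// two command streams.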

Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb) {
    cblock.Reset();

    // TODO: ASCs can potentially also depend on the CE; in that case the CE task
    // should be moved into a more global scope
    Task ce_task{};

    if (!ccb.empty()) {
        // If a CCB is provided, kick off the CE as soon as possible so the constants heap is
        // ready for use
        ce_task = ProcessCeUpdate(ccb);
        ce_task.handle.resume();
    }

    while (!dcb.empty()) {
        const auto* header = reinterpret_cast<const PM4Header*>(dcb.data());
        const u32 type = header->type;
        if (type != 3) {
            // No other types of packets were spotted so far
            UNREACHABLE_MSG("Invalid PM4 type {}", type);
        }

        const u32 count = header->type3.NumWords();
        const PM4ItOpcode opcode = header->type3.opcode;
        switch (opcode) {
        case PM4ItOpcode::Nop: {
            const auto* nop = reinterpret_cast<const PM4CmdNop*>(header);
            if (nop->header.count.Value() == 0) {
                break;
            }

            switch (nop->data_block[0]) {
            case PM4CmdNop::PayloadType::PatchedFlip: {
                // There is no evidence that the GPU CP drives flip events by parsing
                // special NOP packets. For convenience let's assume that it does.
                Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxFlip);
                break;
            }
            default:
                break;
            }
            break;
        }
        case PM4ItOpcode::SetContextReg: {
            const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
            std::memcpy(&regs.reg_array[ContextRegWordOffset + set_data->reg_offset], header + 2,
                        (count - 1) * sizeof(u32));
            break;
        }
        case PM4ItOpcode::SetShReg: {
            const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
            std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
                        (count - 1) * sizeof(u32));
            break;
        }
        case PM4ItOpcode::SetUconfigReg: {
            const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
            std::memcpy(&regs.reg_array[UconfigRegWordOffset + set_data->reg_offset], header + 2,
                        (count - 1) * sizeof(u32));
            break;
        }
        case PM4ItOpcode::IndexType: {
            const auto* index_type = reinterpret_cast<const PM4CmdDrawIndexType*>(header);
            regs.index_buffer_type.raw = index_type->raw;
            break;
        }
        case PM4ItOpcode::DrawIndex2: {
            const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndex2*>(header);
            regs.max_index_size = draw_index->max_size;
            regs.index_base_address.base_addr_lo = draw_index->index_base_lo;
            regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi);
            regs.num_indices = draw_index->index_count;
            regs.draw_initiator = draw_index->draw_initiator;
            if (rasterizer) {
                rasterizer->Draw(true);
            }
            break;
        }
        case PM4ItOpcode::DrawIndexAuto: {
            const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header);
            regs.num_indices = draw_index->index_count;
            regs.draw_initiator = draw_index->draw_initiator;
            if (rasterizer) {
                rasterizer->Draw(false);
            }
            break;
        }
        case PM4ItOpcode::DispatchDirect: {
            // const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
            break;
        }
        case PM4ItOpcode::EventWrite: {
            // const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
            break;
        }
        case PM4ItOpcode::EventWriteEos: {
            const auto* event_eos = reinterpret_cast<const PM4CmdEventWriteEos*>(header);
            event_eos->SignalFence();
            break;
        }
        case PM4ItOpcode::EventWriteEop: {
            const auto* event_eop = reinterpret_cast<const PM4CmdEventWriteEop*>(header);
            event_eop->SignalFence();
            break;
        }
        case PM4ItOpcode::DmaData: {
            const auto* dma_data = reinterpret_cast<const PM4DmaData*>(header);
            break;
        }
        case PM4ItOpcode::WriteData: {
            const auto* write_data = reinterpret_cast<const PM4CmdWriteData*>(header);
            ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5);
            const u32 data_size = (header->type3.count.Value() - 2) * 4;
            if (!write_data->wr_one_addr.Value()) {
                std::memcpy(write_data->Address(), write_data->data, data_size);
            } else {
                UNREACHABLE();
            }
            break;
        }
        case PM4ItOpcode::AcquireMem: {
            // const auto* acquire_mem = reinterpret_cast<const PM4CmdAcquireMem*>(header);
            break;
        }
        case PM4ItOpcode::WaitRegMem: {
            const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
            ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
            while (!wait_reg_mem->Test()) {
                co_yield {};
            }
            break;
        }
        case PM4ItOpcode::IncrementDeCounter: {
            ++cblock.de_count;
            break;
        }
        case PM4ItOpcode::WaitOnCeCounter: {
            while (cblock.ce_count <= cblock.de_count) {
                ce_task.handle.resume();
            }
            break;
        }
        default:
            UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
                            static_cast<u32>(opcode), count);
        }
        dcb = dcb.subspan(header->type3.NumWords() + 1);
    }

    if (ce_task.handle) {
        ASSERT_MSG(ce_task.handle.done(), "Partially processed CCB");
        ce_task.handle.destroy();
    }
}
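
// Compute command buffers (ACBs) arrive on the virtual ASC queues (see SubmitAsc below). No
// type-3 opcodes are decoded here yet, so any packet currently lands in the UNREACHABLE default;
// unlike ProcessGraphics, this is not a coroutine yet and simply returns an empty task.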
Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb) {
    while (!acb.empty()) {
        const auto* header = reinterpret_cast<const PM4Header*>(acb.data());
        const u32 type = header->type;
        if (type != 3) {
            // No other types of packets were spotted so far
            UNREACHABLE_MSG("Invalid PM4 type {}", type);
        }

        const u32 count = header->type3.NumWords();
        const PM4ItOpcode opcode = header->type3.opcode;
        const auto* it_body = reinterpret_cast<const u32*>(header) + 1;
        switch (opcode) {
        default:
            UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
                            static_cast<u32>(opcode), count);
        }

        acb = acb.subspan(header->type3.NumWords() + 1);
    }

    return {}; // Not a coroutine yet
}

void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
    static constexpr u32 GfxQueueId = 0u;
    auto& queue = mapped_queues[GfxQueueId];

    auto task = ProcessGraphics(dcb, ccb);
    {
        std::unique_lock lock{queue.m_access};
        queue.submits.emplace(task.handle);
    }

    {
        std::unique_lock lock{m_submit};
        ++num_submits;
    }
    cv_submit.notify_one();
}

void Liverpool::SubmitAsc(u32 vqid, std::span<const u32> acb) {
    ASSERT_MSG(vqid > 0 && vqid < NumTotalQueues, "Invalid virtual ASC queue index");
    auto& queue = mapped_queues[vqid];

    const auto& task = ProcessCompute(acb);
    {
        std::unique_lock lock{queue.m_access};
        queue.submits.emplace(task.handle);
    }

    {
        std::unique_lock lock{m_submit};
        ++num_submits;
    }
    cv_submit.notify_one();
}

} // namespace AmdGpu
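
// Usage sketch (hypothetical caller, not part of this translation unit), assuming dcb, ccb and
// acb are std::span<const u32> views over already-mapped PM4 command memory:
//
//     AmdGpu::Liverpool liverpool{};
//     liverpool.SubmitGfx(dcb, ccb); // graphics ring, queue 0; ccb may be empty
//     liverpool.SubmitAsc(1, acb);   // one of the virtual ASC compute queues (vqid > 0)
//     liverpool.WaitGpuIdle();       // block until num_submits drains to zero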