Handle PM4 type-2 packets (#556)

* video_core: handle PM4 type-2 packets

* video_core: rewrite pm4 comand type handling into a switch statement
This commit is contained in:
Random 2024-08-28 09:53:27 +02:00 committed by GitHub
parent eba0a6a68a
commit cb188ddbbe

View file

@ -175,294 +175,305 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
while (!dcb.empty()) {
const auto* header = reinterpret_cast<const PM4Header*>(dcb.data());
const u32 type = header->type;
if (type != 3) {
// No other types of packets were spotted so far
UNREACHABLE_MSG("Invalid PM4 type {}", type);
}
const u32 count = header->type3.NumWords();
const PM4ItOpcode opcode = header->type3.opcode;
switch (opcode) {
case PM4ItOpcode::Nop: {
const auto* nop = reinterpret_cast<const PM4CmdNop*>(header);
if (nop->header.count.Value() == 0) {
break;
}
switch (type) {
case 0:
case 1:
UNREACHABLE_MSG("Unsupported PM4 type {}", type);
break;
case 2:
// Type-2 packet are used for padding purposes
dcb = dcb.subspan(1);
continue;
case 3:
const u32 count = header->type3.NumWords();
const PM4ItOpcode opcode = header->type3.opcode;
switch (opcode) {
case PM4ItOpcode::Nop: {
const auto* nop = reinterpret_cast<const PM4CmdNop*>(header);
if (nop->header.count.Value() == 0) {
break;
}
switch (nop->data_block[0]) {
case PM4CmdNop::PayloadType::PatchedFlip: {
// There is no evidence that GPU CP drives flip events by parsing
// special NOP packets. For convenience lets assume that it does.
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxFlip);
switch (nop->data_block[0]) {
case PM4CmdNop::PayloadType::PatchedFlip: {
// There is no evidence that GPU CP drives flip events by parsing
// special NOP packets. For convenience lets assume that it does.
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxFlip);
break;
}
case PM4CmdNop::PayloadType::DebugMarkerPush: {
const auto marker_sz = nop->header.count.Value() * 2;
const std::string_view label{reinterpret_cast<const char*>(&nop->data_block[1]),
marker_sz};
rasterizer->ScopeMarkerBegin(label);
break;
}
case PM4CmdNop::PayloadType::DebugMarkerPop: {
rasterizer->ScopeMarkerEnd();
break;
}
default:
break;
}
break;
}
case PM4CmdNop::PayloadType::DebugMarkerPush: {
const auto marker_sz = nop->header.count.Value() * 2;
const std::string_view label{reinterpret_cast<const char*>(&nop->data_block[1]),
marker_sz};
rasterizer->ScopeMarkerBegin(label);
case PM4ItOpcode::ContextControl: {
break;
}
case PM4CmdNop::PayloadType::DebugMarkerPop: {
rasterizer->ScopeMarkerEnd();
case PM4ItOpcode::ClearState: {
regs.SetDefaults();
break;
}
case PM4ItOpcode::SetConfigReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
const auto reg_addr = ConfigRegWordOffset + set_data->reg_offset;
const auto* payload = reinterpret_cast<const u32*>(header + 2);
std::memcpy(&regs.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));
break;
}
case PM4ItOpcode::SetContextReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
const auto reg_addr = ContextRegWordOffset + set_data->reg_offset;
const auto* payload = reinterpret_cast<const u32*>(header + 2);
std::memcpy(&regs.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));
// In the case of HW, render target memory has alignment as color block operates on
// tiles. There is no information of actual resource extents stored in CB context
// regs, so any deduction of it from slices/pitch will lead to a larger surface
// created. The same applies to the depth targets. Fortunately, the guest always
// sends a trailing NOP packet right after the context regs setup, so we can use the
// heuristic below and extract the hint to determine actual resource dims.
switch (reg_addr) {
case ContextRegs::CbColor0Base:
case ContextRegs::CbColor1Base:
case ContextRegs::CbColor2Base:
case ContextRegs::CbColor3Base:
case ContextRegs::CbColor4Base:
case ContextRegs::CbColor5Base:
case ContextRegs::CbColor6Base:
case ContextRegs::CbColor7Base: {
const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Base) /
(ContextRegs::CbColor1Base - ContextRegs::CbColor0Base);
ASSERT(col_buf_id < NumColorBuffers);
const auto nop_offset = header->type3.count;
if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) {
ASSERT_MSG(payload[nop_offset] == 0xc0001000,
"NOP hint is missing in CB setup sequence");
last_cb_extent[col_buf_id].raw = payload[nop_offset + 1];
} else {
last_cb_extent[col_buf_id].raw = 0;
}
break;
}
case ContextRegs::CbColor0Cmask:
case ContextRegs::CbColor1Cmask:
case ContextRegs::CbColor2Cmask:
case ContextRegs::CbColor3Cmask:
case ContextRegs::CbColor4Cmask:
case ContextRegs::CbColor5Cmask:
case ContextRegs::CbColor6Cmask:
case ContextRegs::CbColor7Cmask: {
const auto col_buf_id =
(reg_addr - ContextRegs::CbColor0Cmask) /
(ContextRegs::CbColor1Cmask - ContextRegs::CbColor0Cmask);
ASSERT(col_buf_id < NumColorBuffers);
const auto nop_offset = header->type3.count;
if (nop_offset == 0x04) {
ASSERT_MSG(payload[nop_offset] == 0xc0001000,
"NOP hint is missing in CB setup sequence");
last_cb_extent[col_buf_id].raw = payload[nop_offset + 1];
}
break;
}
case ContextRegs::DbZInfo: {
if (header->type3.count == 8) {
ASSERT_MSG(payload[20] == 0xc0001000,
"NOP hint is missing in DB setup sequence");
last_db_extent.raw = payload[21];
} else {
last_db_extent.raw = 0;
}
break;
}
default:
break;
}
break;
}
case PM4ItOpcode::SetShReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
(count - 1) * sizeof(u32));
break;
}
case PM4ItOpcode::SetUconfigReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[UconfigRegWordOffset + set_data->reg_offset],
header + 2, (count - 1) * sizeof(u32));
break;
}
case PM4ItOpcode::IndexType: {
const auto* index_type = reinterpret_cast<const PM4CmdDrawIndexType*>(header);
regs.index_buffer_type.raw = index_type->raw;
break;
}
case PM4ItOpcode::DrawIndex2: {
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndex2*>(header);
regs.max_index_size = draw_index->max_size;
regs.index_base_address.base_addr_lo = draw_index->index_base_lo;
regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi);
regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator;
if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndex2", cmd_address));
rasterizer->Breadcrumb(u64(cmd_address));
rasterizer->Draw(true);
rasterizer->ScopeMarkerEnd();
}
break;
}
case PM4ItOpcode::DrawIndexOffset2: {
const auto* draw_index_off =
reinterpret_cast<const PM4CmdDrawIndexOffset2*>(header);
regs.max_index_size = draw_index_off->max_size;
regs.num_indices = draw_index_off->index_count;
regs.draw_initiator = draw_index_off->draw_initiator;
if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(
fmt::format("dcb:{}:DrawIndexOffset2", cmd_address));
rasterizer->Breadcrumb(u64(cmd_address));
rasterizer->Draw(true, draw_index_off->index_offset);
rasterizer->ScopeMarkerEnd();
}
break;
}
case PM4ItOpcode::DrawIndexAuto: {
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header);
regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator;
if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexAuto", cmd_address));
rasterizer->Breadcrumb(u64(cmd_address));
rasterizer->Draw(false);
rasterizer->ScopeMarkerEnd();
}
break;
}
case PM4ItOpcode::DispatchDirect: {
const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
regs.cs_program.dim_x = dispatch_direct->dim_x;
regs.cs_program.dim_y = dispatch_direct->dim_y;
regs.cs_program.dim_z = dispatch_direct->dim_z;
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address));
rasterizer->Breadcrumb(u64(cmd_address));
rasterizer->DispatchDirect();
rasterizer->ScopeMarkerEnd();
}
break;
}
case PM4ItOpcode::NumInstances: {
const auto* num_instances = reinterpret_cast<const PM4CmdDrawNumInstances*>(header);
regs.num_instances.num_instances = num_instances->num_instances;
break;
}
case PM4ItOpcode::IndexBase: {
const auto* index_base = reinterpret_cast<const PM4CmdDrawIndexBase*>(header);
regs.index_base_address.base_addr_lo = index_base->addr_lo;
regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi);
break;
}
case PM4ItOpcode::IndexBufferSize: {
const auto* index_size = reinterpret_cast<const PM4CmdDrawIndexBufferSize*>(header);
regs.num_indices = index_size->num_indices;
break;
}
case PM4ItOpcode::EventWrite: {
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
break;
}
case PM4ItOpcode::EventWriteEos: {
const auto* event_eos = reinterpret_cast<const PM4CmdEventWriteEos*>(header);
event_eos->SignalFence();
break;
}
case PM4ItOpcode::EventWriteEop: {
const auto* event_eop = reinterpret_cast<const PM4CmdEventWriteEop*>(header);
event_eop->SignalFence();
break;
}
case PM4ItOpcode::DmaData: {
const auto* dma_data = reinterpret_cast<const PM4DmaData*>(header);
break;
}
case PM4ItOpcode::WriteData: {
const auto* write_data = reinterpret_cast<const PM4CmdWriteData*>(header);
ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5);
const u32 data_size = (header->type3.count.Value() - 2) * 4;
u64* address = write_data->Address<u64*>();
if (!write_data->wr_one_addr.Value()) {
std::memcpy(address, write_data->data, data_size);
} else {
UNREACHABLE();
}
break;
}
case PM4ItOpcode::AcquireMem: {
// const auto* acquire_mem = reinterpret_cast<PM4CmdAcquireMem*>(header);
break;
}
case PM4ItOpcode::WaitRegMem: {
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
// ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
// Optimization: VO label waits are special because the emulator
// will write to the label when presentation is finished. So if
// there are no other submits to yield to we can sleep the thread
// instead and allow other tasks to run.
const u64* wait_addr = wait_reg_mem->Address<u64*>();
if (vo_port->IsVoLabel(wait_addr) && num_submits == 1) {
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
}
while (!wait_reg_mem->Test()) {
mapped_queues[GfxQueueId].cs_state = regs.cs_program;
TracyFiberLeave;
co_yield {};
TracyFiberEnter(dcb_task_name);
regs.cs_program = mapped_queues[GfxQueueId].cs_state;
}
break;
}
case PM4ItOpcode::IncrementDeCounter: {
++cblock.de_count;
break;
}
case PM4ItOpcode::WaitOnCeCounter: {
while (cblock.ce_count <= cblock.de_count) {
TracyFiberLeave;
ce_task.handle.resume();
TracyFiberEnter(dcb_task_name);
}
break;
}
case PM4ItOpcode::PfpSyncMe: {
break;
}
default:
break;
UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
static_cast<u32>(opcode), count);
}
dcb = dcb.subspan(header->type3.NumWords() + 1);
break;
}
case PM4ItOpcode::ContextControl: {
break;
}
case PM4ItOpcode::ClearState: {
regs.SetDefaults();
break;
}
case PM4ItOpcode::SetConfigReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
const auto reg_addr = ConfigRegWordOffset + set_data->reg_offset;
const auto* payload = reinterpret_cast<const u32*>(header + 2);
std::memcpy(&regs.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));
break;
}
case PM4ItOpcode::SetContextReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
const auto reg_addr = ContextRegWordOffset + set_data->reg_offset;
const auto* payload = reinterpret_cast<const u32*>(header + 2);
std::memcpy(&regs.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));
// In the case of HW, render target memory has alignment as color block operates on
// tiles. There is no information of actual resource extents stored in CB context
// regs, so any deduction of it from slices/pitch will lead to a larger surface created.
// The same applies to the depth targets. Fortunately, the guest always sends
// a trailing NOP packet right after the context regs setup, so we can use the heuristic
// below and extract the hint to determine actual resource dims.
switch (reg_addr) {
case ContextRegs::CbColor0Base:
case ContextRegs::CbColor1Base:
case ContextRegs::CbColor2Base:
case ContextRegs::CbColor3Base:
case ContextRegs::CbColor4Base:
case ContextRegs::CbColor5Base:
case ContextRegs::CbColor6Base:
case ContextRegs::CbColor7Base: {
const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Base) /
(ContextRegs::CbColor1Base - ContextRegs::CbColor0Base);
ASSERT(col_buf_id < NumColorBuffers);
const auto nop_offset = header->type3.count;
if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) {
ASSERT_MSG(payload[nop_offset] == 0xc0001000,
"NOP hint is missing in CB setup sequence");
last_cb_extent[col_buf_id].raw = payload[nop_offset + 1];
} else {
last_cb_extent[col_buf_id].raw = 0;
}
break;
}
case ContextRegs::CbColor0Cmask:
case ContextRegs::CbColor1Cmask:
case ContextRegs::CbColor2Cmask:
case ContextRegs::CbColor3Cmask:
case ContextRegs::CbColor4Cmask:
case ContextRegs::CbColor5Cmask:
case ContextRegs::CbColor6Cmask:
case ContextRegs::CbColor7Cmask: {
const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Cmask) /
(ContextRegs::CbColor1Cmask - ContextRegs::CbColor0Cmask);
ASSERT(col_buf_id < NumColorBuffers);
const auto nop_offset = header->type3.count;
if (nop_offset == 0x04) {
ASSERT_MSG(payload[nop_offset] == 0xc0001000,
"NOP hint is missing in CB setup sequence");
last_cb_extent[col_buf_id].raw = payload[nop_offset + 1];
}
break;
}
case ContextRegs::DbZInfo: {
if (header->type3.count == 8) {
ASSERT_MSG(payload[20] == 0xc0001000,
"NOP hint is missing in DB setup sequence");
last_db_extent.raw = payload[21];
} else {
last_db_extent.raw = 0;
}
break;
}
default:
break;
}
break;
}
case PM4ItOpcode::SetShReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[ShRegWordOffset + set_data->reg_offset], header + 2,
(count - 1) * sizeof(u32));
break;
}
case PM4ItOpcode::SetUconfigReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
std::memcpy(&regs.reg_array[UconfigRegWordOffset + set_data->reg_offset], header + 2,
(count - 1) * sizeof(u32));
break;
}
case PM4ItOpcode::IndexType: {
const auto* index_type = reinterpret_cast<const PM4CmdDrawIndexType*>(header);
regs.index_buffer_type.raw = index_type->raw;
break;
}
case PM4ItOpcode::DrawIndex2: {
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndex2*>(header);
regs.max_index_size = draw_index->max_size;
regs.index_base_address.base_addr_lo = draw_index->index_base_lo;
regs.index_base_address.base_addr_hi.Assign(draw_index->index_base_hi);
regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator;
if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndex2", cmd_address));
rasterizer->Breadcrumb(u64(cmd_address));
rasterizer->Draw(true);
rasterizer->ScopeMarkerEnd();
}
break;
}
case PM4ItOpcode::DrawIndexOffset2: {
const auto* draw_index_off = reinterpret_cast<const PM4CmdDrawIndexOffset2*>(header);
regs.max_index_size = draw_index_off->max_size;
regs.num_indices = draw_index_off->index_count;
regs.draw_initiator = draw_index_off->draw_initiator;
if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexOffset2", cmd_address));
rasterizer->Breadcrumb(u64(cmd_address));
rasterizer->Draw(true, draw_index_off->index_offset);
rasterizer->ScopeMarkerEnd();
}
break;
}
case PM4ItOpcode::DrawIndexAuto: {
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header);
regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator;
if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexAuto", cmd_address));
rasterizer->Breadcrumb(u64(cmd_address));
rasterizer->Draw(false);
rasterizer->ScopeMarkerEnd();
}
break;
}
case PM4ItOpcode::DispatchDirect: {
const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
regs.cs_program.dim_x = dispatch_direct->dim_x;
regs.cs_program.dim_y = dispatch_direct->dim_y;
regs.cs_program.dim_z = dispatch_direct->dim_z;
regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator;
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address));
rasterizer->Breadcrumb(u64(cmd_address));
rasterizer->DispatchDirect();
rasterizer->ScopeMarkerEnd();
}
break;
}
case PM4ItOpcode::NumInstances: {
const auto* num_instances = reinterpret_cast<const PM4CmdDrawNumInstances*>(header);
regs.num_instances.num_instances = num_instances->num_instances;
break;
}
case PM4ItOpcode::IndexBase: {
const auto* index_base = reinterpret_cast<const PM4CmdDrawIndexBase*>(header);
regs.index_base_address.base_addr_lo = index_base->addr_lo;
regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi);
break;
}
case PM4ItOpcode::IndexBufferSize: {
const auto* index_size = reinterpret_cast<const PM4CmdDrawIndexBufferSize*>(header);
regs.num_indices = index_size->num_indices;
break;
}
case PM4ItOpcode::EventWrite: {
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
break;
}
case PM4ItOpcode::EventWriteEos: {
const auto* event_eos = reinterpret_cast<const PM4CmdEventWriteEos*>(header);
event_eos->SignalFence();
break;
}
case PM4ItOpcode::EventWriteEop: {
const auto* event_eop = reinterpret_cast<const PM4CmdEventWriteEop*>(header);
event_eop->SignalFence();
break;
}
case PM4ItOpcode::DmaData: {
const auto* dma_data = reinterpret_cast<const PM4DmaData*>(header);
break;
}
case PM4ItOpcode::WriteData: {
const auto* write_data = reinterpret_cast<const PM4CmdWriteData*>(header);
ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5);
const u32 data_size = (header->type3.count.Value() - 2) * 4;
u64* address = write_data->Address<u64*>();
if (!write_data->wr_one_addr.Value()) {
std::memcpy(address, write_data->data, data_size);
} else {
UNREACHABLE();
}
break;
}
case PM4ItOpcode::AcquireMem: {
// const auto* acquire_mem = reinterpret_cast<PM4CmdAcquireMem*>(header);
break;
}
case PM4ItOpcode::WaitRegMem: {
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
// ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
// Optimization: VO label waits are special because the emulator
// will write to the label when presentation is finished. So if
// there are no other submits to yield to we can sleep the thread
// instead and allow other tasks to run.
const u64* wait_addr = wait_reg_mem->Address<u64*>();
if (vo_port->IsVoLabel(wait_addr) && num_submits == 1) {
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
}
while (!wait_reg_mem->Test()) {
mapped_queues[GfxQueueId].cs_state = regs.cs_program;
TracyFiberLeave;
co_yield {};
TracyFiberEnter(dcb_task_name);
regs.cs_program = mapped_queues[GfxQueueId].cs_state;
}
break;
}
case PM4ItOpcode::IncrementDeCounter: {
++cblock.de_count;
break;
}
case PM4ItOpcode::WaitOnCeCounter: {
while (cblock.ce_count <= cblock.de_count) {
TracyFiberLeave;
ce_task.handle.resume();
TracyFiberEnter(dcb_task_name);
}
break;
}
case PM4ItOpcode::PfpSyncMe: {
break;
}
default:
UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
static_cast<u32>(opcode), count);
}
dcb = dcb.subspan(header->type3.NumWords() + 1);
}
if (ce_task.handle) {