From a4d89edd9b9d89f31308ee202f1f20a3ef033f0b Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 24 Jun 2014 21:27:18 +0200 Subject: [PATCH 01/22] GPU debugger: Don't keep track of debugging data if no debugger views are active. --- src/video_core/gpu_debugger.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/video_core/gpu_debugger.h b/src/video_core/gpu_debugger.h index 5d909bebae..ca1fb22d79 100644 --- a/src/video_core/gpu_debugger.h +++ b/src/video_core/gpu_debugger.h @@ -78,6 +78,9 @@ public: void GXCommandProcessed(u8* command_data) { + if (observers.empty()) + return; + gx_command_history.push_back(GSP_GPU::GXCommand()); GSP_GPU::GXCommand& cmd = gx_command_history[gx_command_history.size()-1]; @@ -91,6 +94,9 @@ public: void CommandListCalled(u32 address, u32* command_list, u32 size_in_words) { + if (observers.empty()) + return; + PicaCommandList cmdlist; for (u32* parse_pointer = command_list; parse_pointer < command_list + size_in_words;) { From 994d29f416ce8d74560650be7a70e9a028c425c9 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 11 Jul 2014 18:47:09 +0200 Subject: [PATCH 02/22] Use a more compatible choice of initial framebuffer addresses. 
--- src/core/hw/gpu.h | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 3314ba9892..0c7dffec3a 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -51,23 +51,35 @@ enum { TOP_WIDTH = 400, BOTTOM_WIDTH = 320, - // Physical addresses in FCRAM used by ARM9 applications - these are correct for real hardware - PADDR_FRAMEBUFFER_SEL = 0x20184E59, - PADDR_TOP_LEFT_FRAME1 = 0x20184E60, + // Physical addresses in FCRAM (chosen arbitrarily) + PADDR_TOP_LEFT_FRAME1 = 0x201D4C00, + PADDR_TOP_LEFT_FRAME2 = 0x202D4C00, + PADDR_TOP_RIGHT_FRAME1 = 0x203D4C00, + PADDR_TOP_RIGHT_FRAME2 = 0x204D4C00, + PADDR_SUB_FRAME1 = 0x205D4C00, + PADDR_SUB_FRAME2 = 0x206D4C00, + // Physical addresses in FCRAM used by ARM9 applications +/* PADDR_TOP_LEFT_FRAME1 = 0x20184E60, PADDR_TOP_LEFT_FRAME2 = 0x201CB370, PADDR_TOP_RIGHT_FRAME1 = 0x20282160, PADDR_TOP_RIGHT_FRAME2 = 0x202C8670, PADDR_SUB_FRAME1 = 0x202118E0, - PADDR_SUB_FRAME2 = 0x20249CF0, + PADDR_SUB_FRAME2 = 0x20249CF0,*/ - // Physical addresses in VRAM - I'm not sure how these are actually allocated (so not real) - PADDR_VRAM_FRAMEBUFFER_SEL = 0x18184E59, - PADDR_VRAM_TOP_LEFT_FRAME1 = 0x18184E60, - PADDR_VRAM_TOP_LEFT_FRAME2 = 0x181CB370, + // Physical addresses in VRAM + // TODO: These should just be deduced from the ones above + PADDR_VRAM_TOP_LEFT_FRAME1 = 0x181D4C00, + PADDR_VRAM_TOP_LEFT_FRAME2 = 0x182D4C00, + PADDR_VRAM_TOP_RIGHT_FRAME1 = 0x183D4C00, + PADDR_VRAM_TOP_RIGHT_FRAME2 = 0x184D4C00, + PADDR_VRAM_SUB_FRAME1 = 0x185D4C00, + PADDR_VRAM_SUB_FRAME2 = 0x186D4C00, + // Physical addresses in VRAM used by ARM9 applications +/* PADDR_VRAM_TOP_LEFT_FRAME2 = 0x181CB370, PADDR_VRAM_TOP_RIGHT_FRAME1 = 0x18282160, PADDR_VRAM_TOP_RIGHT_FRAME2 = 0x182C8670, PADDR_VRAM_SUB_FRAME1 = 0x182118E0, - PADDR_VRAM_SUB_FRAME2 = 0x18249CF0, + PADDR_VRAM_SUB_FRAME2 = 0x18249CF0,*/ }; /// Framebuffer location From 
cb8f49b7eaeb071d875fa59142124e4a5c1e0f7d Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 1 Jun 2014 13:58:14 +0200 Subject: [PATCH 03/22] GSP: Implement ReadHWRegs and WriteHWRegs properly. --- src/core/hle/service/gsp.cpp | 83 ++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 32 deletions(-) diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp index 1fdbdf3428..cc111b0bbe 100644 --- a/src/core/hle/service/gsp.cpp +++ b/src/core/hle/service/gsp.cpp @@ -47,11 +47,6 @@ Handle g_shared_memory = 0; u32 g_thread_id = 0; -enum { - REG_FRAMEBUFFER_1 = 0x00400468, - REG_FRAMEBUFFER_2 = 0x00400494, -}; - /// Gets a pointer to the start (header) of a command buffer in GSP shared memory static inline u8* GX_GetCmdBufferPointer(u32 thread_id, u32 offset=0) { return Kernel::GetSharedMemoryPointer(g_shared_memory, 0x800 + (thread_id * 0x200) + offset); @@ -67,38 +62,62 @@ void GX_FinishCommand(u32 thread_id) { // TODO: Increment header->index? } -/// Read a GSP GPU hardware register -void ReadHWRegs(Service::Interface* self) { - static const u32 framebuffer_1[] = {GPU::PADDR_VRAM_TOP_LEFT_FRAME1, GPU::PADDR_VRAM_TOP_RIGHT_FRAME1}; - static const u32 framebuffer_2[] = {GPU::PADDR_VRAM_TOP_LEFT_FRAME2, GPU::PADDR_VRAM_TOP_RIGHT_FRAME2}; - +/// Write a GSP GPU hardware register +void WriteHWRegs(Service::Interface* self) { u32* cmd_buff = Service::GetCommandBuffer(); u32 reg_addr = cmd_buff[1]; u32 size = cmd_buff[2]; - u32* dst = (u32*)Memory::GetPointer(cmd_buff[0x41]); - switch (reg_addr) { - - // NOTE: Calling SetFramebufferLocation here is a hack... Not sure the correct way yet to set - // whether the framebuffers should be in VRAM or GSP heap, but from what I understand, if the - // user application is reading from either of these registers, then its going to be in VRAM. 
- - // Top framebuffer 1 addresses - case REG_FRAMEBUFFER_1: - GPU::SetFramebufferLocation(GPU::FRAMEBUFFER_LOCATION_VRAM); - memcpy(dst, framebuffer_1, size); - break; - - // Top framebuffer 2 addresses - case REG_FRAMEBUFFER_2: - GPU::SetFramebufferLocation(GPU::FRAMEBUFFER_LOCATION_VRAM); - memcpy(dst, framebuffer_2, size); - break; - - default: - ERROR_LOG(GSP, "unknown register read at address %08X", reg_addr); + // TODO: Return proper error codes + if (reg_addr + size >= 0x420000) { + ERROR_LOG(GPU, "Write address out of range! (address=0x%08x, size=0x%08x)", reg_addr, size); + return; } + // size should be word-aligned + if ((size % 4) != 0) { + ERROR_LOG(GPU, "Invalid size 0x%08x", size); + return; + } + + u32* src = (u32*)Memory::GetPointer(cmd_buff[0x4]); + + while (size > 0) { + GPU::Write(reg_addr + 0x1EB00000, *src); + + size -= 4; + ++src; + reg_addr += 4; + } +} + +/// Read a GSP GPU hardware register +void ReadHWRegs(Service::Interface* self) { + u32* cmd_buff = Service::GetCommandBuffer(); + u32 reg_addr = cmd_buff[1]; + u32 size = cmd_buff[2]; + + // TODO: Return proper error codes + if (reg_addr + size >= 0x420000) { + ERROR_LOG(GPU, "Read address out of range! 
(address=0x%08x, size=0x%08x)", reg_addr, size); + return; + } + + // size should be word-aligned + if ((size % 4) != 0) { + ERROR_LOG(GPU, "Invalid size 0x%08x", size); + return; + } + + u32* dst = (u32*)Memory::GetPointer(cmd_buff[0x41]); + + while (size > 0) { + GPU::Read(*dst, reg_addr + 0x1EB00000); + + size -= 4; + ++dst; + reg_addr += 4; + } } /** @@ -179,7 +198,7 @@ void TriggerCmdReqQueue(Service::Interface* self) { } const Interface::FunctionInfo FunctionTable[] = { - {0x00010082, nullptr, "WriteHWRegs"}, + {0x00010082, WriteHWRegs, "WriteHWRegs"}, {0x00020084, nullptr, "WriteHWRegsWithMask"}, {0x00030082, nullptr, "WriteHWRegRepeat"}, {0x00040080, ReadHWRegs, "ReadHWRegs"}, From ec9511e1db1f7ff0c2a8f86916937ea5736cdcf6 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 1 Jun 2014 00:22:40 +0200 Subject: [PATCH 04/22] GSP: HLE GXCommandId::SET_DISPLAY_TRANSFER and GXCommandId::SET_TEXTURE_COPY. --- src/core/hle/service/gsp.cpp | 11 +++++++++-- src/core/hw/gpu.h | 8 ++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp index cc111b0bbe..fea5218913 100644 --- a/src/core/hle/service/gsp.cpp +++ b/src/core/hle/service/gsp.cpp @@ -176,10 +176,17 @@ void TriggerCmdReqQueue(Service::Interface* self) { case GXCommandId::SET_MEMORY_FILL: break; + // TODO: Check if texture copies are implemented correctly.. case GXCommandId::SET_DISPLAY_TRANSFER: - break; - case GXCommandId::SET_TEXTURE_COPY: + GPU::Write(GPU::Registers::DisplayInputBufferAddr, cmd_buff[1] >> 3); + GPU::Write(GPU::Registers::DisplayOutputBufferAddr, cmd_buff[2] >> 3); + GPU::Write(GPU::Registers::DisplayInputBufferSize, cmd_buff[3]); + GPU::Write(GPU::Registers::DisplayOutputBufferSize, cmd_buff[4]); + GPU::Write(GPU::Registers::DisplayTransferFlags, cmd_buff[5]); + + // TODO: GPU::Registers::DisplayTriggerTransfer should be ORed with 1 for texture copies? 
+ GPU::Write(GPU::Registers::DisplayTriggerTransfer, 1); break; case GXCommandId::SET_COMMAND_LIST_FIRST: diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 0c7dffec3a..58058d7323 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -22,6 +22,14 @@ struct Registers { FramebufferSubRight1 = 0x1EF00594, // Sub LCD, unused first framebuffer FramebufferSubRight2 = 0x1EF00598, // Sub LCD, unused second framebuffer + DisplayInputBufferAddr = 0x1EF00C00, + DisplayOutputBufferAddr = 0x1EF00C04, + DisplayOutputBufferSize = 0x1EF00C08, + DisplayInputBufferSize = 0x1EF00C0C, + DisplayTransferFlags = 0x1EF00C10, + // Unknown?? + DisplayTriggerTransfer = 0x1EF00C18, + CommandListSize = 0x1EF018E0, CommandListAddress = 0x1EF018E8, ProcessCommandList = 0x1EF018F0, From 16bbc4f81b89462ff1c9e9364e0ca7ee1289c3b3 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 1 Jun 2014 00:08:00 +0200 Subject: [PATCH 05/22] GPU: Add display transfer configuration. --- src/core/hw/gpu.cpp | 52 +++++++++++++++++++++++++++++++++++++++++++++ src/core/hw/gpu.h | 40 ++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index f0ca4eadad..a400338b57 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -108,6 +108,31 @@ inline void Read(T &var, const u32 addr) { var = g_regs.framebuffer_sub_right_1; break; + case Registers::DisplayInputBufferAddr: + var = g_regs.display_transfer.input_address; + break; + + case Registers::DisplayOutputBufferAddr: + var = g_regs.display_transfer.output_address; + break; + + case Registers::DisplayOutputBufferSize: + var = g_regs.display_transfer.output_size; + break; + + case Registers::DisplayInputBufferSize: + var = g_regs.display_transfer.input_size; + break; + + case Registers::DisplayTransferFlags: + var = g_regs.display_transfer.flags; + break; + + // Not sure if this is supposed to be readable + case Registers::DisplayTriggerTransfer: + var = 
g_regs.display_transfer.trigger; + break; + case Registers::CommandListSize: var = g_regs.command_list_size; break; @@ -129,6 +154,33 @@ inline void Read(T &var, const u32 addr) { template inline void Write(u32 addr, const T data) { switch (static_cast(addr)) { + case Registers::DisplayInputBufferAddr: + g_regs.display_transfer.input_address = data; + break; + + case Registers::DisplayOutputBufferAddr: + g_regs.display_transfer.output_address = data; + break; + + case Registers::DisplayOutputBufferSize: + g_regs.display_transfer.output_size = data; + break; + + case Registers::DisplayInputBufferSize: + g_regs.display_transfer.input_size = data; + break; + + case Registers::DisplayTransferFlags: + g_regs.display_transfer.flags = data; + break; + + case Registers::DisplayTriggerTransfer: + g_regs.display_transfer.trigger = data; + if (g_regs.display_transfer.trigger & 1) { + // TODO: Perform display transfer! + } + break; + case Registers::CommandListSize: g_regs.command_list_size = data; break; diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 58058d7323..29eb7ed81c 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -5,6 +5,7 @@ #pragma once #include "common/common_types.h" +#include "common/bit_field.h" namespace GPU { @@ -44,6 +45,45 @@ struct Registers { u32 framebuffer_sub_right_1; u32 framebuffer_sub_right_2; + struct { + u32 input_address; + u32 output_address; + + inline u32 GetPhysicalInputAddress() const { + return input_address * 8; + } + + inline u32 GetPhysicalOutputAddress() const { + return output_address * 8; + } + + union { + u32 output_size; + + BitField< 0, 16, u32> output_width; + BitField<16, 16, u32> output_height; + }; + + union { + u32 input_size; + + BitField< 0, 16, u32> input_width; + BitField<16, 16, u32> input_height; + }; + + union { + u32 flags; + + BitField< 0, 1, u32> flip_data; + BitField< 8, 3, u32> input_format; + BitField<12, 3, u32> output_format; + BitField<16, 1, u32> output_tiled; + }; + + u32 unknown; + u32 
trigger; + } display_transfer; + u32 command_list_size; u32 command_list_address; u32 command_processing_enabled; From bbc6f314eb56ab1cf0a4b800750130de515cdd0f Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 11 Jul 2014 19:01:14 +0200 Subject: [PATCH 06/22] GPU: Properly implement display transfers. --- src/core/hw/gpu.cpp | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index a400338b57..e05e1b023c 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -177,7 +177,25 @@ inline void Write(u32 addr, const T data) { case Registers::DisplayTriggerTransfer: g_regs.display_transfer.trigger = data; if (g_regs.display_transfer.trigger & 1) { - // TODO: Perform display transfer! + u8* source_pointer = Memory::GetPointer(g_regs.display_transfer.GetPhysicalInputAddress()); + u8* dest_pointer = Memory::GetPointer(g_regs.display_transfer.GetPhysicalOutputAddress()); + + + // TODO: Perform display transfer correctly! + for (int y = 0; y < g_regs.display_transfer.output_height; ++y) { + // TODO: Copy size is just guesswork! 
+ memcpy(dest_pointer + y * g_regs.display_transfer.output_width * 4, + source_pointer + y * g_regs.display_transfer.input_width * 4, + g_regs.display_transfer.output_width * 4); + } + + // Clear previous contents until we implement proper buffer clearing + memset(source_pointer, 0x20, g_regs.display_transfer.input_width*g_regs.display_transfer.input_height*4); + DEBUG_LOG(GPU, "DisplayTriggerTransfer: %x bytes from %x(%xx%x)-> %x(%xx%x), dst format %x", + g_regs.display_transfer.output_height * g_regs.display_transfer.output_width * 4, + g_regs.display_transfer.GetPhysicalInputAddress(), (int)g_regs.display_transfer.input_width, (int)g_regs.display_transfer.input_height, + g_regs.display_transfer.GetPhysicalOutputAddress(), (int)g_regs.display_transfer.output_width, (int)g_regs.display_transfer.output_height, + (int)g_regs.display_transfer.output_format); } break; From 0b4055c1520fbe7f697d2f1f93a85b559504cca4 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 11 Jul 2014 19:10:08 +0200 Subject: [PATCH 07/22] GPU: Add proper framebuffer register handling. 
--- src/core/hw/gpu.cpp | 53 +++++++++++++++++++++++++++++++++++++- src/core/hw/gpu.h | 63 ++++++++++++++++++++++++++++++++++++++------- 2 files changed, 105 insertions(+), 11 deletions(-) diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index e05e1b023c..fad3439c8d 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -84,6 +84,10 @@ const u8* GetFramebufferPointer(const u32 address) { template inline void Read(T &var, const u32 addr) { switch (addr) { + case Registers::FramebufferTopSize: + var = g_regs.top_framebuffer.size; + break; + case Registers::FramebufferTopLeft1: var = g_regs.framebuffer_top_left_1; break; @@ -92,6 +96,18 @@ inline void Read(T &var, const u32 addr) { var = g_regs.framebuffer_top_left_2; break; + case Registers::FramebufferTopFormat: + var = g_regs.top_framebuffer.format; + break; + + case Registers::FramebufferTopSwapBuffers: + var = g_regs.top_framebuffer.active_fb; + break; + + case Registers::FramebufferTopStride: + var = g_regs.top_framebuffer.stride; + break; + case Registers::FramebufferTopRight1: var = g_regs.framebuffer_top_right_1; break; @@ -100,6 +116,10 @@ inline void Read(T &var, const u32 addr) { var = g_regs.framebuffer_top_right_2; break; + case Registers::FramebufferSubSize: + var = g_regs.sub_framebuffer.size; + break; + case Registers::FramebufferSubLeft1: var = g_regs.framebuffer_sub_left_1; break; @@ -108,6 +128,26 @@ inline void Read(T &var, const u32 addr) { var = g_regs.framebuffer_sub_right_1; break; + case Registers::FramebufferSubFormat: + var = g_regs.sub_framebuffer.format; + break; + + case Registers::FramebufferSubSwapBuffers: + var = g_regs.sub_framebuffer.active_fb; + break; + + case Registers::FramebufferSubStride: + var = g_regs.sub_framebuffer.stride; + break; + + case Registers::FramebufferSubLeft2: + var = g_regs.framebuffer_sub_left_2; + break; + + case Registers::FramebufferSubRight2: + var = g_regs.framebuffer_sub_right_2; + break; + case Registers::DisplayInputBufferAddr: var = 
g_regs.display_transfer.input_address; break; @@ -154,6 +194,17 @@ inline void Read(T &var, const u32 addr) { template inline void Write(u32 addr, const T data) { switch (static_cast(addr)) { + // TODO: Framebuffer registers!! + case Registers::FramebufferTopSwapBuffers: + g_regs.top_framebuffer.active_fb = data; + // TODO: Not sure if this should only be done upon a change! + break; + + case Registers::FramebufferSubSwapBuffers: + g_regs.sub_framebuffer.active_fb = data; + // TODO: Not sure if this should only be done upon a change! + break; + case Registers::DisplayInputBufferAddr: g_regs.display_transfer.input_address = data; break; @@ -195,7 +246,7 @@ inline void Write(u32 addr, const T data) { g_regs.display_transfer.output_height * g_regs.display_transfer.output_width * 4, g_regs.display_transfer.GetPhysicalInputAddress(), (int)g_regs.display_transfer.input_width, (int)g_regs.display_transfer.input_height, g_regs.display_transfer.GetPhysicalOutputAddress(), (int)g_regs.display_transfer.output_width, (int)g_regs.display_transfer.output_height, - (int)g_regs.display_transfer.output_format); + (int)g_regs.display_transfer.output_format.Value()); } break; diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 29eb7ed81c..50c3608145 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -14,14 +14,23 @@ static const u32 kFrameTicks = kFrameCycles / 3; ///< Approximate number of i struct Registers { enum Id : u32 { - FramebufferTopLeft1 = 0x1EF00468, // Main LCD, first framebuffer for 3D left - FramebufferTopLeft2 = 0x1EF0046C, // Main LCD, second framebuffer for 3D left - FramebufferTopRight1 = 0x1EF00494, // Main LCD, first framebuffer for 3D right - FramebufferTopRight2 = 0x1EF00498, // Main LCD, second framebuffer for 3D right - FramebufferSubLeft1 = 0x1EF00568, // Sub LCD, first framebuffer - FramebufferSubLeft2 = 0x1EF0056C, // Sub LCD, second framebuffer - FramebufferSubRight1 = 0x1EF00594, // Sub LCD, unused first framebuffer - FramebufferSubRight2 = 
0x1EF00598, // Sub LCD, unused second framebuffer + FramebufferTopSize = 0x1EF0045C, + FramebufferTopLeft1 = 0x1EF00468, // Main LCD, first framebuffer for 3D left + FramebufferTopLeft2 = 0x1EF0046C, // Main LCD, second framebuffer for 3D left + FramebufferTopFormat = 0x1EF00470, + FramebufferTopSwapBuffers = 0x1EF00478, + FramebufferTopStride = 0x1EF00490, // framebuffer row stride? + FramebufferTopRight1 = 0x1EF00494, // Main LCD, first framebuffer for 3D right + FramebufferTopRight2 = 0x1EF00498, // Main LCD, second framebuffer for 3D right + + FramebufferSubSize = 0x1EF0055C, + FramebufferSubLeft1 = 0x1EF00568, // Sub LCD, first framebuffer + FramebufferSubLeft2 = 0x1EF0056C, // Sub LCD, second framebuffer + FramebufferSubFormat = 0x1EF00570, + FramebufferSubSwapBuffers = 0x1EF00578, + FramebufferSubStride = 0x1EF00590, // framebuffer row stride? + FramebufferSubRight1 = 0x1EF00594, // Sub LCD, unused first framebuffer + FramebufferSubRight2 = 0x1EF00598, // Sub LCD, unused second framebuffer DisplayInputBufferAddr = 0x1EF00C00, DisplayOutputBufferAddr = 0x1EF00C04, @@ -36,6 +45,15 @@ struct Registers { ProcessCommandList = 0x1EF018F0, }; + enum class FramebufferFormat : u32 { + RGBA8 = 0, + RGB8 = 1, + RGB565 = 2, + RGB5A1 = 3, + RGBA4 = 4, + }; + + // TODO: Move these into the framebuffer struct u32 framebuffer_top_left_1; u32 framebuffer_top_left_2; u32 framebuffer_top_right_1; @@ -45,6 +63,31 @@ struct Registers { u32 framebuffer_sub_right_1; u32 framebuffer_sub_right_2; + struct FrameBufferConfig { + union { + u32 size; + + BitField< 0, 16, u32> width; + BitField<16, 16, u32> height; + }; + + union { + u32 format; + + BitField< 0, 3, FramebufferFormat> color_format; + }; + + union { + u32 active_fb; + + BitField<0, 1, u32> second_fb_active; + }; + + u32 stride; + }; + FrameBufferConfig top_framebuffer; + FrameBufferConfig sub_framebuffer; + struct { u32 input_address; u32 output_address; @@ -75,8 +118,8 @@ struct Registers { u32 flags; BitField< 0, 1, u32> 
flip_data; - BitField< 8, 3, u32> input_format; - BitField<12, 3, u32> output_format; + BitField< 8, 3, FramebufferFormat> input_format; + BitField<12, 3, FramebufferFormat> output_format; BitField<16, 1, u32> output_tiled; }; From baf0aa04f50dff257b57fa78786e53b97c1e6abb Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 4 Jun 2014 18:30:23 +0200 Subject: [PATCH 08/22] GPU: Emulate memory fills. --- src/core/hle/service/gsp.cpp | 8 ++++++ src/core/hle/service/gsp.h | 2 +- src/core/hw/gpu.cpp | 56 ++++++++++++++++++++++++++++++++++-- src/core/hw/gpu.h | 26 +++++++++++++++++ 4 files changed, 89 insertions(+), 3 deletions(-) diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp index fea5218913..5baa7a7a2b 100644 --- a/src/core/hle/service/gsp.cpp +++ b/src/core/hle/service/gsp.cpp @@ -174,6 +174,14 @@ void TriggerCmdReqQueue(Service::Interface* self) { break; case GXCommandId::SET_MEMORY_FILL: + GPU::Write(GPU::Registers::MemoryFillStart1, cmd_buff[1] >> 3); + GPU::Write(GPU::Registers::MemoryFillEnd1, cmd_buff[3] >> 3); + GPU::Write(GPU::Registers::MemoryFillSize1, cmd_buff[3] - cmd_buff[1]); + GPU::Write(GPU::Registers::MemoryFillValue1, cmd_buff[2]); + GPU::Write(GPU::Registers::MemoryFillStart2, cmd_buff[4] >> 3); + GPU::Write(GPU::Registers::MemoryFillEnd2, cmd_buff[6] >> 3); + GPU::Write(GPU::Registers::MemoryFillSize2, cmd_buff[6] - cmd_buff[4]); + GPU::Write(GPU::Registers::MemoryFillValue2, cmd_buff[5]); break; // TODO: Check if texture copies are implemented correctly.. diff --git a/src/core/hle/service/gsp.h b/src/core/hle/service/gsp.h index 214de140f9..fb50a928ae 100644 --- a/src/core/hle/service/gsp.h +++ b/src/core/hle/service/gsp.h @@ -14,7 +14,7 @@ namespace GSP_GPU { enum class GXCommandId : u32 { REQUEST_DMA = 0x00000000, SET_COMMAND_LIST_LAST = 0x00000001, - SET_MEMORY_FILL = 0x00000002, // TODO: Confirm? (lictru uses 0x01000102) + SET_MEMORY_FILL = 0x01000102, // TODO: Confirm? 
SET_DISPLAY_TRANSFER = 0x00000003, SET_TEXTURE_COPY = 0x00000004, SET_COMMAND_LIST_FIRST = 0x00000005, diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index fad3439c8d..230a12d469 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -84,6 +84,26 @@ const u8* GetFramebufferPointer(const u32 address) { template inline void Read(T &var, const u32 addr) { switch (addr) { + case Registers::MemoryFillStart1: + case Registers::MemoryFillStart2: + var = g_regs.memory_fill[(addr - Registers::MemoryFillStart1) / 0x10].address_start; + break; + + case Registers::MemoryFillEnd1: + case Registers::MemoryFillEnd2: + var = g_regs.memory_fill[(addr - Registers::MemoryFillEnd1) / 0x10].address_end; + break; + + case Registers::MemoryFillSize1: + case Registers::MemoryFillSize2: + var = g_regs.memory_fill[(addr - Registers::MemoryFillSize1) / 0x10].size; + break; + + case Registers::MemoryFillValue1: + case Registers::MemoryFillValue2: + var = g_regs.memory_fill[(addr - Registers::MemoryFillValue1) / 0x10].value; + break; + case Registers::FramebufferTopSize: var = g_regs.top_framebuffer.size; break; @@ -194,6 +214,40 @@ inline void Read(T &var, const u32 addr) { template inline void Write(u32 addr, const T data) { switch (static_cast(addr)) { + case Registers::MemoryFillStart1: + case Registers::MemoryFillStart2: + g_regs.memory_fill[(addr - Registers::MemoryFillStart1) / 0x10].address_start = data; + break; + + case Registers::MemoryFillEnd1: + case Registers::MemoryFillEnd2: + g_regs.memory_fill[(addr - Registers::MemoryFillEnd1) / 0x10].address_end = data; + break; + + case Registers::MemoryFillSize1: + case Registers::MemoryFillSize2: + g_regs.memory_fill[(addr - Registers::MemoryFillSize1) / 0x10].size = data; + break; + + case Registers::MemoryFillValue1: + case Registers::MemoryFillValue2: + { + Registers::MemoryFillConfig& config = g_regs.memory_fill[(addr - Registers::MemoryFillValue1) / 0x10]; + config.value = data; + + // TODO: Not sure if this check 
should be done at GSP level instead + if (config.address_start) { + // TODO: Not sure if this algorithm is correct, particularly because it doesn't use the size member at all + u32* start = (u32*)Memory::GetPointer(config.GetStartAddress()); + u32* end = (u32*)Memory::GetPointer(config.GetEndAddress()); + for (u32* ptr = start; ptr < end; ++ptr) + *ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation + + DEBUG_LOG(GPU, "MemoryFill from %x to %x", config.GetStartAddress(), config.GetEndAddress()); + } + break; + } + // TODO: Framebuffer registers!! case Registers::FramebufferTopSwapBuffers: g_regs.top_framebuffer.active_fb = data; @@ -240,8 +294,6 @@ inline void Write(u32 addr, const T data) { g_regs.display_transfer.output_width * 4); } - // Clear previous contents until we implement proper buffer clearing - memset(source_pointer, 0x20, g_regs.display_transfer.input_width*g_regs.display_transfer.input_height*4); DEBUG_LOG(GPU, "DisplayTriggerTransfer: %x bytes from %x(%xx%x)-> %x(%xx%x), dst format %x", g_regs.display_transfer.output_height * g_regs.display_transfer.output_width * 4, g_regs.display_transfer.GetPhysicalInputAddress(), (int)g_regs.display_transfer.input_width, (int)g_regs.display_transfer.input_height, diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 50c3608145..47d7fcb263 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -14,6 +14,15 @@ static const u32 kFrameTicks = kFrameCycles / 3; ///< Approximate number of i struct Registers { enum Id : u32 { + MemoryFillStart1 = 0x1EF00010, + MemoryFillEnd1 = 0x1EF00014, + MemoryFillSize1 = 0x1EF00018, + MemoryFillValue1 = 0x1EF0001C, + MemoryFillStart2 = 0x1EF00020, + MemoryFillEnd2 = 0x1EF00024, + MemoryFillSize2 = 0x1EF00028, + MemoryFillValue2 = 0x1EF0002C, + FramebufferTopSize = 0x1EF0045C, FramebufferTopLeft1 = 0x1EF00468, // Main LCD, first framebuffer for 3D left FramebufferTopLeft2 = 0x1EF0046C, // Main LCD, second framebuffer for 3D 
left @@ -53,6 +62,23 @@ struct Registers { RGBA4 = 4, }; + struct MemoryFillConfig { + u32 address_start; + u32 address_end; // ? + u32 size; + u32 value; // ? + + inline u32 GetStartAddress() const { + return address_start * 8; + } + + inline u32 GetEndAddress() const { + return address_end * 8; + } + }; + + MemoryFillConfig memory_fill[2]; + // TODO: Move these into the framebuffer struct u32 framebuffer_top_left_1; u32 framebuffer_top_left_2; From 46950ee4de0b1f2c30c26467b60e38c6a38d19b8 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 11 Jul 2014 19:14:15 +0200 Subject: [PATCH 09/22] GPU: Initialize GPU registers to some sensible default state. --- src/core/hw/gpu.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 230a12d469..0ee6b7c3b7 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -353,7 +353,22 @@ void Update() { /// Initialize hardware void Init() { g_last_ticks = Core::g_app_core->GetTicks(); - SetFramebufferLocation(FRAMEBUFFER_LOCATION_FCRAM); +// SetFramebufferLocation(FRAMEBUFFER_LOCATION_FCRAM); + SetFramebufferLocation(FRAMEBUFFER_LOCATION_VRAM); + + // TODO: Width should be 240 instead? + g_regs.top_framebuffer.width = 480; + g_regs.top_framebuffer.height = 400; + g_regs.top_framebuffer.stride = 480*3; + g_regs.top_framebuffer.color_format = Registers::FramebufferFormat::RGB8; + g_regs.top_framebuffer.active_fb = 0; + + g_regs.sub_framebuffer.width = 480; + g_regs.sub_framebuffer.height = 400; + g_regs.sub_framebuffer.stride = 480*3; + g_regs.sub_framebuffer.color_format = Registers::FramebufferFormat::RGB8; + g_regs.sub_framebuffer.active_fb = 0; + NOTICE_LOG(GPU, "initialized OK"); } From 9d618d0b705e3b8de5594512a555f469631e274b Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 11 Jul 2014 19:29:12 +0200 Subject: [PATCH 10/22] GPU: Interface cleanup. 
--- src/core/hw/gpu.cpp | 29 +++++++++++++++++------------ src/core/hw/gpu.h | 4 +++- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 0ee6b7c3b7..49fc574bc5 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -53,10 +53,10 @@ void SetFramebufferLocation(const FramebufferLocation mode) { * Gets the location of the framebuffers * @return Location of framebuffers as FramebufferLocation enum */ -const FramebufferLocation GetFramebufferLocation() { - if ((g_regs.framebuffer_top_right_1 & ~Memory::VRAM_MASK) == Memory::VRAM_PADDR) { +FramebufferLocation GetFramebufferLocation(u32 address) { + if ((address & ~Memory::VRAM_MASK) == Memory::VRAM_PADDR) { return FRAMEBUFFER_LOCATION_VRAM; - } else if ((g_regs.framebuffer_top_right_1 & ~Memory::FCRAM_MASK) == Memory::FCRAM_PADDR) { + } else if ((address & ~Memory::FCRAM_MASK) == Memory::FCRAM_PADDR) { return FRAMEBUFFER_LOCATION_FCRAM; } else { ERROR_LOG(GPU, "unknown framebuffer location!"); @@ -64,21 +64,26 @@ const FramebufferLocation GetFramebufferLocation() { return FRAMEBUFFER_LOCATION_UNKNOWN; } +u32 GetFramebufferAddr(const u32 address) { + switch (GetFramebufferLocation(address)) { + case FRAMEBUFFER_LOCATION_FCRAM: + return Memory::VirtualAddressFromPhysical_FCRAM(address); + case FRAMEBUFFER_LOCATION_VRAM: + return Memory::VirtualAddressFromPhysical_VRAM(address); + default: + ERROR_LOG(GPU, "unknown framebuffer location"); + } + return 0; +} + /** * Gets a read-only pointer to a framebuffer in memory * @param address Physical address of framebuffer * @return Returns const pointer to raw framebuffer */ const u8* GetFramebufferPointer(const u32 address) { - switch (GetFramebufferLocation()) { - case FRAMEBUFFER_LOCATION_FCRAM: - return (const u8*)Memory::GetPointer(Memory::VirtualAddressFromPhysical_FCRAM(address)); - case FRAMEBUFFER_LOCATION_VRAM: - return (const u8*)Memory::GetPointer(Memory::VirtualAddressFromPhysical_VRAM(address)); - 
default: - ERROR_LOG(GPU, "unknown framebuffer location"); - } - return NULL; + u32 addr = GetFramebufferAddr(address); + return (addr != 0) ? Memory::GetPointer(addr) : nullptr; } template diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 47d7fcb263..b66cf4a377 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -219,10 +219,12 @@ void SetFramebufferLocation(const FramebufferLocation mode); */ const u8* GetFramebufferPointer(const u32 address); +u32 GetFramebufferAddr(const u32 address); + /** * Gets the location of the framebuffers */ -const FramebufferLocation GetFramebufferLocation(); +FramebufferLocation GetFramebufferLocation(u32 address); template inline void Read(T &var, const u32 addr); From 9b96407e8e4879663e0678e22df569b9193397b5 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 16 Jul 2014 09:19:42 +0200 Subject: [PATCH 11/22] Renderer: Add a few TODOs. --- src/video_core/renderer_opengl/renderer_opengl.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 70af47c597..064f47e3be 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -61,10 +61,11 @@ void RendererOpenGL::FlipFramebuffer(const u8* in, u8* out) { int in_coord = 0; for (int x = 0; x < VideoCore::kScreenTopWidth; x++) { for (int y = VideoCore::kScreenTopHeight-1; y >= 0; y--) { + // TODO: Properly support other framebuffer formats int out_coord = (x + y * VideoCore::kScreenTopWidth) * 3; - out[out_coord] = in[in_coord]; - out[out_coord + 1] = in[in_coord + 1]; - out[out_coord + 2] = in[in_coord + 2]; + out[out_coord] = in[in_coord]; // blue? + out[out_coord + 1] = in[in_coord + 1]; // green? + out[out_coord + 2] = in[in_coord + 2]; // red? 
in_coord+=3; } } @@ -77,6 +78,12 @@ void RendererOpenGL::FlipFramebuffer(const u8* in, u8* out) { */ void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& dst_rect) { + DEBUG_LOG(GPU, "RenderXFB: %x bytes from %x(%xx%x), fmt %x", + GPU::g_regs.top_framebuffer.stride * GPU::g_regs.top_framebuffer.height, + GPU::GetFramebufferAddr(GPU::g_regs.framebuffer_top_left_1), (int)GPU::g_regs.top_framebuffer.width, + (int)GPU::g_regs.top_framebuffer.height, (int)GPU::g_regs.top_framebuffer.format); + + // TODO: This should consider the GPU registers for framebuffer width, height and stride. FlipFramebuffer(GPU::GetFramebufferPointer(GPU::g_regs.framebuffer_top_left_1), m_xfb_top_flipped); FlipFramebuffer(GPU::GetFramebufferPointer(GPU::g_regs.framebuffer_sub_left_1), m_xfb_bottom_flipped); From cb6f97b2eb129da599f297a605b669b34bccc8e2 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 16 Jul 2014 09:22:08 +0200 Subject: [PATCH 12/22] Renderer: Respect the active_fb GPU register. --- src/video_core/renderer_opengl/renderer_opengl.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 064f47e3be..c549f47440 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -78,14 +78,21 @@ void RendererOpenGL::FlipFramebuffer(const u8* in, u8* out) { */ void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& dst_rect) { + const u32 active_fb_top = (GPU::g_regs.top_framebuffer.active_fb == 1) + ? GPU::g_regs.framebuffer_top_left_2 + : GPU::g_regs.framebuffer_top_left_1; + const u32 active_fb_sub = (GPU::g_regs.sub_framebuffer.active_fb == 1) + ? 
GPU::g_regs.framebuffer_sub_left_2 + : GPU::g_regs.framebuffer_sub_left_1; + DEBUG_LOG(GPU, "RenderXFB: %x bytes from %x(%xx%x), fmt %x", GPU::g_regs.top_framebuffer.stride * GPU::g_regs.top_framebuffer.height, GPU::GetFramebufferAddr(GPU::g_regs.framebuffer_top_left_1), (int)GPU::g_regs.top_framebuffer.width, (int)GPU::g_regs.top_framebuffer.height, (int)GPU::g_regs.top_framebuffer.format); // TODO: This should consider the GPU registers for framebuffer width, height and stride. - FlipFramebuffer(GPU::GetFramebufferPointer(GPU::g_regs.framebuffer_top_left_1), m_xfb_top_flipped); - FlipFramebuffer(GPU::GetFramebufferPointer(GPU::g_regs.framebuffer_sub_left_1), m_xfb_bottom_flipped); + FlipFramebuffer(GPU::GetFramebufferPointer(active_fb_top), m_xfb_top_flipped); + FlipFramebuffer(GPU::GetFramebufferPointer(active_fb_sub), m_xfb_bottom_flipped); // Blit the top framebuffer // ------------------------ From c6fdeb7b23f72b356b58a4573e51fc4ddf6db9ab Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 11 Jul 2014 19:40:34 +0200 Subject: [PATCH 13/22] Renderer: Fix component order in bottom framebuffer. 
--- src/video_core/renderer_opengl/renderer_opengl.cpp | 2 +- src/video_core/renderer_opengl/renderer_opengl.h | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c549f47440..047c691850 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -124,7 +124,7 @@ void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& // Update textures with contents of XFB in RAM - bottom glBindTexture(GL_TEXTURE_2D, m_xfb_texture_bottom); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, VideoCore::kScreenTopWidth, VideoCore::kScreenTopHeight, - GL_RGB, GL_UNSIGNED_BYTE, m_xfb_bottom_flipped); + GL_BGR, GL_UNSIGNED_BYTE, m_xfb_bottom_flipped); glBindTexture(GL_TEXTURE_2D, 0); // Render target is destination framebuffer diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index dd811cad63..30f4febe05 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -84,7 +84,6 @@ private: // "Flipped" framebuffers translate scanlines from native 3DS left-to-right to top-to-bottom // as OpenGL expects them in a texture. There probably is a more efficient way of doing this: - u8 m_xfb_top_flipped[VideoCore::kScreenTopWidth * VideoCore::kScreenTopWidth * 4]; - u8 m_xfb_bottom_flipped[VideoCore::kScreenTopWidth * VideoCore::kScreenTopWidth * 4]; - -}; \ No newline at end of file + u8 m_xfb_top_flipped[VideoCore::kScreenTopWidth * VideoCore::kScreenTopHeight * 4]; + u8 m_xfb_bottom_flipped[VideoCore::kScreenBottomWidth * VideoCore::kScreenBottomHeight * 4]; +}; From 357d893b2642e91d5c44a7da7ccdcbe837f46b0a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 11 Jul 2014 19:48:01 +0200 Subject: [PATCH 14/22] GPU: Make framebuffer code format-aware. 
--- src/core/hw/gpu.cpp | 53 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 49fc574bc5..31989f4450 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -290,13 +290,54 @@ inline void Write(u32 addr, const T data) { u8* source_pointer = Memory::GetPointer(g_regs.display_transfer.GetPhysicalInputAddress()); u8* dest_pointer = Memory::GetPointer(g_regs.display_transfer.GetPhysicalOutputAddress()); - - // TODO: Perform display transfer correctly! for (int y = 0; y < g_regs.display_transfer.output_height; ++y) { - // TODO: Copy size is just guesswork! - memcpy(dest_pointer + y * g_regs.display_transfer.output_width * 4, - source_pointer + y * g_regs.display_transfer.input_width * 4, - g_regs.display_transfer.output_width * 4); + // TODO: Why does the register seem to hold twice the framebuffer width? + for (int x = 0; x < g_regs.display_transfer.output_width / 2; ++x) { + int source[4] = { 0, 0, 0, 0}; // rgba; + + switch (g_regs.display_transfer.input_format) { + case Registers::FramebufferFormat::RGBA8: + { + // TODO: Most likely got the component order messed up. 
+ u8* srcptr = source_pointer + x * 4 + y * g_regs.display_transfer.input_width * 4 / 2; + source[0] = srcptr[0]; // blue + source[1] = srcptr[1]; // green + source[2] = srcptr[2]; // red + source[3] = srcptr[3]; // alpha + break; + } + + default: + ERROR_LOG(GPU, "Unknown source framebuffer format %x", (int)g_regs.display_transfer.input_format.Value()); + break; + } + + switch (g_regs.display_transfer.output_format) { + /*case Registers::FramebufferFormat::RGBA8: + { + // TODO: Untested + u8* dstptr = (u32*)(dest_pointer + x * 4 + y * g_regs.display_transfer.output_width * 4); + dstptr[0] = source[0]; + dstptr[1] = source[1]; + dstptr[2] = source[2]; + dstptr[3] = source[3]; + break; + }*/ + + case Registers::FramebufferFormat::RGB8: + { + u8* dstptr = dest_pointer + x * 3 + y * g_regs.display_transfer.output_width * 3 / 2; + dstptr[0] = source[0]; // blue + dstptr[1] = source[1]; // green + dstptr[2] = source[2]; // red + break; + } + + default: + ERROR_LOG(GPU, "Unknown destination framebuffer format %x", static_cast(g_regs.display_transfer.output_format.Value())); + break; + } + } } DEBUG_LOG(GPU, "DisplayTriggerTransfer: %x bytes from %x(%xx%x)-> %x(%xx%x), dst format %x", From 75775e9ef41248592cb2c27ae69737e46499e705 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 16 Jul 2014 11:24:09 +0200 Subject: [PATCH 15/22] GPU: Make use of RegisterSet. 
--- src/core/hle/service/gsp.cpp | 47 ++- src/core/hw/gpu.cpp | 344 ++++++------------ src/core/hw/gpu.h | 130 +++---- .../renderer_opengl/renderer_opengl.cpp | 54 +-- 4 files changed, 225 insertions(+), 350 deletions(-) diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp index 5baa7a7a2b..053c7dd2ca 100644 --- a/src/core/hle/service/gsp.cpp +++ b/src/core/hle/service/gsp.cpp @@ -139,8 +139,8 @@ void RegisterInterruptRelayQueue(Service::Interface* self) { Kernel::SetEventLocked(g_event, false); - // Hack - This function will permanently set the state of the GSP event such that GPU command - // synchronization barriers always passthrough. Correct solution would be to set this after the + // Hack - This function will permanently set the state of the GSP event such that GPU command + // synchronization barriers always passthrough. Correct solution would be to set this after the // GPU as processed all queued up commands, but due to the emulator being single-threaded they // will always be ready. Kernel::SetPermanentLock(g_event, true); @@ -153,6 +153,12 @@ void RegisterInterruptRelayQueue(Service::Interface* self) { /// This triggers handling of the GX command written to the command buffer in shared memory. 
void TriggerCmdReqQueue(Service::Interface* self) { + + // Utility function to convert register ID to address + auto WriteGPURegister = [](u32 id, u32 data) { + GPU::Write(0x1EF00000 + 4 * id, data); + }; + GX_CmdBufferHeader* header = (GX_CmdBufferHeader*)GX_GetCmdBufferPointer(g_thread_id); u32* cmd_buff = (u32*)GX_GetCmdBufferPointer(g_thread_id, 0x20 + (header->index * 0x20)); @@ -164,9 +170,9 @@ void TriggerCmdReqQueue(Service::Interface* self) { break; case GXCommandId::SET_COMMAND_LIST_LAST: - GPU::Write(GPU::Registers::CommandListAddress, cmd_buff[1] >> 3); - GPU::Write(GPU::Registers::CommandListSize, cmd_buff[2] >> 3); - GPU::Write(GPU::Registers::ProcessCommandList, 1); // TODO: Not sure if we are supposed to always write this + WriteGPURegister(GPU::Regs::CommandProcessor + 2, cmd_buff[1] >> 3); // command list data address + WriteGPURegister(GPU::Regs::CommandProcessor, cmd_buff[2] >> 3); // command list size + WriteGPURegister(GPU::Regs::CommandProcessor + 4, 1); // TODO: Not sure if we are supposed to always write this ..
seems to trigger processing though // TODO: Move this to GPU // TODO: Not sure what units the size is measured in @@ -174,27 +180,28 @@ void TriggerCmdReqQueue(Service::Interface* self) { break; case GXCommandId::SET_MEMORY_FILL: - GPU::Write(GPU::Registers::MemoryFillStart1, cmd_buff[1] >> 3); - GPU::Write(GPU::Registers::MemoryFillEnd1, cmd_buff[3] >> 3); - GPU::Write(GPU::Registers::MemoryFillSize1, cmd_buff[3] - cmd_buff[1]); - GPU::Write(GPU::Registers::MemoryFillValue1, cmd_buff[2]); - GPU::Write(GPU::Registers::MemoryFillStart2, cmd_buff[4] >> 3); - GPU::Write(GPU::Registers::MemoryFillEnd2, cmd_buff[6] >> 3); - GPU::Write(GPU::Registers::MemoryFillSize2, cmd_buff[6] - cmd_buff[4]); - GPU::Write(GPU::Registers::MemoryFillValue2, cmd_buff[5]); + WriteGPURegister(GPU::Regs::MemoryFill, cmd_buff[1] >> 3); // Start 1 + WriteGPURegister(GPU::Regs::MemoryFill + 1, cmd_buff[3] >> 3); // End 1 + WriteGPURegister(GPU::Regs::MemoryFill + 2, cmd_buff[3] - cmd_buff[1]); // Size 1 + WriteGPURegister(GPU::Regs::MemoryFill + 3, cmd_buff[2]); // Value 1 + + WriteGPURegister(GPU::Regs::MemoryFill + 4, cmd_buff[4] >> 3); // Start 2 + WriteGPURegister(GPU::Regs::MemoryFill + 5, cmd_buff[6] >> 3); // End 2 + WriteGPURegister(GPU::Regs::MemoryFill + 6, cmd_buff[6] - cmd_buff[4]); // Size 2 + WriteGPURegister(GPU::Regs::MemoryFill + 7, cmd_buff[5]); // Value 2 break; // TODO: Check if texture copies are implemented correctly.. 
case GXCommandId::SET_DISPLAY_TRANSFER: case GXCommandId::SET_TEXTURE_COPY: - GPU::Write(GPU::Registers::DisplayInputBufferAddr, cmd_buff[1] >> 3); - GPU::Write(GPU::Registers::DisplayOutputBufferAddr, cmd_buff[2] >> 3); - GPU::Write(GPU::Registers::DisplayInputBufferSize, cmd_buff[3]); - GPU::Write(GPU::Registers::DisplayOutputBufferSize, cmd_buff[4]); - GPU::Write(GPU::Registers::DisplayTransferFlags, cmd_buff[5]); + WriteGPURegister(GPU::Regs::DisplayTransfer, cmd_buff[1] >> 3); // input buffer address + WriteGPURegister(GPU::Regs::DisplayTransfer + 1, cmd_buff[2] >> 3); // output buffer address + WriteGPURegister(GPU::Regs::DisplayTransfer + 3, cmd_buff[3]); // input buffer size + WriteGPURegister(GPU::Regs::DisplayTransfer + 2, cmd_buff[4]); // output buffer size + WriteGPURegister(GPU::Regs::DisplayTransfer + 4, cmd_buff[5]); // transfer flags - // TODO: GPU::Registers::DisplayTriggerTransfer should be ORed with 1 for texture copies? - GPU::Write(GPU::Registers::DisplayTriggerTransfer, 1); + // TODO: Should this only be ORed with 1 for texture copies? 
+ WriteGPURegister(GPU::Regs::DisplayTransfer + 6, 1); // trigger transfer break; case GXCommandId::SET_COMMAND_LIST_FIRST: diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 31989f4450..372e4f4cc9 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -15,38 +15,48 @@ namespace GPU { -Registers g_regs; +RegisterSet g_regs; u64 g_last_ticks = 0; ///< Last CPU ticks /** * Sets whether the framebuffers are in the GSP heap (FCRAM) or VRAM - * @param + * @param */ void SetFramebufferLocation(const FramebufferLocation mode) { switch (mode) { case FRAMEBUFFER_LOCATION_FCRAM: - g_regs.framebuffer_top_left_1 = PADDR_TOP_LEFT_FRAME1; - g_regs.framebuffer_top_left_2 = PADDR_TOP_LEFT_FRAME2; - g_regs.framebuffer_top_right_1 = PADDR_TOP_RIGHT_FRAME1; - g_regs.framebuffer_top_right_2 = PADDR_TOP_RIGHT_FRAME2; - g_regs.framebuffer_sub_left_1 = PADDR_SUB_FRAME1; - //g_regs.framebuffer_sub_left_2 = unknown; - g_regs.framebuffer_sub_right_1 = PADDR_SUB_FRAME2; - //g_regs.framebufferr_sub_right_2 = unknown; + { + auto& framebuffer_top = g_regs.Get(); + auto& framebuffer_sub = g_regs.Get(); + + framebuffer_top.data.address_left1 = PADDR_TOP_LEFT_FRAME1; + framebuffer_top.data.address_left2 = PADDR_TOP_LEFT_FRAME2; + framebuffer_top.data.address_right1 = PADDR_TOP_RIGHT_FRAME1; + framebuffer_top.data.address_right2 = PADDR_TOP_RIGHT_FRAME2; + framebuffer_sub.data.address_left1 = PADDR_SUB_FRAME1; + //framebuffer_sub.data.address_left2 = unknown; + framebuffer_sub.data.address_right1 = PADDR_SUB_FRAME2; + //framebuffer_sub.data.address_right2 = unknown; break; + } case FRAMEBUFFER_LOCATION_VRAM: - g_regs.framebuffer_top_left_1 = PADDR_VRAM_TOP_LEFT_FRAME1; - g_regs.framebuffer_top_left_2 = PADDR_VRAM_TOP_LEFT_FRAME2; - g_regs.framebuffer_top_right_1 = PADDR_VRAM_TOP_RIGHT_FRAME1; - g_regs.framebuffer_top_right_2 = PADDR_VRAM_TOP_RIGHT_FRAME2; - g_regs.framebuffer_sub_left_1 = PADDR_VRAM_SUB_FRAME1; - //g_regs.framebuffer_sub_left_2 = unknown; - 
g_regs.framebuffer_sub_right_1 = PADDR_VRAM_SUB_FRAME2; - //g_regs.framebufferr_sub_right_2 = unknown; + { + auto& framebuffer_top = g_regs.Get(); + auto& framebuffer_sub = g_regs.Get(); + + framebuffer_top.data.address_left1 = PADDR_VRAM_TOP_LEFT_FRAME1; + framebuffer_top.data.address_left2 = PADDR_VRAM_TOP_LEFT_FRAME2; + framebuffer_top.data.address_right1 = PADDR_VRAM_TOP_RIGHT_FRAME1; + framebuffer_top.data.address_right2 = PADDR_VRAM_TOP_RIGHT_FRAME2; + framebuffer_sub.data.address_left1 = PADDR_VRAM_SUB_FRAME1; + //framebuffer_sub.data.address_left2 = unknown; + framebuffer_sub.data.address_right1 = PADDR_VRAM_SUB_FRAME2; + //framebuffer_sub.data.address_right2 = unknown; break; } + } } /** @@ -87,219 +97,73 @@ const u8* GetFramebufferPointer(const u32 address) { } template -inline void Read(T &var, const u32 addr) { - switch (addr) { - case Registers::MemoryFillStart1: - case Registers::MemoryFillStart2: - var = g_regs.memory_fill[(addr - Registers::MemoryFillStart1) / 0x10].address_start; - break; +inline void Read(T &var, const u32 raw_addr) { + u32 addr = raw_addr - 0x1EF00000; + int index = addr / 4; - case Registers::MemoryFillEnd1: - case Registers::MemoryFillEnd2: - var = g_regs.memory_fill[(addr - Registers::MemoryFillEnd1) / 0x10].address_end; - break; - - case Registers::MemoryFillSize1: - case Registers::MemoryFillSize2: - var = g_regs.memory_fill[(addr - Registers::MemoryFillSize1) / 0x10].size; - break; - - case Registers::MemoryFillValue1: - case Registers::MemoryFillValue2: - var = g_regs.memory_fill[(addr - Registers::MemoryFillValue1) / 0x10].value; - break; - - case Registers::FramebufferTopSize: - var = g_regs.top_framebuffer.size; - break; - - case Registers::FramebufferTopLeft1: - var = g_regs.framebuffer_top_left_1; - break; - - case Registers::FramebufferTopLeft2: - var = g_regs.framebuffer_top_left_2; - break; - - case Registers::FramebufferTopFormat: - var = g_regs.top_framebuffer.format; - break; - - case 
Registers::FramebufferTopSwapBuffers: - var = g_regs.top_framebuffer.active_fb; - break; - - case Registers::FramebufferTopStride: - var = g_regs.top_framebuffer.stride; - break; - - case Registers::FramebufferTopRight1: - var = g_regs.framebuffer_top_right_1; - break; - - case Registers::FramebufferTopRight2: - var = g_regs.framebuffer_top_right_2; - break; - - case Registers::FramebufferSubSize: - var = g_regs.sub_framebuffer.size; - break; - - case Registers::FramebufferSubLeft1: - var = g_regs.framebuffer_sub_left_1; - break; - - case Registers::FramebufferSubRight1: - var = g_regs.framebuffer_sub_right_1; - break; - - case Registers::FramebufferSubFormat: - var = g_regs.sub_framebuffer.format; - break; - - case Registers::FramebufferSubSwapBuffers: - var = g_regs.sub_framebuffer.active_fb; - break; - - case Registers::FramebufferSubStride: - var = g_regs.sub_framebuffer.stride; - break; - - case Registers::FramebufferSubLeft2: - var = g_regs.framebuffer_sub_left_2; - break; - - case Registers::FramebufferSubRight2: - var = g_regs.framebuffer_sub_right_2; - break; - - case Registers::DisplayInputBufferAddr: - var = g_regs.display_transfer.input_address; - break; - - case Registers::DisplayOutputBufferAddr: - var = g_regs.display_transfer.output_address; - break; - - case Registers::DisplayOutputBufferSize: - var = g_regs.display_transfer.output_size; - break; - - case Registers::DisplayInputBufferSize: - var = g_regs.display_transfer.input_size; - break; - - case Registers::DisplayTransferFlags: - var = g_regs.display_transfer.flags; - break; - - // Not sure if this is supposed to be readable - case Registers::DisplayTriggerTransfer: - var = g_regs.display_transfer.trigger; - break; - - case Registers::CommandListSize: - var = g_regs.command_list_size; - break; - - case Registers::CommandListAddress: - var = g_regs.command_list_address; - break; - - case Registers::ProcessCommandList: - var = g_regs.command_processing_enabled; - break; - - default: + // Reads 
other than u32 are untested, so I'd rather have them abort than silently fail + if (index >= Regs::NumIds || !std::is_same::value) + { ERROR_LOG(GPU, "unknown Read%d @ 0x%08X", sizeof(var) * 8, addr); - break; + return; } + + var = g_regs[static_cast(addr / 4)]; } template inline void Write(u32 addr, const T data) { - switch (static_cast(addr)) { - case Registers::MemoryFillStart1: - case Registers::MemoryFillStart2: - g_regs.memory_fill[(addr - Registers::MemoryFillStart1) / 0x10].address_start = data; - break; + addr -= 0x1EF00000; + int index = addr / 4; - case Registers::MemoryFillEnd1: - case Registers::MemoryFillEnd2: - g_regs.memory_fill[(addr - Registers::MemoryFillEnd1) / 0x10].address_end = data; - break; - - case Registers::MemoryFillSize1: - case Registers::MemoryFillSize2: - g_regs.memory_fill[(addr - Registers::MemoryFillSize1) / 0x10].size = data; - break; - - case Registers::MemoryFillValue1: - case Registers::MemoryFillValue2: + // Writes other than u32 are untested, so I'd rather have them abort than silently fail + if (index >= Regs::NumIds || !std::is_same::value) { - Registers::MemoryFillConfig& config = g_regs.memory_fill[(addr - Registers::MemoryFillValue1) / 0x10]; - config.value = data; + ERROR_LOG(GPU, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, addr); + return; + } + + g_regs[static_cast(index)] = data; + + switch (static_cast(index)) { + + // Memory fills are triggered once the fill value is written. + // NOTE: This is not verified. 
+ case Regs::MemoryFill + 3: + case Regs::MemoryFill + 7: + { + const auto& config = g_regs.Get(static_cast(index - 3)); // TODO: Not sure if this check should be done at GSP level instead - if (config.address_start) { + if (config.data.address_start) { // TODO: Not sure if this algorithm is correct, particularly because it doesn't use the size member at all - u32* start = (u32*)Memory::GetPointer(config.GetStartAddress()); - u32* end = (u32*)Memory::GetPointer(config.GetEndAddress()); + u32* start = (u32*)Memory::GetPointer(config.data.GetStartAddress()); + u32* end = (u32*)Memory::GetPointer(config.data.GetEndAddress()); for (u32* ptr = start; ptr < end; ++ptr) - *ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation + *ptr = bswap32(config.data.value); // TODO: This is just a workaround to missing framebuffer format emulation - DEBUG_LOG(GPU, "MemoryFill from %x to %x", config.GetStartAddress(), config.GetEndAddress()); + DEBUG_LOG(GPU, "MemoryFill from %x to %x", config.data.GetStartAddress(), config.data.GetEndAddress()); } break; } - // TODO: Framebuffer registers!! - case Registers::FramebufferTopSwapBuffers: - g_regs.top_framebuffer.active_fb = data; - // TODO: Not sure if this should only be done upon a change! - break; + case Regs::DisplayTransfer + 6: + { + const auto& config = g_regs.Get(); + if (config.data.trigger & 1) { + u8* source_pointer = Memory::GetPointer(config.data.GetPhysicalInputAddress()); + u8* dest_pointer = Memory::GetPointer(config.data.GetPhysicalOutputAddress()); - case Registers::FramebufferSubSwapBuffers: - g_regs.sub_framebuffer.active_fb = data; - // TODO: Not sure if this should only be done upon a change! 
- break; - - case Registers::DisplayInputBufferAddr: - g_regs.display_transfer.input_address = data; - break; - - case Registers::DisplayOutputBufferAddr: - g_regs.display_transfer.output_address = data; - break; - - case Registers::DisplayOutputBufferSize: - g_regs.display_transfer.output_size = data; - break; - - case Registers::DisplayInputBufferSize: - g_regs.display_transfer.input_size = data; - break; - - case Registers::DisplayTransferFlags: - g_regs.display_transfer.flags = data; - break; - - case Registers::DisplayTriggerTransfer: - g_regs.display_transfer.trigger = data; - if (g_regs.display_transfer.trigger & 1) { - u8* source_pointer = Memory::GetPointer(g_regs.display_transfer.GetPhysicalInputAddress()); - u8* dest_pointer = Memory::GetPointer(g_regs.display_transfer.GetPhysicalOutputAddress()); - - for (int y = 0; y < g_regs.display_transfer.output_height; ++y) { + for (int y = 0; y < config.data.output_height; ++y) { // TODO: Why does the register seem to hold twice the framebuffer width? - for (int x = 0; x < g_regs.display_transfer.output_width / 2; ++x) { + for (int x = 0; x < config.data.output_width / 2; ++x) { int source[4] = { 0, 0, 0, 0}; // rgba; - switch (g_regs.display_transfer.input_format) { - case Registers::FramebufferFormat::RGBA8: + switch (config.data.input_format) { + case Regs::FramebufferFormat::RGBA8: { // TODO: Most likely got the component order messed up. 
- u8* srcptr = source_pointer + x * 4 + y * g_regs.display_transfer.input_width * 4 / 2; + u8* srcptr = source_pointer + x * 4 + y * config.data.input_width * 4 / 2; source[0] = srcptr[0]; // blue source[1] = srcptr[1]; // green source[2] = srcptr[2]; // red @@ -308,15 +172,15 @@ inline void Write(u32 addr, const T data) { } default: - ERROR_LOG(GPU, "Unknown source framebuffer format %x", (int)g_regs.display_transfer.input_format.Value()); + ERROR_LOG(GPU, "Unknown source framebuffer format %x", config.data.input_format.Value()); break; } - switch (g_regs.display_transfer.output_format) { - /*case Registers::FramebufferFormat::RGBA8: + switch (config.data.output_format) { + /*case Regs::FramebufferFormat::RGBA8: { // TODO: Untested - u8* dstptr = (u32*)(dest_pointer + x * 4 + y * g_regs.display_transfer.output_width * 4); + u8* dstptr = (u32*)(dest_pointer + x * 4 + y * config.data.output_width * 4); dstptr[0] = source[0]; dstptr[1] = source[1]; dstptr[2] = source[2]; @@ -324,9 +188,9 @@ inline void Write(u32 addr, const T data) { break; }*/ - case Registers::FramebufferFormat::RGB8: + case Regs::FramebufferFormat::RGB8: { - u8* dstptr = dest_pointer + x * 3 + y * g_regs.display_transfer.output_width * 3 / 2; + u8* dstptr = dest_pointer + x * 3 + y * config.data.output_width * 3 / 2; dstptr[0] = source[0]; // blue dstptr[1] = source[1]; // green dstptr[2] = source[2]; // red @@ -334,40 +198,34 @@ inline void Write(u32 addr, const T data) { } default: - ERROR_LOG(GPU, "Unknown destination framebuffer format %x", static_cast(g_regs.display_transfer.output_format.Value())); + ERROR_LOG(GPU, "Unknown destination framebuffer format %x", config.data.output_format.Value()); break; } } } DEBUG_LOG(GPU, "DisplayTriggerTransfer: %x bytes from %x(%xx%x)-> %x(%xx%x), dst format %x", - g_regs.display_transfer.output_height * g_regs.display_transfer.output_width * 4, - g_regs.display_transfer.GetPhysicalInputAddress(), (int)g_regs.display_transfer.input_width, 
(int)g_regs.display_transfer.input_height, - g_regs.display_transfer.GetPhysicalOutputAddress(), (int)g_regs.display_transfer.output_width, (int)g_regs.display_transfer.output_height, - (int)g_regs.display_transfer.output_format.Value()); + config.data.output_height * config.data.output_width * 4, + config.data.GetPhysicalInputAddress(), (int)config.data.input_width, (int)config.data.input_height, + config.data.GetPhysicalOutputAddress(), (int)config.data.output_width, (int)config.data.output_height, + config.data.output_format.Value()); } break; + } - case Registers::CommandListSize: - g_regs.command_list_size = data; - break; - - case Registers::CommandListAddress: - g_regs.command_list_address = data; - break; - - case Registers::ProcessCommandList: - g_regs.command_processing_enabled = data; - if (g_regs.command_processing_enabled & 1) + case Regs::CommandProcessor + 4: + { + const auto& config = g_regs.Get(); + if (config.data.trigger & 1) { - // u32* buffer = (u32*)Memory::GetPointer(g_regs.command_list_address << 3); - ERROR_LOG(GPU, "Beginning %x bytes of commands from address %x", g_regs.command_list_size, g_regs.command_list_address << 3); + // u32* buffer = (u32*)Memory::GetPointer(config.data.address << 3); + ERROR_LOG(GPU, "Beginning %x bytes of commands from address %x", config.data.size, config.data.address << 3); // TODO: Process command list! } break; + } default: - ERROR_LOG(GPU, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, addr); break; } } @@ -402,18 +260,20 @@ void Init() { // SetFramebufferLocation(FRAMEBUFFER_LOCATION_FCRAM); SetFramebufferLocation(FRAMEBUFFER_LOCATION_VRAM); + auto& framebuffer_top = g_regs.Get(); + auto& framebuffer_sub = g_regs.Get(); // TODO: Width should be 240 instead? 
- g_regs.top_framebuffer.width = 480; - g_regs.top_framebuffer.height = 400; - g_regs.top_framebuffer.stride = 480*3; - g_regs.top_framebuffer.color_format = Registers::FramebufferFormat::RGB8; - g_regs.top_framebuffer.active_fb = 0; + framebuffer_top.data.width = 480; + framebuffer_top.data.height = 400; + framebuffer_top.data.stride = 480*3; + framebuffer_top.data.color_format = Regs::FramebufferFormat::RGB8; + framebuffer_top.data.active_fb = 0; - g_regs.sub_framebuffer.width = 480; - g_regs.sub_framebuffer.height = 400; - g_regs.sub_framebuffer.stride = 480*3; - g_regs.sub_framebuffer.color_format = Registers::FramebufferFormat::RGB8; - g_regs.sub_framebuffer.active_fb = 0; + framebuffer_sub.data.width = 480; + framebuffer_sub.data.height = 400; + framebuffer_sub.data.stride = 480*3; + framebuffer_sub.data.color_format = Regs::FramebufferFormat::RGB8; + framebuffer_sub.data.active_fb = 0; NOTICE_LOG(GPU, "initialized OK"); } diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index b66cf4a377..ce524bd029 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -6,54 +6,31 @@ #include "common/common_types.h" #include "common/bit_field.h" +#include "common/register_set.h" namespace GPU { static const u32 kFrameCycles = 268123480 / 60; ///< 268MHz / 60 frames per second static const u32 kFrameTicks = kFrameCycles / 3; ///< Approximate number of instructions/frame -struct Registers { +// MMIO region 0x1EFxxxxx +struct Regs { enum Id : u32 { - MemoryFillStart1 = 0x1EF00010, - MemoryFillEnd1 = 0x1EF00014, - MemoryFillSize1 = 0x1EF00018, - MemoryFillValue1 = 0x1EF0001C, - MemoryFillStart2 = 0x1EF00020, - MemoryFillEnd2 = 0x1EF00024, - MemoryFillSize2 = 0x1EF00028, - MemoryFillValue2 = 0x1EF0002C, + MemoryFill = 0x00004, // + 5,6,7; second block at 8-11 - FramebufferTopSize = 0x1EF0045C, - FramebufferTopLeft1 = 0x1EF00468, // Main LCD, first framebuffer for 3D left - FramebufferTopLeft2 = 0x1EF0046C, // Main LCD, second framebuffer for 3D left - FramebufferTopFormat = 
0x1EF00470, - FramebufferTopSwapBuffers = 0x1EF00478, - FramebufferTopStride = 0x1EF00490, // framebuffer row stride? - FramebufferTopRight1 = 0x1EF00494, // Main LCD, first framebuffer for 3D right - FramebufferTopRight2 = 0x1EF00498, // Main LCD, second framebuffer for 3D right + FramebufferTop = 0x00117, // + 11a,11b,11c,11d(?),11e...126 + FramebufferBottom = 0x00157, // + 15a,15b,15c,15d(?),15e...166 - FramebufferSubSize = 0x1EF0055C, - FramebufferSubLeft1 = 0x1EF00568, // Sub LCD, first framebuffer - FramebufferSubLeft2 = 0x1EF0056C, // Sub LCD, second framebuffer - FramebufferSubFormat = 0x1EF00570, - FramebufferSubSwapBuffers = 0x1EF00578, - FramebufferSubStride = 0x1EF00590, // framebuffer row stride? - FramebufferSubRight1 = 0x1EF00594, // Sub LCD, unused first framebuffer - FramebufferSubRight2 = 0x1EF00598, // Sub LCD, unused second framebuffer + DisplayTransfer = 0x00300, // + 301,302,303,304,305,306 - DisplayInputBufferAddr = 0x1EF00C00, - DisplayOutputBufferAddr = 0x1EF00C04, - DisplayOutputBufferSize = 0x1EF00C08, - DisplayInputBufferSize = 0x1EF00C0C, - DisplayTransferFlags = 0x1EF00C10, - // Unknown?? - DisplayTriggerTransfer = 0x1EF00C18, + CommandProcessor = 0x00638, // + 63a,63c - CommandListSize = 0x1EF018E0, - CommandListAddress = 0x1EF018E8, - ProcessCommandList = 0x1EF018F0, + NumIds = 0x01000 }; + template + union Struct; + enum class FramebufferFormat : u32 { RGBA8 = 0, RGB8 = 1, @@ -62,7 +39,11 @@ struct Registers { RGBA4 = 4, }; - struct MemoryFillConfig { +}; + +template<> +union Regs::Struct { + struct { u32 address_start; u32 address_end; // ? 
u32 size; @@ -75,21 +56,15 @@ struct Registers { inline u32 GetEndAddress() const { return address_end * 8; } - }; + } data; +}; +static_assert(sizeof(Regs::Struct) == 0x10, "Structure size and register block length don't match"); - MemoryFillConfig memory_fill[2]; +template<> +union Regs::Struct { + using Format = Regs::FramebufferFormat; - // TODO: Move these into the framebuffer struct - u32 framebuffer_top_left_1; - u32 framebuffer_top_left_2; - u32 framebuffer_top_right_1; - u32 framebuffer_top_right_2; - u32 framebuffer_sub_left_1; - u32 framebuffer_sub_left_2; - u32 framebuffer_sub_right_1; - u32 framebuffer_sub_right_2; - - struct FrameBufferConfig { + struct { union { u32 size; @@ -97,22 +72,43 @@ struct Registers { BitField<16, 16, u32> height; }; + u32 pad0[2]; + + u32 address_left1; + u32 address_left2; + union { u32 format; - BitField< 0, 3, FramebufferFormat> color_format; + BitField< 0, 3, Format> color_format; }; + u32 pad1; + union { u32 active_fb; BitField<0, 1, u32> second_fb_active; }; + u32 pad2[5]; + u32 stride; - }; - FrameBufferConfig top_framebuffer; - FrameBufferConfig sub_framebuffer; + + u32 address_right1; + u32 address_right2; + } data; +}; +template<> +union Regs::Struct { + using Type = decltype(Regs::Struct::data); + Type data; +}; +static_assert(sizeof(Regs::Struct) == 0x40, "Structure size and register block length don't match"); + +template<> +union Regs::Struct { + using Format = Regs::FramebufferFormat; struct { u32 input_address; @@ -144,21 +140,31 @@ struct Registers { u32 flags; BitField< 0, 1, u32> flip_data; - BitField< 8, 3, FramebufferFormat> input_format; - BitField<12, 3, FramebufferFormat> output_format; + BitField< 8, 3, Format> input_format; + BitField<12, 3, Format> output_format; BitField<16, 1, u32> output_tiled; }; u32 unknown; u32 trigger; - } display_transfer; - - u32 command_list_size; - u32 command_list_address; - u32 command_processing_enabled; + } data; }; +static_assert(sizeof(Regs::Struct) == 0x1C, 
"Structure size and register block length don't match"); -extern Registers g_regs; +template<> +union Regs::Struct { + struct { + u32 size; + u32 pad0; + u32 address; + u32 pad1; + u32 trigger; + } data; +}; +static_assert(sizeof(Regs::Struct) == 0x14, "Structure size and register block length don't match"); + + +extern RegisterSet g_regs; enum { TOP_ASPECT_X = 0x5, @@ -208,7 +214,7 @@ enum FramebufferLocation { /** * Sets whether the framebuffers are in the GSP heap (FCRAM) or VRAM - * @param + * @param */ void SetFramebufferLocation(const FramebufferLocation mode); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 047c691850..8d9d61ae82 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -12,8 +12,8 @@ /// RendererOpenGL constructor RendererOpenGL::RendererOpenGL() { - memset(m_fbo, 0, sizeof(m_fbo)); - memset(m_fbo_rbo, 0, sizeof(m_fbo_rbo)); + memset(m_fbo, 0, sizeof(m_fbo)); + memset(m_fbo_rbo, 0, sizeof(m_fbo_rbo)); memset(m_fbo_depth_buffers, 0, sizeof(m_fbo_depth_buffers)); m_resolution_width = max(VideoCore::kScreenTopWidth, VideoCore::kScreenBottomWidth); @@ -35,7 +35,7 @@ void RendererOpenGL::SwapBuffers() { m_render_window->MakeCurrent(); // EFB->XFB copy - // TODO(bunnei): This is a hack and does not belong here. The copy should be triggered by some + // TODO(bunnei): This is a hack and does not belong here. The copy should be triggered by some // register write We're also treating both framebuffers as a single one in OpenGL. 
common::Rect framebuffer_size(0, 0, m_resolution_width, m_resolution_height); RenderXFB(framebuffer_size, framebuffer_size); @@ -71,24 +71,26 @@ void RendererOpenGL::FlipFramebuffer(const u8* in, u8* out) { } } -/** +/** * Renders external framebuffer (XFB) * @param src_rect Source rectangle in XFB to copy * @param dst_rect Destination rectangle in output framebuffer to copy to */ void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& dst_rect) { - const u32 active_fb_top = (GPU::g_regs.top_framebuffer.active_fb == 1) - ? GPU::g_regs.framebuffer_top_left_2 - : GPU::g_regs.framebuffer_top_left_1; - const u32 active_fb_sub = (GPU::g_regs.sub_framebuffer.active_fb == 1) - ? GPU::g_regs.framebuffer_sub_left_2 - : GPU::g_regs.framebuffer_sub_left_1; + const auto& framebuffer_top = GPU::g_regs.Get(); + const auto& framebuffer_sub = GPU::g_regs.Get(); + const u32 active_fb_top = (framebuffer_top.data.active_fb == 1) + ? framebuffer_top.data.address_left2 + : framebuffer_top.data.address_left1; + const u32 active_fb_sub = (framebuffer_sub.data.active_fb == 1) + ? framebuffer_sub.data.address_left2 + : framebuffer_sub.data.address_left1; DEBUG_LOG(GPU, "RenderXFB: %x bytes from %x(%xx%x), fmt %x", - GPU::g_regs.top_framebuffer.stride * GPU::g_regs.top_framebuffer.height, - GPU::GetFramebufferAddr(GPU::g_regs.framebuffer_top_left_1), (int)GPU::g_regs.top_framebuffer.width, - (int)GPU::g_regs.top_framebuffer.height, (int)GPU::g_regs.top_framebuffer.format); + framebuffer_top.data.stride * framebuffer_top.data.height, + GPU::GetFramebufferAddr(active_fb_top), (int)framebuffer_top.data.width, + (int)framebuffer_top.data.height, (int)framebuffer_top.data.format); // TODO: This should consider the GPU registers for framebuffer width, height and stride. 
FlipFramebuffer(GPU::GetFramebufferPointer(active_fb_top), m_xfb_top_flipped); @@ -112,7 +114,7 @@ void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& glReadBuffer(GL_COLOR_ATTACHMENT0); // Blit - glBlitFramebuffer(src_rect.x0_, src_rect.y0_, src_rect.x1_, src_rect.y1_, + glBlitFramebuffer(src_rect.x0_, src_rect.y0_, src_rect.x1_, src_rect.y1_, dst_rect.x0_, dst_rect.y1_, dst_rect.x1_, dst_rect.y0_, GL_COLOR_BUFFER_BIT, GL_LINEAR); @@ -138,7 +140,7 @@ void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& // Blit int offset = (VideoCore::kScreenTopWidth - VideoCore::kScreenBottomWidth) / 2; - glBlitFramebuffer(0,0, VideoCore::kScreenBottomWidth, VideoCore::kScreenBottomHeight, + glBlitFramebuffer(0,0, VideoCore::kScreenBottomWidth, VideoCore::kScreenBottomHeight, offset, VideoCore::kScreenBottomHeight, VideoCore::kScreenBottomWidth + offset, 0, GL_COLOR_BUFFER_BIT, GL_LINEAR); @@ -147,7 +149,7 @@ void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& /// Initialize the FBO void RendererOpenGL::InitFramebuffer() { - // TODO(bunnei): This should probably be implemented with the top screen and bottom screen as + // TODO(bunnei): This should probably be implemented with the top screen and bottom screen as // separate framebuffers // Init the FBOs @@ -160,12 +162,12 @@ void RendererOpenGL::InitFramebuffer() { for (int i = 0; i < kMaxFramebuffers; i++) { // Generate color buffer storage glBindRenderbuffer(GL_RENDERBUFFER, m_fbo_rbo[i]); - glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, VideoCore::kScreenTopWidth, + glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, VideoCore::kScreenTopWidth, VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight); // Generate depth buffer storage glBindRenderbuffer(GL_RENDERBUFFER, m_fbo_depth_buffers[i]); - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT32, VideoCore::kScreenTopWidth, + glRenderbufferStorage(GL_RENDERBUFFER, 
GL_DEPTH_COMPONENT32, VideoCore::kScreenTopWidth, VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight); // Attach the buffers @@ -181,7 +183,7 @@ void RendererOpenGL::InitFramebuffer() { } else { ERROR_LOG(RENDER, "couldn't create OpenGL frame buffer"); exit(1); - } + } } glBindFramebuffer(GL_FRAMEBUFFER, 0); // Unbind our frame buffer(s) @@ -189,8 +191,8 @@ void RendererOpenGL::InitFramebuffer() { // ------------------------------- // Create XFB textures - glGenTextures(1, &m_xfb_texture_top); - glGenTextures(1, &m_xfb_texture_bottom); + glGenTextures(1, &m_xfb_texture_top); + glGenTextures(1, &m_xfb_texture_bottom); // Alocate video memorry for XFB textures glBindTexture(GL_TEXTURE_2D, m_xfb_texture_top); @@ -206,13 +208,13 @@ void RendererOpenGL::InitFramebuffer() { // Create the FBO and attach color/depth textures glGenFramebuffers(1, &m_xfb_top); // Generate framebuffer glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_xfb_top); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_xfb_texture_top, 0); glBindFramebuffer(GL_FRAMEBUFFER, 0); glGenFramebuffers(1, &m_xfb_bottom); // Generate framebuffer glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_xfb_bottom); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_xfb_texture_bottom, 0); glBindFramebuffer(GL_FRAMEBUFFER, 0); } @@ -228,7 +230,7 @@ void RendererOpenGL::RenderFramebuffer() { glReadBuffer(GL_COLOR_ATTACHMENT0); // Blit - glBlitFramebuffer(0, 0, m_resolution_width, m_resolution_height, 0, 0, m_resolution_width, + glBlitFramebuffer(0, 0, m_resolution_width, m_resolution_height, 0, 0, m_resolution_width, m_resolution_height, GL_COLOR_BUFFER_BIT, GL_LINEAR); // Update the FPS count @@ -244,7 +246,7 @@ void RendererOpenGL::RenderFramebuffer() { void RendererOpenGL::UpdateFramerate() { } 
-/** +/** * Set the emulator window to use for renderer * @param window EmuWindow handle to emulator window to use for rendering */ @@ -278,7 +280,7 @@ void RendererOpenGL::Init() { GLenum err = glewInit(); if (GLEW_OK != err) { - ERROR_LOG(RENDER, "Failed to initialize GLEW! Error message: \"%s\". Exiting...", + ERROR_LOG(RENDER, "Failed to initialize GLEW! Error message: \"%s\". Exiting...", glewGetErrorString(err)); exit(-1); } From 246cb75584af281596b938f898e8a3aedbcdb62a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 16 Jul 2014 11:27:58 +0200 Subject: [PATCH 16/22] RegisterSet: Simplify code by using structs for register definition instead of unions. --- src/common/register_set.h | 14 +- src/core/hw/gpu.cpp | 100 ++++++------ src/core/hw/gpu.h | 150 ++++++++---------- .../renderer_opengl/renderer_opengl.cpp | 18 +-- 4 files changed, 137 insertions(+), 145 deletions(-) diff --git a/src/common/register_set.h b/src/common/register_set.h index 0418551b34..ba19a2614d 100644 --- a/src/common/register_set.h +++ b/src/common/register_set.h @@ -34,7 +34,7 @@ /* * Standardized way to define a group of registers and corresponding data structures. To define * a new register set, first define struct containing an enumeration called "Id" containing - * all register IDs and a template union called "Struct". Specialize the Struct union for any + * all register IDs and a template struct called "Struct". Specialize the Struct struct for any * register ID which needs to be accessed in a specialized way. You can then declare the object * containing all register values using the RegisterSet type, where * BaseType is the underlying type of each register (e.g. u32). 
@@ -54,7 +54,7 @@ * * // declare register definition structures * template - * union Struct; + * struct Struct; * }; * * // Define register set object @@ -62,9 +62,11 @@ * * // define register definition structures * template<> - * union Regs::Struct { - * BitField<0, 4, u32> some_field; - * BitField<4, 3, u32> some_other_field; + * struct Regs::Struct { + * union { + * BitField<0, 4, u32> some_field; + * BitField<4, 3, u32> some_other_field; + * }; * }; * * Usage in external code (within SomeNamespace scope): @@ -77,7 +79,7 @@ * * * @tparam BaseType Base type used for storing individual registers, e.g. u32 - * @tparam RegDefinition Class defining an enumeration called "Id" and a template union, as described above. + * @tparam RegDefinition Class defining an enumeration called "Id" and a template struct, as described above. * @note RegDefinition::Id needs to have an enum value called NumIds defining the number of registers to be allocated. */ template diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 372e4f4cc9..edffa25c57 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -30,14 +30,14 @@ void SetFramebufferLocation(const FramebufferLocation mode) { auto& framebuffer_top = g_regs.Get(); auto& framebuffer_sub = g_regs.Get(); - framebuffer_top.data.address_left1 = PADDR_TOP_LEFT_FRAME1; - framebuffer_top.data.address_left2 = PADDR_TOP_LEFT_FRAME2; - framebuffer_top.data.address_right1 = PADDR_TOP_RIGHT_FRAME1; - framebuffer_top.data.address_right2 = PADDR_TOP_RIGHT_FRAME2; - framebuffer_sub.data.address_left1 = PADDR_SUB_FRAME1; - //framebuffer_sub.data.address_left2 = unknown; - framebuffer_sub.data.address_right1 = PADDR_SUB_FRAME2; - //framebuffer_sub.data.address_right2 = unknown; + framebuffer_top.address_left1 = PADDR_TOP_LEFT_FRAME1; + framebuffer_top.address_left2 = PADDR_TOP_LEFT_FRAME2; + framebuffer_top.address_right1 = PADDR_TOP_RIGHT_FRAME1; + framebuffer_top.address_right2 = PADDR_TOP_RIGHT_FRAME2; + framebuffer_sub.address_left1 
= PADDR_SUB_FRAME1; + //framebuffer_sub.address_left2 = unknown; + framebuffer_sub.address_right1 = PADDR_SUB_FRAME2; + //framebuffer_sub.address_right2 = unknown; break; } @@ -46,14 +46,14 @@ void SetFramebufferLocation(const FramebufferLocation mode) { auto& framebuffer_top = g_regs.Get(); auto& framebuffer_sub = g_regs.Get(); - framebuffer_top.data.address_left1 = PADDR_VRAM_TOP_LEFT_FRAME1; - framebuffer_top.data.address_left2 = PADDR_VRAM_TOP_LEFT_FRAME2; - framebuffer_top.data.address_right1 = PADDR_VRAM_TOP_RIGHT_FRAME1; - framebuffer_top.data.address_right2 = PADDR_VRAM_TOP_RIGHT_FRAME2; - framebuffer_sub.data.address_left1 = PADDR_VRAM_SUB_FRAME1; - //framebuffer_sub.data.address_left2 = unknown; - framebuffer_sub.data.address_right1 = PADDR_VRAM_SUB_FRAME2; - //framebuffer_sub.data.address_right2 = unknown; + framebuffer_top.address_left1 = PADDR_VRAM_TOP_LEFT_FRAME1; + framebuffer_top.address_left2 = PADDR_VRAM_TOP_LEFT_FRAME2; + framebuffer_top.address_right1 = PADDR_VRAM_TOP_RIGHT_FRAME1; + framebuffer_top.address_right2 = PADDR_VRAM_TOP_RIGHT_FRAME2; + framebuffer_sub.address_left1 = PADDR_VRAM_SUB_FRAME1; + //framebuffer_sub.address_left2 = unknown; + framebuffer_sub.address_right1 = PADDR_VRAM_SUB_FRAME2; + //framebuffer_sub.address_right2 = unknown; break; } } @@ -135,14 +135,14 @@ inline void Write(u32 addr, const T data) { const auto& config = g_regs.Get(static_cast(index - 3)); // TODO: Not sure if this check should be done at GSP level instead - if (config.data.address_start) { + if (config.address_start) { // TODO: Not sure if this algorithm is correct, particularly because it doesn't use the size member at all - u32* start = (u32*)Memory::GetPointer(config.data.GetStartAddress()); - u32* end = (u32*)Memory::GetPointer(config.data.GetEndAddress()); + u32* start = (u32*)Memory::GetPointer(config.GetStartAddress()); + u32* end = (u32*)Memory::GetPointer(config.GetEndAddress()); for (u32* ptr = start; ptr < end; ++ptr) - *ptr = 
bswap32(config.data.value); // TODO: This is just a workaround to missing framebuffer format emulation + *ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation - DEBUG_LOG(GPU, "MemoryFill from %x to %x", config.data.GetStartAddress(), config.data.GetEndAddress()); + DEBUG_LOG(GPU, "MemoryFill from %x to %x", config.GetStartAddress(), config.GetEndAddress()); } break; } @@ -150,20 +150,20 @@ inline void Write(u32 addr, const T data) { case Regs::DisplayTransfer + 6: { const auto& config = g_regs.Get(); - if (config.data.trigger & 1) { - u8* source_pointer = Memory::GetPointer(config.data.GetPhysicalInputAddress()); - u8* dest_pointer = Memory::GetPointer(config.data.GetPhysicalOutputAddress()); + if (config.trigger & 1) { + u8* source_pointer = Memory::GetPointer(config.GetPhysicalInputAddress()); + u8* dest_pointer = Memory::GetPointer(config.GetPhysicalOutputAddress()); - for (int y = 0; y < config.data.output_height; ++y) { + for (int y = 0; y < config.output_height; ++y) { // TODO: Why does the register seem to hold twice the framebuffer width? - for (int x = 0; x < config.data.output_width / 2; ++x) { + for (int x = 0; x < config.output_width / 2; ++x) { int source[4] = { 0, 0, 0, 0}; // rgba; - switch (config.data.input_format) { + switch (config.input_format) { case Regs::FramebufferFormat::RGBA8: { // TODO: Most likely got the component order messed up. 
- u8* srcptr = source_pointer + x * 4 + y * config.data.input_width * 4 / 2; + u8* srcptr = source_pointer + x * 4 + y * config.input_width * 4 / 2; source[0] = srcptr[0]; // blue source[1] = srcptr[1]; // green source[2] = srcptr[2]; // red @@ -172,15 +172,15 @@ inline void Write(u32 addr, const T data) { } default: - ERROR_LOG(GPU, "Unknown source framebuffer format %x", config.data.input_format.Value()); + ERROR_LOG(GPU, "Unknown source framebuffer format %x", config.input_format.Value()); break; } - switch (config.data.output_format) { + switch (config.output_format) { /*case Regs::FramebufferFormat::RGBA8: { // TODO: Untested - u8* dstptr = (u32*)(dest_pointer + x * 4 + y * config.data.output_width * 4); + u8* dstptr = (u32*)(dest_pointer + x * 4 + y * config.output_width * 4); dstptr[0] = source[0]; dstptr[1] = source[1]; dstptr[2] = source[2]; @@ -190,7 +190,7 @@ inline void Write(u32 addr, const T data) { case Regs::FramebufferFormat::RGB8: { - u8* dstptr = dest_pointer + x * 3 + y * config.data.output_width * 3 / 2; + u8* dstptr = dest_pointer + x * 3 + y * config.output_width * 3 / 2; dstptr[0] = source[0]; // blue dstptr[1] = source[1]; // green dstptr[2] = source[2]; // red @@ -198,17 +198,17 @@ inline void Write(u32 addr, const T data) { } default: - ERROR_LOG(GPU, "Unknown destination framebuffer format %x", config.data.output_format.Value()); + ERROR_LOG(GPU, "Unknown destination framebuffer format %x", config.output_format.Value()); break; } } } DEBUG_LOG(GPU, "DisplayTriggerTransfer: %x bytes from %x(%xx%x)-> %x(%xx%x), dst format %x", - config.data.output_height * config.data.output_width * 4, - config.data.GetPhysicalInputAddress(), (int)config.data.input_width, (int)config.data.input_height, - config.data.GetPhysicalOutputAddress(), (int)config.data.output_width, (int)config.data.output_height, - config.data.output_format.Value()); + config.output_height * config.output_width * 4, + config.GetPhysicalInputAddress(), (int)config.input_width, 
(int)config.input_height, + config.GetPhysicalOutputAddress(), (int)config.output_width, (int)config.output_height, + config.output_format.Value()); } break; } @@ -216,10 +216,10 @@ inline void Write(u32 addr, const T data) { case Regs::CommandProcessor + 4: { const auto& config = g_regs.Get(); - if (config.data.trigger & 1) + if (config.trigger & 1) { - // u32* buffer = (u32*)Memory::GetPointer(config.data.address << 3); - ERROR_LOG(GPU, "Beginning %x bytes of commands from address %x", config.data.size, config.data.address << 3); + // u32* buffer = (u32*)Memory::GetPointer(config.address << 3); + ERROR_LOG(GPU, "Beginning %x bytes of commands from address %x", config.size, config.address << 3); // TODO: Process command list! } break; @@ -263,17 +263,17 @@ void Init() { auto& framebuffer_top = g_regs.Get(); auto& framebuffer_sub = g_regs.Get(); // TODO: Width should be 240 instead? - framebuffer_top.data.width = 480; - framebuffer_top.data.height = 400; - framebuffer_top.data.stride = 480*3; - framebuffer_top.data.color_format = Regs::FramebufferFormat::RGB8; - framebuffer_top.data.active_fb = 0; + framebuffer_top.width = 480; + framebuffer_top.height = 400; + framebuffer_top.stride = 480*3; + framebuffer_top.color_format = Regs::FramebufferFormat::RGB8; + framebuffer_top.active_fb = 0; - framebuffer_sub.data.width = 480; - framebuffer_sub.data.height = 400; - framebuffer_sub.data.stride = 480*3; - framebuffer_sub.data.color_format = Regs::FramebufferFormat::RGB8; - framebuffer_sub.data.active_fb = 0; + framebuffer_sub.width = 480; + framebuffer_sub.height = 400; + framebuffer_sub.stride = 480*3; + framebuffer_sub.color_format = Regs::FramebufferFormat::RGB8; + framebuffer_sub.active_fb = 0; NOTICE_LOG(GPU, "initialized OK"); } diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index ce524bd029..4ef0a047fb 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -29,7 +29,7 @@ struct Regs { }; template - union Struct; + struct Struct; enum class FramebufferFormat 
: u32 { RGBA8 = 0, @@ -38,128 +38,118 @@ struct Regs { RGB5A1 = 3, RGBA4 = 4, }; - }; template<> -union Regs::Struct { - struct { - u32 address_start; - u32 address_end; // ? - u32 size; - u32 value; // ? +struct Regs::Struct { + u32 address_start; + u32 address_end; // ? + u32 size; + u32 value; // ? - inline u32 GetStartAddress() const { - return address_start * 8; - } + inline u32 GetStartAddress() const { + return address_start * 8; + } - inline u32 GetEndAddress() const { - return address_end * 8; - } - } data; + inline u32 GetEndAddress() const { + return address_end * 8; + } }; static_assert(sizeof(Regs::Struct) == 0x10, "Structure size and register block length don't match"); template<> -union Regs::Struct { +struct Regs::Struct { using Format = Regs::FramebufferFormat; - struct { - union { - u32 size; + union { + u32 size; - BitField< 0, 16, u32> width; - BitField<16, 16, u32> height; - }; + BitField< 0, 16, u32> width; + BitField<16, 16, u32> height; + }; - u32 pad0[2]; + u32 pad0[2]; - u32 address_left1; - u32 address_left2; + u32 address_left1; + u32 address_left2; - union { - u32 format; + union { + u32 format; - BitField< 0, 3, Format> color_format; - }; + BitField< 0, 3, Format> color_format; + }; - u32 pad1; + u32 pad1; - union { - u32 active_fb; + union { + u32 active_fb; - BitField<0, 1, u32> second_fb_active; - }; + BitField<0, 1, u32> second_fb_active; + }; - u32 pad2[5]; + u32 pad2[5]; - u32 stride; + u32 stride; - u32 address_right1; - u32 address_right2; - } data; + u32 address_right1; + u32 address_right2; }; + template<> -union Regs::Struct { - using Type = decltype(Regs::Struct::data); - Type data; +struct Regs::Struct : public Regs::Struct { }; static_assert(sizeof(Regs::Struct) == 0x40, "Structure size and register block length don't match"); template<> -union Regs::Struct { +struct Regs::Struct { using Format = Regs::FramebufferFormat; - struct { - u32 input_address; - u32 output_address; + u32 input_address; + u32 output_address; - 
inline u32 GetPhysicalInputAddress() const { - return input_address * 8; - } + inline u32 GetPhysicalInputAddress() const { + return input_address * 8; + } - inline u32 GetPhysicalOutputAddress() const { - return output_address * 8; - } + inline u32 GetPhysicalOutputAddress() const { + return output_address * 8; + } - union { - u32 output_size; + union { + u32 output_size; - BitField< 0, 16, u32> output_width; - BitField<16, 16, u32> output_height; - }; + BitField< 0, 16, u32> output_width; + BitField<16, 16, u32> output_height; + }; - union { - u32 input_size; + union { + u32 input_size; - BitField< 0, 16, u32> input_width; - BitField<16, 16, u32> input_height; - }; + BitField< 0, 16, u32> input_width; + BitField<16, 16, u32> input_height; + }; - union { - u32 flags; + union { + u32 flags; - BitField< 0, 1, u32> flip_data; - BitField< 8, 3, Format> input_format; - BitField<12, 3, Format> output_format; - BitField<16, 1, u32> output_tiled; - }; + BitField< 0, 1, u32> flip_data; + BitField< 8, 3, Format> input_format; + BitField<12, 3, Format> output_format; + BitField<16, 1, u32> output_tiled; + }; - u32 unknown; - u32 trigger; - } data; + u32 unknown; + u32 trigger; }; static_assert(sizeof(Regs::Struct) == 0x1C, "Structure size and register block length don't match"); template<> -union Regs::Struct { - struct { - u32 size; - u32 pad0; - u32 address; - u32 pad1; - u32 trigger; - } data; +struct Regs::Struct { + u32 size; + u32 pad0; + u32 address; + u32 pad1; + u32 trigger; }; static_assert(sizeof(Regs::Struct) == 0x14, "Structure size and register block length don't match"); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 8d9d61ae82..50f820e4aa 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -80,17 +80,17 @@ void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& const auto& framebuffer_top 
= GPU::g_regs.Get(); const auto& framebuffer_sub = GPU::g_regs.Get(); - const u32 active_fb_top = (framebuffer_top.data.active_fb == 1) - ? framebuffer_top.data.address_left2 - : framebuffer_top.data.address_left1; - const u32 active_fb_sub = (framebuffer_sub.data.active_fb == 1) - ? framebuffer_sub.data.address_left2 - : framebuffer_sub.data.address_left1; + const u32 active_fb_top = (framebuffer_top.active_fb == 1) + ? framebuffer_top.address_left2 + : framebuffer_top.address_left1; + const u32 active_fb_sub = (framebuffer_sub.active_fb == 1) + ? framebuffer_sub.address_left2 + : framebuffer_sub.address_left1; DEBUG_LOG(GPU, "RenderXFB: %x bytes from %x(%xx%x), fmt %x", - framebuffer_top.data.stride * framebuffer_top.data.height, - GPU::GetFramebufferAddr(active_fb_top), (int)framebuffer_top.data.width, - (int)framebuffer_top.data.height, (int)framebuffer_top.data.format); + framebuffer_top.stride * framebuffer_top.height, + GPU::GetFramebufferAddr(active_fb_top), (int)framebuffer_top.width, + (int)framebuffer_top.height, (int)framebuffer_top.format); // TODO: This should consider the GPU registers for framebuffer width, height and stride. FlipFramebuffer(GPU::GetFramebufferPointer(active_fb_top), m_xfb_top_flipped); From 9b0d0c81a006ebd9e054758bc2c973d67650ca70 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 22 Jul 2014 12:41:16 +0200 Subject: [PATCH 17/22] GSP: Clean up GX command processing a lot and treat command id as a u8 rather than a u32. Anonymous structs are not standard C++, hence don't use them. 
--- src/citra_qt/debugger/graphics.cpp | 34 ++++++++-------- src/core/hle/service/gsp.cpp | 62 ++++++++++++++++++------------ src/core/hle/service/gsp.h | 52 +++++++++++++++++++------ src/video_core/gpu_debugger.h | 5 +-- 4 files changed, 98 insertions(+), 55 deletions(-) diff --git a/src/citra_qt/debugger/graphics.cpp b/src/citra_qt/debugger/graphics.cpp index 9aaade8f92..0f911a0155 100644 --- a/src/citra_qt/debugger/graphics.cpp +++ b/src/citra_qt/debugger/graphics.cpp @@ -28,22 +28,24 @@ QVariant GPUCommandStreamItemModel::data(const QModelIndex& index, int role) con const GSP_GPU::GXCommand& command = GetDebugger()->ReadGXCommandHistory(command_index); if (role == Qt::DisplayRole) { - std::map command_names; - command_names[GSP_GPU::GXCommandId::REQUEST_DMA] = "REQUEST_DMA"; - command_names[GSP_GPU::GXCommandId::SET_COMMAND_LIST_FIRST] = "SET_COMMAND_LIST_FIRST"; - command_names[GSP_GPU::GXCommandId::SET_MEMORY_FILL] = "SET_MEMORY_FILL"; - command_names[GSP_GPU::GXCommandId::SET_DISPLAY_TRANSFER] = "SET_DISPLAY_TRANSFER"; - command_names[GSP_GPU::GXCommandId::SET_TEXTURE_COPY] = "SET_TEXTURE_COPY"; - command_names[GSP_GPU::GXCommandId::SET_COMMAND_LIST_LAST] = "SET_COMMAND_LIST_LAST"; - QString str = QString("%1 %2 %3 %4 %5 %6 %7 %8 %9").arg(command_names[static_cast(command.id)]) - .arg(command.data[0], 8, 16, QLatin1Char('0')) - .arg(command.data[1], 8, 16, QLatin1Char('0')) - .arg(command.data[2], 8, 16, QLatin1Char('0')) - .arg(command.data[3], 8, 16, QLatin1Char('0')) - .arg(command.data[4], 8, 16, QLatin1Char('0')) - .arg(command.data[5], 8, 16, QLatin1Char('0')) - .arg(command.data[6], 8, 16, QLatin1Char('0')) - .arg(command.data[7], 8, 16, QLatin1Char('0')); + std::map command_names = { + { GSP_GPU::GXCommandId::REQUEST_DMA, "REQUEST_DMA" }, + { GSP_GPU::GXCommandId::SET_COMMAND_LIST_FIRST, "SET_COMMAND_LIST_FIRST" }, + { GSP_GPU::GXCommandId::SET_MEMORY_FILL, "SET_MEMORY_FILL" }, + { GSP_GPU::GXCommandId::SET_DISPLAY_TRANSFER, "SET_DISPLAY_TRANSFER" }, 
+ { GSP_GPU::GXCommandId::SET_TEXTURE_COPY, "SET_TEXTURE_COPY" }, + { GSP_GPU::GXCommandId::SET_COMMAND_LIST_LAST, "SET_COMMAND_LIST_LAST" } + }; + const u32* command_data = reinterpret_cast(&command); + QString str = QString("%1 %2 %3 %4 %5 %6 %7 %8 %9").arg(command_names[command.id]) + .arg(command_data[0], 8, 16, QLatin1Char('0')) + .arg(command_data[1], 8, 16, QLatin1Char('0')) + .arg(command_data[2], 8, 16, QLatin1Char('0')) + .arg(command_data[3], 8, 16, QLatin1Char('0')) + .arg(command_data[4], 8, 16, QLatin1Char('0')) + .arg(command_data[5], 8, 16, QLatin1Char('0')) + .arg(command_data[6], 8, 16, QLatin1Char('0')) + .arg(command_data[7], 8, 16, QLatin1Char('0')); return QVariant(str); } else diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp index 053c7dd2ca..05753fa2c4 100644 --- a/src/core/hle/service/gsp.cpp +++ b/src/core/hle/service/gsp.cpp @@ -160,60 +160,72 @@ void TriggerCmdReqQueue(Service::Interface* self) { }; GX_CmdBufferHeader* header = (GX_CmdBufferHeader*)GX_GetCmdBufferPointer(g_thread_id); - u32* cmd_buff = (u32*)GX_GetCmdBufferPointer(g_thread_id, 0x20 + (header->index * 0x20)); + auto& command = *(const GXCommand*)GX_GetCmdBufferPointer(g_thread_id, 0x20 + (header->index * 0x20)); - switch (static_cast(cmd_buff[0])) { + switch (command.id) { // GX request DMA - typically used for copying memory from GSP heap to VRAM case GXCommandId::REQUEST_DMA: - memcpy(Memory::GetPointer(cmd_buff[2]), Memory::GetPointer(cmd_buff[1]), cmd_buff[3]); + memcpy(Memory::GetPointer(command.dma_request.dest_address), + Memory::GetPointer(command.dma_request.source_address), + command.dma_request.size); break; case GXCommandId::SET_COMMAND_LIST_LAST: - WriteGPURegister(GPU::Regs::CommandProcessor + 2, cmd_buff[1] >> 3); // command list data address - WriteGPURegister(GPU::Regs::CommandProcessor, cmd_buff[2] >> 3); // command list address - WriteGPURegister(GPU::Regs::CommandProcessor + 4, 1); // TODO: Not sure if we are supposed to always 
write this .. seems to trigger processing though + { + auto& params = command.set_command_list_last; + WriteGPURegister(GPU::Regs::CommandProcessor + 2, params.address >> 3); + WriteGPURegister(GPU::Regs::CommandProcessor, params.size >> 3); + WriteGPURegister(GPU::Regs::CommandProcessor + 4, 1); // TODO: Not sure if we are supposed to always write this .. seems to trigger processing though // TODO: Move this to GPU // TODO: Not sure what units the size is measured in - g_debugger.CommandListCalled(cmd_buff[1], (u32*)Memory::GetPointer(cmd_buff[1]), cmd_buff[2]); + g_debugger.CommandListCalled(params.address, + (u32*)Memory::GetPointer(params.address), + params.size); break; + } case GXCommandId::SET_MEMORY_FILL: - WriteGPURegister(GPU::Regs::MemoryFill, cmd_buff[1] >> 3); // Start 1 - WriteGPURegister(GPU::Regs::MemoryFill + 1, cmd_buff[3] >> 3); // End 1 - WriteGPURegister(GPU::Regs::MemoryFill + 2, cmd_buff[3] - cmd_buff[1]); // Size 1 - WriteGPURegister(GPU::Regs::MemoryFill + 3, cmd_buff[2]); // Value 1 + { + auto& params = command.memory_fill; + WriteGPURegister(GPU::Regs::MemoryFill, params.start1 >> 3); + WriteGPURegister(GPU::Regs::MemoryFill + 1, params.end1 >> 3); + WriteGPURegister(GPU::Regs::MemoryFill + 2, params.end1 - params.start1); + WriteGPURegister(GPU::Regs::MemoryFill + 3, params.value1); - WriteGPURegister(GPU::Regs::MemoryFill + 4, cmd_buff[4] >> 3); // Start 2 - WriteGPURegister(GPU::Regs::MemoryFill + 5, cmd_buff[6] >> 3); // End 2 - WriteGPURegister(GPU::Regs::MemoryFill + 6, cmd_buff[6] - cmd_buff[4]); // Size 2 - WriteGPURegister(GPU::Regs::MemoryFill + 7, cmd_buff[5]); // Value 2 + WriteGPURegister(GPU::Regs::MemoryFill + 4, params.start2 >> 3); + WriteGPURegister(GPU::Regs::MemoryFill + 5, params.end2 >> 3); + WriteGPURegister(GPU::Regs::MemoryFill + 6, params.end2 - params.start2); + WriteGPURegister(GPU::Regs::MemoryFill + 7, params.value2); break; + } // TODO: Check if texture copies are implemented correctly.. 
case GXCommandId::SET_DISPLAY_TRANSFER: case GXCommandId::SET_TEXTURE_COPY: - WriteGPURegister(GPU::Regs::DisplayTransfer, cmd_buff[1] >> 3); // input buffer address - WriteGPURegister(GPU::Regs::DisplayTransfer + 1, cmd_buff[2] >> 3); // output buffer address - WriteGPURegister(GPU::Regs::DisplayTransfer + 3, cmd_buff[3]); // input buffer size - WriteGPURegister(GPU::Regs::DisplayTransfer + 2, cmd_buff[4]); // output buffer size - WriteGPURegister(GPU::Regs::DisplayTransfer + 4, cmd_buff[5]); // transfer flags + { + auto& params = command.image_copy; + WriteGPURegister(GPU::Regs::DisplayTransfer, params.in_buffer_address >> 3); + WriteGPURegister(GPU::Regs::DisplayTransfer + 1, params.out_buffer_address >> 3); + WriteGPURegister(GPU::Regs::DisplayTransfer + 3, params.in_buffer_size); + WriteGPURegister(GPU::Regs::DisplayTransfer + 2, params.out_buffer_size); + WriteGPURegister(GPU::Regs::DisplayTransfer + 4, params.flags); // TODO: Should this only be ORed with 1 for texture copies? - WriteGPURegister(GPU::Regs::DisplayTransfer + 6, 1); // trigger transfer + // trigger transfer + WriteGPURegister(GPU::Regs::DisplayTransfer + 6, 1); break; + } case GXCommandId::SET_COMMAND_LIST_FIRST: { - //u32* buf0_data = (u32*)Memory::GetPointer(cmd_buff[1]); - //u32* buf1_data = (u32*)Memory::GetPointer(cmd_buff[3]); - //u32* buf2_data = (u32*)Memory::GetPointer(cmd_buff[5]); + // TODO break; } default: - ERROR_LOG(GSP, "unknown command 0x%08X", cmd_buff[0]); + ERROR_LOG(GSP, "unknown command 0x%08X", (int)command.id.Value()); } GX_FinishCommand(g_thread_id); diff --git a/src/core/hle/service/gsp.h b/src/core/hle/service/gsp.h index fb50a928ae..f36afb697e 100644 --- a/src/core/hle/service/gsp.h +++ b/src/core/hle/service/gsp.h @@ -4,6 +4,7 @@ #pragma once +#include "common/bit_field.h" #include "core/hle/service/service.h" //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -12,21 +13,50 @@ namespace GSP_GPU { enum class 
GXCommandId : u32 { - REQUEST_DMA = 0x00000000, - SET_COMMAND_LIST_LAST = 0x00000001, - SET_MEMORY_FILL = 0x01000102, // TODO: Confirm? - SET_DISPLAY_TRANSFER = 0x00000003, - SET_TEXTURE_COPY = 0x00000004, - SET_COMMAND_LIST_FIRST = 0x00000005, + REQUEST_DMA = 0x00, + SET_COMMAND_LIST_LAST = 0x01, + SET_MEMORY_FILL = 0x02, + SET_DISPLAY_TRANSFER = 0x03, + SET_TEXTURE_COPY = 0x04, + SET_COMMAND_LIST_FIRST = 0x05, }; -union GXCommand { - struct { - GXCommandId id; +struct GXCommand { + BitField<0, 8, GXCommandId> id; + + union { + struct { + u32 source_address; + u32 dest_address; + u32 size; + } dma_request; + + struct { + u32 address; + u32 size; + } set_command_list_last; + + struct { + u32 start1; + u32 value1; + u32 end1; + u32 start2; + u32 value2; + u32 end2; + } memory_fill; + + struct { + u32 in_buffer_address; + u32 out_buffer_address; + u32 in_buffer_size; + u32 out_buffer_size; + u32 flags; + } image_copy; + + u8 raw_data[0x1C]; }; - - u32 data[0x20]; }; +static_assert(sizeof(GXCommand) == 0x20, "GXCommand struct has incorrect size"); /// Interface to "srv:" service class Interface : public Service::Interface { diff --git a/src/video_core/gpu_debugger.h b/src/video_core/gpu_debugger.h index ca1fb22d79..d92ceaa726 100644 --- a/src/video_core/gpu_debugger.h +++ b/src/video_core/gpu_debugger.h @@ -50,7 +50,7 @@ public: virtual void GXCommandProcessed(int total_command_count) { const GSP_GPU::GXCommand& cmd = observed->ReadGXCommandHistory(total_command_count-1); - ERROR_LOG(GSP, "Received command: id=%x", cmd.id); + ERROR_LOG(GSP, "Received command: id=%x", (int)cmd.id.Value()); } /** @@ -84,8 +84,7 @@ public: gx_command_history.push_back(GSP_GPU::GXCommand()); GSP_GPU::GXCommand& cmd = gx_command_history[gx_command_history.size()-1]; - const int cmd_length = sizeof(GSP_GPU::GXCommand); - memcpy(cmd.data, command_data, cmd_length); + memcpy(&cmd, command_data, sizeof(GSP_GPU::GXCommand)); ForEachObserver([this](DebuggerObserver* observer) { 
observer->GXCommandProcessed(this->gx_command_history.size()); From 4b141791ed8bb4c1d80b239a1195897876fa30bb Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 22 Jul 2014 13:04:16 +0200 Subject: [PATCH 18/22] GSP: Add a few comments. --- src/core/hle/service/gsp.cpp | 8 +++++++- src/core/hle/service/gsp.h | 8 ++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp index 05753fa2c4..b20203e278 100644 --- a/src/core/hle/service/gsp.cpp +++ b/src/core/hle/service/gsp.cpp @@ -171,6 +171,9 @@ void TriggerCmdReqQueue(Service::Interface* self) { command.dma_request.size); break; + // ctrulib homebrew sends all relevant command list data with this command, + // hence we do all "interesting" stuff here and do nothing in SET_COMMAND_LIST_FIRST. + // TODO: This will need some rework in the future. case GXCommandId::SET_COMMAND_LIST_LAST: { auto& params = command.set_command_list_last; @@ -186,6 +189,8 @@ void TriggerCmdReqQueue(Service::Interface* self) { break; } + // It's assumed that the two "blocks" behave equivalently. + // Presumably this is done simply to allow two memory fills to run in parallel. case GXCommandId::SET_MEMORY_FILL: { auto& params = command.memory_fill; @@ -218,9 +223,10 @@ void TriggerCmdReqQueue(Service::Interface* self) { break; } + // TODO: Figure out what exactly SET_COMMAND_LIST_FIRST and SET_COMMAND_LIST_LAST + // are supposed to do. case GXCommandId::SET_COMMAND_LIST_FIRST: { - // TODO break; } diff --git a/src/core/hle/service/gsp.h b/src/core/hle/service/gsp.h index f36afb697e..a83cb48465 100644 --- a/src/core/hle/service/gsp.h +++ b/src/core/hle/service/gsp.h @@ -15,9 +15,17 @@ namespace GSP_GPU { enum class GXCommandId : u32 { REQUEST_DMA = 0x00, SET_COMMAND_LIST_LAST = 0x01, + + // Fills a given memory range with a particular value SET_MEMORY_FILL = 0x02, + + // Copies an image and optionally performs color-conversion or scaling. 
+ // This is highly similar to the GameCube's EFB copy feature SET_DISPLAY_TRANSFER = 0x03, + + // Conceptionally similar to SET_DISPLAY_TRANSFER and presumable uses the same hardware path SET_TEXTURE_COPY = 0x04, + SET_COMMAND_LIST_FIRST = 0x05, }; From 61e2ffd4483bf2da0862e32449caa9f1cecc5b72 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 22 Jul 2014 13:21:57 +0200 Subject: [PATCH 19/22] GPU: Add documentation. --- src/core/hw/gpu.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 4ef0a047fb..3078e41423 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -84,11 +84,14 @@ struct Regs::Struct { union { u32 active_fb; + // 0: Use parameters ending with "1" + // 1: Use parameters ending with "2" BitField<0, 1, u32> second_fb_active; }; u32 pad2[5]; + // Distance between two pixel rows, in bytes u32 stride; u32 address_right1; @@ -132,23 +135,32 @@ struct Regs::Struct { union { u32 flags; - BitField< 0, 1, u32> flip_data; + BitField< 0, 1, u32> flip_data; // flips input data horizontally (TODO) if true BitField< 8, 3, Format> input_format; BitField<12, 3, Format> output_format; - BitField<16, 1, u32> output_tiled; + BitField<16, 1, u32> output_tiled; // stores output in a tiled format }; u32 unknown; + + // it seems that writing to this field triggers the display transfer u32 trigger; }; static_assert(sizeof(Regs::Struct) == 0x1C, "Structure size and register block length don't match"); template<> struct Regs::Struct { + // command list size u32 size; + u32 pad0; + + // command list address u32 address; + u32 pad1; + + // it seems that writing to this field triggers command list processing u32 trigger; }; static_assert(sizeof(Regs::Struct) == 0x14, "Structure size and register block length don't match"); From 2eb61dafc0c957ad1591150ff1f8cd002b8851bb Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 22 Jul 2014 13:29:25 +0200 Subject: [PATCH 20/22] GPU: Clarify display 
transfer code. Also makes the illogical component order more obvious. --- src/core/hw/gpu.cpp | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index edffa25c57..d18ff76250 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -157,17 +157,19 @@ inline void Write(u32 addr, const T data) { for (int y = 0; y < config.output_height; ++y) { // TODO: Why does the register seem to hold twice the framebuffer width? for (int x = 0; x < config.output_width / 2; ++x) { - int source[4] = { 0, 0, 0, 0}; // rgba; + struct { + int r, g, b, a; + } source_color = { 0, 0, 0, 0 }; switch (config.input_format) { case Regs::FramebufferFormat::RGBA8: { // TODO: Most likely got the component order messed up. u8* srcptr = source_pointer + x * 4 + y * config.input_width * 4 / 2; - source[0] = srcptr[0]; // blue - source[1] = srcptr[1]; // green - source[2] = srcptr[2]; // red - source[3] = srcptr[3]; // alpha + source_color.r = srcptr[0]; // blue + source_color.g = srcptr[1]; // green + source_color.b = srcptr[2]; // red + source_color.a = srcptr[3]; // alpha break; } @@ -181,19 +183,20 @@ inline void Write(u32 addr, const T data) { { // TODO: Untested u8* dstptr = (u32*)(dest_pointer + x * 4 + y * config.output_width * 4); - dstptr[0] = source[0]; - dstptr[1] = source[1]; - dstptr[2] = source[2]; - dstptr[3] = source[3]; + dstptr[0] = source_color.r; + dstptr[1] = source_color.g; + dstptr[2] = source_color.b; + dstptr[3] = source_color.a; break; }*/ case Regs::FramebufferFormat::RGB8: { + // TODO: Most likely got the component order messed up. 
u8* dstptr = dest_pointer + x * 3 + y * config.output_width * 3 / 2; - dstptr[0] = source[0]; // blue - dstptr[1] = source[1]; // green - dstptr[2] = source[2]; // red + dstptr[0] = source_color.r; // blue + dstptr[1] = source_color.g; // green + dstptr[2] = source_color.b; // red break; } From afcb250b3140fa2f37efa800f5346aabbde5db2a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 22 Jul 2014 13:49:25 +0200 Subject: [PATCH 21/22] Fix a few warnings. Templates shouldn't be marked as inline if they aren't defined in the header. --- src/core/hle/config_mem.h | 6 +++--- src/core/hw/gpu.h | 4 ++-- src/core/hw/hw.h | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/core/hle/config_mem.h b/src/core/hle/config_mem.h index da396a3e6d..fa01b5cdb7 100644 --- a/src/core/hle/config_mem.h +++ b/src/core/hle/config_mem.h @@ -1,10 +1,10 @@ // Copyright 2014 Citra Emulator Project // Licensed under GPLv2 -// Refer to the license.txt file included. +// Refer to the license.txt file included. #pragma once -// Configuration memory stores various hardware/kernel configuration settings. This memory page is +// Configuration memory stores various hardware/kernel configuration settings. This memory page is // read-only for ARM11 processes. I'm guessing this would normally be written to by the firmware/ // bootrom. Because we're not emulating this, and essentially just "stubbing" the functionality, I'm // putting this as a subset of HLE for now. 
@@ -16,6 +16,6 @@ namespace ConfigMem { template -inline void Read(T &var, const u32 addr); +void Read(T &var, const u32 addr); } // namespace diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 3078e41423..42f18a0e72 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -235,10 +235,10 @@ u32 GetFramebufferAddr(const u32 address); FramebufferLocation GetFramebufferLocation(u32 address); template -inline void Read(T &var, const u32 addr); +void Read(T &var, const u32 addr); template -inline void Write(u32 addr, const T data); +void Write(u32 addr, const T data); /// Update hardware void Update(); diff --git a/src/core/hw/hw.h b/src/core/hw/hw.h index 92e9304cac..1055ed94fc 100644 --- a/src/core/hw/hw.h +++ b/src/core/hw/hw.h @@ -9,10 +9,10 @@ namespace HW { template -inline void Read(T &var, const u32 addr); +void Read(T &var, const u32 addr); template -inline void Write(u32 addr, const T data); +void Write(u32 addr, const T data); /// Update hardware void Update(); From 9fd2537e933b5d36c898d662e29ea57f7ce61e49 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 22 Jul 2014 23:07:32 +0200 Subject: [PATCH 22/22] Use uniform formatting when printing hexadecimal numbers. 
--- src/core/hw/gpu.cpp | 6 +++--- src/video_core/renderer_opengl/renderer_opengl.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index d18ff76250..c00be2a83b 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -142,7 +142,7 @@ inline void Write(u32 addr, const T data) { for (u32* ptr = start; ptr < end; ++ptr) *ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation - DEBUG_LOG(GPU, "MemoryFill from %x to %x", config.GetStartAddress(), config.GetEndAddress()); + DEBUG_LOG(GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); } break; } @@ -207,7 +207,7 @@ inline void Write(u32 addr, const T data) { } } - DEBUG_LOG(GPU, "DisplayTriggerTransfer: %x bytes from %x(%xx%x)-> %x(%xx%x), dst format %x", + DEBUG_LOG(GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%dx%d)-> 0x%08x(%dx%d), dst format %x", config.output_height * config.output_width * 4, config.GetPhysicalInputAddress(), (int)config.input_width, (int)config.input_height, config.GetPhysicalOutputAddress(), (int)config.output_width, (int)config.output_height, @@ -222,7 +222,7 @@ inline void Write(u32 addr, const T data) { if (config.trigger & 1) { // u32* buffer = (u32*)Memory::GetPointer(config.address << 3); - ERROR_LOG(GPU, "Beginning %x bytes of commands from address %x", config.size, config.address << 3); + ERROR_LOG(GPU, "Beginning 0x%08x bytes of commands from address 0x%08x", config.size, config.address << 3); // TODO: Process command list! } break; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 50f820e4aa..d0a8ec1daf 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -87,7 +87,7 @@ void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& ? 
framebuffer_sub.address_left2 : framebuffer_sub.address_left1; - DEBUG_LOG(GPU, "RenderXFB: %x bytes from %x(%xx%x), fmt %x", + DEBUG_LOG(GPU, "RenderXFB: 0x%08x bytes from 0x%08x(%dx%d), fmt %x", framebuffer_top.stride * framebuffer_top.height, GPU::GetFramebufferAddr(active_fb_top), (int)framebuffer_top.width, (int)framebuffer_top.height, (int)framebuffer_top.format);