From 80033b84cbb1bd71082b62b45c68fdce89695987 Mon Sep 17 00:00:00 2001
From: GPUCode <47210458+GPUCode@users.noreply.github.com>
Date: Fri, 7 Jul 2023 03:15:59 +0300
Subject: [PATCH] renderer_software: Fix screen rendering (#6664)

---
 .../renderer_software/renderer_software.cpp   | 32 ++++++++-----------
 .../renderer_software/renderer_software.h     |  2 +-
 .../renderer_software/sw_rasterizer.cpp       | 16 ++++------
 .../renderer_software/sw_rasterizer.h         | 12 ++++---
 4 files changed, 28 insertions(+), 34 deletions(-)

diff --git a/src/video_core/renderer_software/renderer_software.cpp b/src/video_core/renderer_software/renderer_software.cpp
index b423ae5ce..7194be8e1 100644
--- a/src/video_core/renderer_software/renderer_software.cpp
+++ b/src/video_core/renderer_software/renderer_software.cpp
@@ -23,10 +23,8 @@ void RendererSoftware::SwapBuffers() {
 }
 
 void RendererSoftware::PrepareRenderTarget() {
-    for (int i : {0, 1, 2}) {
+    for (u32 i = 0; i < 3; i++) {
         const int fb_id = i == 2 ? 1 : 0;
-        const auto& framebuffer = GPU::g_regs.framebuffer_config[fb_id];
-        auto& info = screen_infos[i];
 
         u32 lcd_color_addr =
             (fb_id == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom);
@@ -35,33 +33,29 @@ void RendererSoftware::PrepareRenderTarget() {
         LCD::Read(color_fill.raw, lcd_color_addr);
 
         if (!color_fill.is_enabled) {
-            const u32 old_width = std::exchange(info.width, framebuffer.width);
-            const u32 old_height = std::exchange(info.height, framebuffer.height);
-            if (framebuffer.width != old_width || framebuffer.height != old_height) [[unlikely]] {
-                info.pixels.resize(framebuffer.width * framebuffer.height * 4);
-            }
-            CopyPixels(i);
+            LoadFBToScreenInfo(i);
         }
     }
 }
 
-void RendererSoftware::CopyPixels(int i) {
+void RendererSoftware::LoadFBToScreenInfo(int i) {
     const u32 fb_id = i == 2 ? 1 : 0;
     const auto& framebuffer = GPU::g_regs.framebuffer_config[fb_id];
+    auto& info = screen_infos[i];
 
     const PAddr framebuffer_addr =
         framebuffer.active_fb == 0 ? framebuffer.address_left1 : framebuffer.address_left2;
     const s32 bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format);
     const u8* framebuffer_data = memory.GetPhysicalPointer(framebuffer_addr);
 
-    const s32 stride = framebuffer.stride;
-    const s32 height = framebuffer.height;
-    ASSERT(stride * height != 0);
+    const s32 pixel_stride = framebuffer.stride / bpp;
+    info.height = framebuffer.height;
+    info.width = pixel_stride;
+    info.pixels.resize(info.width * info.height * 4);
 
-    u32 output_offset = 0;
-    for (u32 y = 0; y < framebuffer.height; y++) {
-        for (u32 x = 0; x < framebuffer.width; x++) {
-            const u8* pixel = framebuffer_data + (y * stride + x) * bpp;
+    for (u32 y = 0; y < info.height; y++) {
+        for (u32 x = 0; x < info.width; x++) {
+            const u8* pixel = framebuffer_data + (y * pixel_stride + pixel_stride - x) * bpp;
             const Common::Vec4 color = [&] {
                 switch (framebuffer.color_format) {
                 case GPU::Regs::PixelFormat::RGBA8:
@@ -77,9 +71,9 @@ void RendererSoftware::CopyPixels(int i) {
                 }
                 UNREACHABLE();
             }();
-            u8* dest = screen_infos[i].pixels.data() + output_offset;
+            const u32 output_offset = (x * info.height + y) * 4;
+            u8* dest = info.pixels.data() + output_offset;
             std::memcpy(dest, color.AsArray(), sizeof(color));
-            output_offset += sizeof(color);
         }
     }
 }
diff --git a/src/video_core/renderer_software/renderer_software.h b/src/video_core/renderer_software/renderer_software.h
index 1c17e321b..6ed86ffa3 100644
--- a/src/video_core/renderer_software/renderer_software.h
+++ b/src/video_core/renderer_software/renderer_software.h
@@ -38,7 +38,7 @@ public:
 
 private:
     void PrepareRenderTarget();
-    void CopyPixels(int i);
+    void LoadFBToScreenInfo(int i);
 
 private:
     Memory::MemorySystem& memory;
diff --git a/src/video_core/renderer_software/sw_rasterizer.cpp b/src/video_core/renderer_software/sw_rasterizer.cpp
index d4c26e496..52d172e6e 100644
--- a/src/video_core/renderer_software/sw_rasterizer.cpp
+++ b/src/video_core/renderer_software/sw_rasterizer.cpp
@@ -399,8 +399,9 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con
             const f24 tc0_w = get_interpolated_attribute(v0.tc0_w, v1.tc0_w, v2.tc0_w);
             const auto texture_color = TextureColor(uv, textures, tc0_w);
 
-            Common::Vec4<u8> primary_fragment_color{0, 0, 0, 0};
-            Common::Vec4<u8> secondary_fragment_color{0, 0, 0, 0};
+            Common::Vec4<u8> primary_fragment_color = {0, 0, 0, 0};
+            Common::Vec4<u8> secondary_fragment_color = {0, 0, 0, 0};
+
             if (!regs.lighting.disable) {
                 const auto normquat =
                     Common::Quaternion<f32>{
@@ -421,9 +422,8 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con
             }
 
             // Write the TEV stages.
-            Common::Vec4<u8> combiner_output =
-                WriteTevConfig(texture_color, tev_stages, primary_color, primary_fragment_color,
-                               secondary_fragment_color);
+            WriteTevConfig(texture_color, tev_stages, primary_color, primary_fragment_color,
+                           secondary_fragment_color);
 
             const auto& output_merger = regs.framebuffer.output_merger;
             if (output_merger.fragment_operation_mode ==
@@ -663,11 +663,11 @@ Common::Vec4<u8> RasterizerSoftware::PixelColor(u16 x, u16 y,
     return result;
 }
 
-Common::Vec4<u8> RasterizerSoftware::WriteTevConfig(
+void RasterizerSoftware::WriteTevConfig(
     std::span<const Common::Vec4<u8>, 4> texture_color,
     std::span<const Pica::TexturingRegs::TevStageConfig, 6> tev_stages,
     Common::Vec4<u8> primary_color, Common::Vec4<u8> primary_fragment_color,
-    Common::Vec4<u8> secondary_fragment_color) const {
+    Common::Vec4<u8> secondary_fragment_color) {
     /**
      * Texture environment - consists of 6 stages of color and alpha combining.
      * Color combiners take three input color values from some source (e.g. interpolated
@@ -676,7 +676,6 @@ Common::Vec4<u8> RasterizerSoftware::WriteTevConfig(
      * with some basic arithmetic. Alpha combiners can be configured separately but work
      * analogously.
      **/
-    Common::Vec4<u8> combiner_output;
     Common::Vec4<u8> combiner_buffer = {0, 0, 0, 0};
     Common::Vec4<u8> next_combiner_buffer =
         Common::MakeVec(regs.texturing.tev_combiner_buffer_color.r.Value(),
@@ -767,7 +766,6 @@ Common::Vec4<u8> RasterizerSoftware::WriteTevConfig(
             next_combiner_buffer.a() = combiner_output.a();
         }
     }
-    return combiner_output;
 }
 
 void RasterizerSoftware::WriteFog(Common::Vec4<u8>& combiner_output, float depth) const {
diff --git a/src/video_core/renderer_software/sw_rasterizer.h b/src/video_core/renderer_software/sw_rasterizer.h
index f31f60526..919d862fc 100644
--- a/src/video_core/renderer_software/sw_rasterizer.h
+++ b/src/video_core/renderer_software/sw_rasterizer.h
@@ -55,11 +55,10 @@ private:
     Common::Vec4<u8> PixelColor(u16 x, u16 y, Common::Vec4<u8>& combiner_output) const;
 
-    /// Emulates the TEV configuration and returns the combiner output.
+    /// Emulates the TEV configuration and stores the result in the combiner_output member.
-    Common::Vec4<u8> WriteTevConfig(
-        std::span<const Common::Vec4<u8>, 4> texture_color,
-        std::span<const Pica::TexturingRegs::TevStageConfig, 6> tev_stages,
-        Common::Vec4<u8> primary_color, Common::Vec4<u8> primary_fragment_color,
-        Common::Vec4<u8> secondary_fragment_color) const;
+    void WriteTevConfig(std::span<const Common::Vec4<u8>, 4> texture_color,
+                        std::span<const Pica::TexturingRegs::TevStageConfig, 6> tev_stages,
+                        Common::Vec4<u8> primary_color, Common::Vec4<u8> primary_fragment_color,
+                        Common::Vec4<u8> secondary_fragment_color);
 
     /// Blends fog to the combiner output if enabled.
     void WriteFog(Common::Vec4<u8>& combiner_output, float depth) const;
@@ -75,6 +74,9 @@ private:
     Pica::State& state;
     const Pica::Regs& regs;
     Framebuffer fb;
+    // Kirby Blowout Blast relies on the combiner output of a previous draw
+    // in order to render the sky correctly.
+    Common::Vec4<u8> combiner_output{};
 };
 
 } // namespace SwRenderer