From e544cb3cf6702fdec9ce2edc9f601a9e37d91153 Mon Sep 17 00:00:00 2001
From: Maufeat
Date: Sat, 7 Feb 2026 22:59:38 +0100
Subject: [PATCH] [nce] add split patch mode to support modules bigger than 128MB (#3489)

The NCE patcher was extended to support modules larger than 128MB, which previously
could not be patched because an ARM64 relative branch cannot reach beyond that distance.
A pre-patch section is now emitted in front of the module in addition to the existing
post-patch section, allowing the MRS/MSR/SVC handlers to remain within branch range of
every patched instruction.

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3489
Reviewed-by: MaranBr
Reviewed-by: CamilleLaVey
Co-authored-by: Maufeat
Co-committed-by: Maufeat
---
 src/core/arm/nce/patcher.cpp      | 479 ++++++++++++++++++++----------
 src/core/arm/nce/patcher.h        |  54 +++-
 src/core/hle/kernel/code_set.h    |  12 +
 src/core/hle/kernel/k_process.cpp |  11 +-
 src/core/loader/nso.cpp           |  36 ++-
 5 files changed, 424 insertions(+), 168 deletions(-)

diff --git a/src/core/arm/nce/patcher.cpp b/src/core/arm/nce/patcher.cpp
index 28862c80b9..3a620bff12 100644
--- a/src/core/arm/nce/patcher.cpp
+++ b/src/core/arm/nce/patcher.cpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later

 #include
@@ -20,9 +20,13 @@ namespace Core::NCE {
 Patcher::Patcher(Patcher&& other) noexcept
     : patch_cache(std::move(other.patch_cache)),
       m_patch_instructions(std::move(other.m_patch_instructions)),
+      m_patch_instructions_pre(std::move(other.m_patch_instructions_pre)),
       c(m_patch_instructions),
+      c_pre(m_patch_instructions_pre),
       m_save_context(other.m_save_context), m_load_context(other.m_load_context),
+      m_save_context_pre(other.m_save_context_pre),
+      m_load_context_pre(other.m_load_context_pre),
       mode(other.mode),
       total_program_size(other.total_program_size),
       m_relocate_module_index(other.m_relocate_module_index),
@@ -42,20 +46,25 @@ using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
 constexpr size_t MaxRelativeBranch = 128_MiB;
 constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32);

-Patcher::Patcher() : c(m_patch_instructions) {
+Patcher::Patcher() : c(m_patch_instructions), c_pre(m_patch_instructions_pre) {
     LOG_WARNING(Core_ARM, "Patcher initialized with LRU cache {}",
                 patch_cache.isEnabled() ? "enabled" : "disabled");

     // The first word of the patch section is always a branch to the first instruction of the
     // module.
     c.dw(0);
+    c_pre.dw(0);

     // Write save context helper function.
     c.l(m_save_context);
     WriteSaveContext();
+    c_pre.l(m_save_context_pre);
+    WriteSaveContext(c_pre);

     // Write load context helper function.
     c.l(m_load_context);
     WriteLoadContext();
+    c_pre.l(m_load_context_pre);
+    WriteLoadContext(c_pre);
 }

 Patcher::~Patcher() = default;

@@ -64,7 +73,16 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
                         const Kernel::CodeSet::Segment& code) {
     // If we have patched modules but cannot reach the new module, then it needs its own patcher.
     const size_t image_size = program_image.size();
-    if (total_program_size + image_size > MaxRelativeBranch && total_program_size > 0) {
+
+    // Check whether this module needs split mode. An A64 relative branch reaches at most 128MB,
+    // but tests showed that some modules, with their update applied, are larger.
(In this case 208MB) + bool use_split = false; + if (image_size > MaxRelativeBranch) { + if (total_program_size > 0) { + return false; + } + use_split = true; + } else if (total_program_size + image_size > MaxRelativeBranch && total_program_size > 0) { return false; } @@ -74,7 +92,12 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image, // The first word of the patch section is always a branch to the first instruction of the // module. - curr_patch->m_branch_to_module_relocations.push_back({0, 0}); + if (use_split) { + // curr_patch->m_branch_to_module_relocations.push_back({0, 0}); + curr_patch->m_branch_to_module_relocations_pre.push_back({0, 0}); + } else { + curr_patch->m_branch_to_module_relocations.push_back({0, 0}); + } // Retrieve text segment data. const auto text = std::span{program_image}.subspan(code.offset, code.size); @@ -85,12 +108,18 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image, for (u32 i = ModuleCodeIndex; i < static_cast(text_words.size()); i++) { const u32 inst = text_words[i]; - const auto AddRelocations = [&] { + const auto AddRelocations = [&](bool& pre_buffer) { const uintptr_t this_offset = i * sizeof(u32); const uintptr_t next_offset = this_offset + sizeof(u32); - // Relocate from here to patch. - this->BranchToPatch(this_offset); + pre_buffer = use_split && (this_offset < MaxRelativeBranch); + + // Relocate to pre- or post-patch + if (pre_buffer) { + this->BranchToPatchPre(this_offset); + } else { + this->BranchToPatch(this_offset); + } // Relocate from patch to next instruction. return next_offset; @@ -98,7 +127,13 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image, // SVC if (auto svc = SVC{inst}; svc.Verify()) { - WriteSvcTrampoline(AddRelocations(), svc.GetValue()); + bool pre_buffer = false; + auto ret = AddRelocations(pre_buffer); + if (pre_buffer) { + WriteSvcTrampoline(ret, svc.GetValue(), c_pre, m_save_context_pre, m_load_context_pre); + } else { + WriteSvcTrampoline(ret, svc.GetValue(), c, m_save_context, m_load_context); + } continue; } @@ -109,13 +144,25 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image, const auto src_reg = mrs.GetSystemReg() == TpidrroEl0 ? 
oaknut::SystemReg::TPIDRRO_EL0 : oaknut::SystemReg::TPIDR_EL0; const auto dest_reg = oaknut::XReg{static_cast(mrs.GetRt())}; - WriteMrsHandler(AddRelocations(), dest_reg, src_reg); + bool pre_buffer = false; + auto ret = AddRelocations(pre_buffer); + if (pre_buffer) { + WriteMrsHandler(ret, dest_reg, src_reg, c_pre); + } else { + WriteMrsHandler(ret, dest_reg, src_reg, c); + } continue; } // MRS Xn, CNTPCT_EL0 if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntpctEl0) { - WriteCntpctHandler(AddRelocations(), oaknut::XReg{static_cast(mrs.GetRt())}); + bool pre_buffer = false; + auto ret = AddRelocations(pre_buffer); + if (pre_buffer) { + WriteCntpctHandler(ret, oaknut::XReg{static_cast(mrs.GetRt())}, c_pre); + } else { + WriteCntpctHandler(ret, oaknut::XReg{static_cast(mrs.GetRt())}, c); + } continue; } @@ -126,7 +173,13 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image, // MSR TPIDR_EL0, Xn if (auto msr = MSR{inst}; msr.Verify() && msr.GetSystemReg() == TpidrEl0) { - WriteMsrHandler(AddRelocations(), oaknut::XReg{static_cast(msr.GetRt())}); + bool pre_buffer = false; + auto ret = AddRelocations(pre_buffer); + if (pre_buffer) { + WriteMsrHandler(ret, oaknut::XReg{static_cast(msr.GetRt())}, c_pre); + } else { + WriteMsrHandler(ret, oaknut::XReg{static_cast(msr.GetRt())}, c); + } continue; } @@ -137,7 +190,11 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image, // Determine patching mode for the final relocation step total_program_size += image_size; - this->mode = image_size > MaxRelativeBranch ? PatchMode::PreText : PatchMode::PostData; + if (use_split) { + this->mode = PatchMode::Split; + } else { + this->mode = image_size > MaxRelativeBranch ? PatchMode::PreText : PatchMode::PostData; + } return true; } @@ -146,7 +203,9 @@ bool Patcher::RelocateAndCopy(Common::ProcessAddress load_base, Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines) { const size_t patch_size = GetSectionSize(); - const size_t image_size = program_image.size(); + const size_t pre_patch_size = GetPreSectionSize(); + + const size_t image_size = (mode == PatchMode::Split) ? program_image.size() - pre_patch_size : program_image.size(); // Retrieve text segment data. 
const auto text = std::span{program_image}.subspan(code.offset, code.size); @@ -162,6 +221,16 @@ bool Patcher::RelocateAndCopy(Common::ProcessAddress load_base, } }; + const auto ApplyBranchToPatchRelocationPre = [&](u32* target, const Relocation& rel) { + oaknut::CodeGenerator rc{target}; + rc.B(static_cast(rel.patch_offset) - static_cast(pre_patch_size) - static_cast(rel.module_offset)); + }; + + const auto ApplyBranchToPatchRelocationPostSplit = [&](u32* target, const Relocation& rel) { + oaknut::CodeGenerator rc{target}; + rc.B(static_cast(image_size) + static_cast(rel.patch_offset) - static_cast(rel.module_offset)); + }; + const auto ApplyBranchToModuleRelocation = [&](u32* target, const Relocation& rel) { oaknut::CodeGenerator rc{target}; if (mode == PatchMode::PreText) { @@ -171,6 +240,16 @@ bool Patcher::RelocateAndCopy(Common::ProcessAddress load_base, } }; + const auto ApplyBranchToModuleRelocationPre = [&](u32* target, const Relocation& rel) { + oaknut::CodeGenerator rc{target}; + rc.B(static_cast(pre_patch_size) + static_cast(rel.module_offset) - static_cast(rel.patch_offset)); + }; + + const auto ApplyBranchToModuleRelocationPostSplit = [&](u32* target, const Relocation& rel) { + oaknut::CodeGenerator rc{target}; + rc.B(static_cast(rel.module_offset) - static_cast(image_size) - static_cast(rel.patch_offset)); + }; + const auto RebasePatch = [&](ptrdiff_t patch_offset) { if (mode == PatchMode::PreText) { return GetInteger(load_base) + patch_offset; @@ -182,28 +261,87 @@ bool Patcher::RelocateAndCopy(Common::ProcessAddress load_base, const auto RebasePc = [&](uintptr_t module_offset) { if (mode == PatchMode::PreText) { return GetInteger(load_base) + patch_size + module_offset; - } else { - return GetInteger(load_base) + module_offset; } + if (mode == PatchMode::Split) { + return GetInteger(load_base) + pre_patch_size + module_offset; + } + + return GetInteger(load_base) + module_offset; }; // We are now ready to relocate! 
auto& patch = modules[m_relocate_module_index++]; - for (const Relocation& rel : patch.m_branch_to_patch_relocations) { - ApplyBranchToPatchRelocation(text_words.data() + rel.module_offset / sizeof(u32), rel); - } - for (const Relocation& rel : patch.m_branch_to_module_relocations) { - ApplyBranchToModuleRelocation(m_patch_instructions.data() + rel.patch_offset / sizeof(u32), - rel); - } - // Rewrite PC constants and record post trampolines - for (const Relocation& rel : patch.m_write_module_pc_relocations) { - oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)}; - rc.dx(RebasePc(rel.module_offset)); + if (mode == PatchMode::Split) { + for (const Relocation& rel : patch.m_branch_to_pre_patch_relocations) { + ApplyBranchToPatchRelocationPre(text_words.data() + rel.module_offset / sizeof(u32), rel); + } + LOG_DEBUG(Core_ARM, "applied Pre: {}", patch.m_branch_to_pre_patch_relocations.size()); + + for (const Relocation& rel : patch.m_branch_to_patch_relocations) { + ApplyBranchToPatchRelocationPostSplit(text_words.data() + rel.module_offset / sizeof(u32), rel); + } + LOG_DEBUG(Core_ARM, "applied Post: {}", patch.m_branch_to_patch_relocations.size()); + + for (const Relocation& rel : patch.m_branch_to_module_relocations_pre) { + ApplyBranchToModuleRelocationPre(m_patch_instructions_pre.data() + rel.patch_offset / sizeof(u32), rel); + } + LOG_DEBUG(Core_ARM, "aplied Pre-module {}", patch.m_branch_to_module_relocations_pre.size()); + + for (const Relocation& rel : patch.m_branch_to_module_relocations) { + ApplyBranchToModuleRelocationPostSplit(m_patch_instructions.data() + rel.patch_offset / sizeof(u32), rel); + } + LOG_DEBUG(Core_ARM, "applied Post-module {}", patch.m_branch_to_module_relocations.size()); + + // Pre + for (const Relocation& rel : patch.m_write_module_pc_relocations_pre) { + oaknut::CodeGenerator rc{m_patch_instructions_pre.data() + rel.patch_offset / sizeof(u32)}; + rc.dx(RebasePc(rel.module_offset)); + } + // Post + for (const Relocation& rel : patch.m_write_module_pc_relocations) { + oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)}; + rc.dx(RebasePc(rel.module_offset)); + } + + // Trampolines (split pre + post) + for (const Trampoline& rel : patch.m_trampolines_pre) { + out_trampolines->insert({RebasePc(rel.module_offset), + GetInteger(load_base) + rel.patch_offset}); + } + for (const Trampoline& rel : patch.m_trampolines) { + out_trampolines->insert({RebasePc(rel.module_offset), + GetInteger(load_base) + pre_patch_size + image_size + rel.patch_offset}); + } + + if (!m_patch_instructions_pre.empty()) { + u32 insn = m_patch_instructions_pre[0]; + if ((insn & 0xFC000000) == 0x14000000) { + s32 imm26 = insn & 0x3FFFFFF; + // Sign extend + if (imm26 & 0x2000000) imm26 |= 0xFC000000; + } + } + } else { + for (const Relocation& rel : patch.m_branch_to_patch_relocations) { + ApplyBranchToPatchRelocation(text_words.data() + rel.module_offset / sizeof(u32), rel); + } + for (const Relocation& rel : patch.m_branch_to_module_relocations) { + ApplyBranchToModuleRelocation(m_patch_instructions.data() + rel.patch_offset / sizeof(u32), + rel); + } + + // Rewrite PC constants + for (const Relocation& rel : patch.m_write_module_pc_relocations) { + oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)}; + rc.dx(RebasePc(rel.module_offset)); + } } - for (const Trampoline& rel : patch.m_trampolines) { - out_trampolines->insert({RebasePc(rel.module_offset), RebasePatch(rel.patch_offset)}); + + 
if (mode != PatchMode::Split) { + for (const Trampoline& rel : patch.m_trampolines) { + out_trampolines->insert({RebasePc(rel.module_offset), RebasePatch(rel.patch_offset)}); + } } // Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not. @@ -223,6 +361,15 @@ bool Patcher::RelocateAndCopy(Common::ProcessAddress load_base, ASSERT(image_size == total_program_size); std::memcpy(program_image.data(), m_patch_instructions.data(), m_patch_instructions.size() * sizeof(u32)); + } else if (this->mode == PatchMode::Split) { + const size_t current_size = program_image.size(); + program_image.resize(current_size + patch_size); + // Copy pre-patch buffer to the beginning + std::memcpy(program_image.data(), m_patch_instructions_pre.data(), + m_patch_instructions_pre.size() * sizeof(u32)); + // Same for post-patch buffer to the end + std::memcpy(program_image.data() + current_size, m_patch_instructions.data(), + m_patch_instructions.size() * sizeof(u32)); } else { program_image.resize(image_size + patch_size); std::memcpy(program_image.data() + image_size, m_patch_instructions.data(), @@ -238,202 +385,225 @@ size_t Patcher::GetSectionSize() const noexcept { return Common::AlignUp(m_patch_instructions.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE); } -void Patcher::WriteLoadContext() { +size_t Patcher::GetPreSectionSize() const noexcept { + return Common::AlignUp(m_patch_instructions_pre.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE); +} + +void Patcher::WriteLoadContext(oaknut::VectorCodeGenerator& cg) { // This function was called, which modifies X30, so use that as a scratch register. // SP contains the guest X30, so save our return X30 to SP + 8, since we have allocated 16 bytes // of stack. - c.STR(X30, SP, 8); - c.MRS(X30, oaknut::SystemReg::TPIDR_EL0); - c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context)); + cg.STR(X30, SP, 8); + cg.MRS(X30, oaknut::SystemReg::TPIDR_EL0); + cg.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context)); // Load system registers. - c.LDR(W0, X30, offsetof(GuestContext, fpsr)); - c.MSR(oaknut::SystemReg::FPSR, X0); - c.LDR(W0, X30, offsetof(GuestContext, fpcr)); - c.MSR(oaknut::SystemReg::FPCR, X0); - c.LDR(W0, X30, offsetof(GuestContext, nzcv)); - c.MSR(oaknut::SystemReg::NZCV, X0); + cg.LDR(W0, X30, offsetof(GuestContext, fpsr)); + cg.MSR(oaknut::SystemReg::FPSR, X0); + cg.LDR(W0, X30, offsetof(GuestContext, fpcr)); + cg.MSR(oaknut::SystemReg::FPCR, X0); + cg.LDR(W0, X30, offsetof(GuestContext, nzcv)); + cg.MSR(oaknut::SystemReg::NZCV, X0); // Load all vector registers. static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers); for (int i = 0; i <= 30; i += 2) { - c.LDP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i); + cg.LDP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i); } // Load all general-purpose registers except X30. for (int i = 0; i <= 28; i += 2) { - c.LDP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i); + cg.LDP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i); } // Reload our return X30 from the stack and return. // The patch code will reload the guest X30 for us. - c.LDR(X30, SP, 8); - c.RET(); + cg.LDR(X30, SP, 8); + cg.RET(); } -void Patcher::WriteSaveContext() { +void Patcher::WriteSaveContext(oaknut::VectorCodeGenerator& cg) { // This function was called, which modifies X30, so use that as a scratch register. // SP contains the guest X30, so save our X30 to SP + 8, since we have allocated 16 bytes of // stack. 
- c.STR(X30, SP, 8); - c.MRS(X30, oaknut::SystemReg::TPIDR_EL0); - c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context)); + cg.STR(X30, SP, 8); + cg.MRS(X30, oaknut::SystemReg::TPIDR_EL0); + cg.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context)); // Store all general-purpose registers except X30. for (int i = 0; i <= 28; i += 2) { - c.STP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i); + cg.STP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i); } // Store all vector registers. static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers); for (int i = 0; i <= 30; i += 2) { - c.STP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i); + cg.STP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i); } // Store guest system registers, X30 and SP, using X0 as a scratch register. - c.STR(X0, SP, PRE_INDEXED, -16); - c.LDR(X0, SP, 16); - c.STR(X0, X30, 8 * 30); - c.ADD(X0, SP, 32); - c.STR(X0, X30, offsetof(GuestContext, sp)); - c.MRS(X0, oaknut::SystemReg::FPSR); - c.STR(W0, X30, offsetof(GuestContext, fpsr)); - c.MRS(X0, oaknut::SystemReg::FPCR); - c.STR(W0, X30, offsetof(GuestContext, fpcr)); - c.MRS(X0, oaknut::SystemReg::NZCV); - c.STR(W0, X30, offsetof(GuestContext, nzcv)); - c.LDR(X0, SP, POST_INDEXED, 16); + cg.STR(X0, SP, PRE_INDEXED, -16); + cg.LDR(X0, SP, 16); + cg.STR(X0, X30, 8 * 30); + cg.ADD(X0, SP, 32); + cg.STR(X0, X30, offsetof(GuestContext, sp)); + cg.MRS(X0, oaknut::SystemReg::FPSR); + cg.STR(W0, X30, offsetof(GuestContext, fpsr)); + cg.MRS(X0, oaknut::SystemReg::FPCR); + cg.STR(W0, X30, offsetof(GuestContext, fpcr)); + cg.MRS(X0, oaknut::SystemReg::NZCV); + cg.STR(W0, X30, offsetof(GuestContext, nzcv)); + cg.LDR(X0, SP, POST_INDEXED, 16); // Reload our return X30 from the stack, and return. - c.LDR(X30, SP, 8); - c.RET(); + cg.LDR(X30, SP, 8); + cg.RET(); } -void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) { +void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id, oaknut::VectorCodeGenerator& cg, oaknut::Label& save_ctx, oaknut::Label& load_ctx) { + // Determine if we're writing to the pre-patch buffer + const bool is_pre = (&cg == &c_pre); + // We are about to start saving state, so we need to lock the context. - this->LockContext(); + this->LockContext(cg); // Store guest X30 to the stack. Then, save the context and restore the stack. // This will save all registers except PC, but we know PC at patch time. - c.STR(X30, SP, PRE_INDEXED, -16); - c.BL(m_save_context); - c.LDR(X30, SP, POST_INDEXED, 16); + cg.STR(X30, SP, PRE_INDEXED, -16); + cg.BL(save_ctx); + cg.LDR(X30, SP, POST_INDEXED, 16); // Now that we've saved all registers, we can use any registers as scratch. // Store PC + 4 to arm interface, since we know the instruction offset from the entry point. oaknut::Label pc_after_svc; - c.MRS(X1, oaknut::SystemReg::TPIDR_EL0); - c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); - c.LDR(X2, pc_after_svc); - c.STR(X2, X1, offsetof(GuestContext, pc)); + cg.MRS(X1, oaknut::SystemReg::TPIDR_EL0); + cg.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); + cg.LDR(X2, pc_after_svc); + cg.STR(X2, X1, offsetof(GuestContext, pc)); // Store SVC number to execute when we return - c.MOV(X2, svc_id); - c.STR(W2, X1, offsetof(GuestContext, svc)); + cg.MOV(X2, svc_id); + cg.STR(W2, X1, offsetof(GuestContext, svc)); // We are calling a SVC. Clear esr_el1 and return it. 
static_assert(std::is_same_v, u64>); oaknut::Label retry; - c.ADD(X2, X1, offsetof(GuestContext, esr_el1)); - c.l(retry); - c.LDAXR(X0, X2); - c.STLXR(W3, XZR, X2); - c.CBNZ(W3, retry); + cg.ADD(X2, X1, offsetof(GuestContext, esr_el1)); + cg.l(retry); + cg.LDAXR(X0, X2); + cg.STLXR(W3, XZR, X2); + cg.CBNZ(W3, retry); // Add "calling SVC" flag. Since this is X0, this is now our return value. - c.ORR(X0, X0, static_cast(HaltReason::SupervisorCall)); + cg.ORR(X0, X0, static_cast(HaltReason::SupervisorCall)); // Offset the GuestContext pointer to the HostContext member. // STP has limited range of [-512, 504] which we can't reach otherwise // NB: Due to this all offsets below are from the start of HostContext. - c.ADD(X1, X1, offsetof(GuestContext, host_ctx)); + cg.ADD(X1, X1, offsetof(GuestContext, host_ctx)); // Reload host TPIDR_EL0 and SP. static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0)); - c.LDP(X2, X3, X1, offsetof(HostContext, host_sp)); - c.MOV(SP, X2); - c.MSR(oaknut::SystemReg::TPIDR_EL0, X3); + cg.LDP(X2, X3, X1, offsetof(HostContext, host_sp)); + cg.MOV(SP, X2); + cg.MSR(oaknut::SystemReg::TPIDR_EL0, X3); // Load callee-saved host registers and return to host. static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs); static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs); - c.LDP(X19, X20, X1, HOST_REGS_OFF); - c.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64)); - c.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64)); - c.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64)); - c.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64)); - c.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64)); - c.LDP(Q8, Q9, X1, HOST_VREGS_OFF); - c.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128)); - c.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128)); - c.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128)); - c.RET(); + cg.LDP(X19, X20, X1, HOST_REGS_OFF); + cg.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64)); + cg.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64)); + cg.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64)); + cg.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64)); + cg.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64)); + cg.LDP(Q8, Q9, X1, HOST_VREGS_OFF); + cg.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128)); + cg.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128)); + cg.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128)); + cg.RET(); // Write the post-SVC trampoline address, which will jump back to the guest after restoring its // state. - curr_patch->m_trampolines.push_back({c.offset(), module_dest}); + if (is_pre) { + curr_patch->m_trampolines_pre.push_back({cg.offset(), module_dest}); + } else { + curr_patch->m_trampolines.push_back({cg.offset(), module_dest}); + } // Host called this location. Save the return address so we can // unwind the stack properly when jumping back. - c.MRS(X2, oaknut::SystemReg::TPIDR_EL0); - c.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context)); - c.ADD(X0, X2, offsetof(GuestContext, host_ctx)); - c.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64)); + cg.MRS(X2, oaknut::SystemReg::TPIDR_EL0); + cg.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context)); + cg.ADD(X0, X2, offsetof(GuestContext, host_ctx)); + cg.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64)); // Reload all guest registers except X30 and PC. // The function also expects 16 bytes of stack already allocated. 
- c.STR(X30, SP, PRE_INDEXED, -16); - c.BL(m_load_context); - c.LDR(X30, SP, POST_INDEXED, 16); + cg.STR(X30, SP, PRE_INDEXED, -16); + cg.BL(load_ctx); + cg.LDR(X30, SP, POST_INDEXED, 16); // Use X1 as a scratch register to restore X30. - c.STR(X1, SP, PRE_INDEXED, -16); - c.MRS(X1, oaknut::SystemReg::TPIDR_EL0); - c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); - c.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30); - c.LDR(X1, SP, POST_INDEXED, 16); + cg.STR(X1, SP, PRE_INDEXED, -16); + cg.MRS(X1, oaknut::SystemReg::TPIDR_EL0); + cg.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); + cg.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30); + cg.LDR(X1, SP, POST_INDEXED, 16); // Unlock the context. - this->UnlockContext(); + this->UnlockContext(cg); // Jump back to the instruction after the emulated SVC. - this->BranchToModule(module_dest); + if (&cg == &c_pre) + this->BranchToModulePre(module_dest); + else + this->BranchToModule(module_dest); // Store PC after call. - c.l(pc_after_svc); - this->WriteModulePc(module_dest); + cg.l(pc_after_svc); + if (&cg == &c_pre) + this->WriteModulePcPre(module_dest); + else + this->WriteModulePc(module_dest); } void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, - oaknut::SystemReg src_reg) { + oaknut::SystemReg src_reg, oaknut::VectorCodeGenerator& cg) { // Retrieve emulated TLS register from GuestContext. - c.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0); + cg.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0); if (src_reg == oaknut::SystemReg::TPIDRRO_EL0) { - c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidrro_el0)); + cg.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidrro_el0)); } else { - c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidr_el0)); + cg.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidr_el0)); } // Jump back to the instruction after the emulated MRS. - this->BranchToModule(module_dest); + if (&cg == &c_pre) + this->BranchToModulePre(module_dest); + else + this->BranchToModule(module_dest); } -void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) { +void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg, oaknut::VectorCodeGenerator& cg) { const auto scratch_reg = src_reg.index() == 0 ? X1 : X0; - c.STR(scratch_reg, SP, PRE_INDEXED, -16); + cg.STR(scratch_reg, SP, PRE_INDEXED, -16); // Save guest value to NativeExecutionParameters::tpidr_el0. - c.MRS(scratch_reg, oaknut::SystemReg::TPIDR_EL0); - c.STR(src_reg, scratch_reg, offsetof(NativeExecutionParameters, tpidr_el0)); + cg.MRS(scratch_reg, oaknut::SystemReg::TPIDR_EL0); + cg.STR(src_reg, scratch_reg, offsetof(NativeExecutionParameters, tpidr_el0)); // Restore scratch register. - c.LDR(scratch_reg, SP, POST_INDEXED, 16); + cg.LDR(scratch_reg, SP, POST_INDEXED, 16); // Jump back to the instruction after the emulated MSR. 
- this->BranchToModule(module_dest); + if (&cg == &c_pre) + this->BranchToModulePre(module_dest); + else + this->BranchToModule(module_dest); } -void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) { +void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::VectorCodeGenerator& cg) { static Common::Arm64::NativeClock clock{}; const auto factor = clock.GetGuestCNTFRQFactor(); const auto raw_factor = std::bit_cast>(factor); @@ -446,80 +616,83 @@ void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_ oaknut::Label factorhi; // Save scratches. - c.STP(scratch0, scratch1, SP, PRE_INDEXED, -16); + cg.STP(scratch0, scratch1, SP, PRE_INDEXED, -16); // Load counter value. - c.MRS(dest_reg, oaknut::SystemReg::CNTVCT_EL0); + cg.MRS(dest_reg, oaknut::SystemReg::CNTVCT_EL0); // Load scaling factor. - c.LDR(scratch0, factorlo); - c.LDR(scratch1, factorhi); + cg.LDR(scratch0, factorlo); + cg.LDR(scratch1, factorhi); // Multiply low bits and get result. - c.UMULH(scratch0, dest_reg, scratch0); + cg.UMULH(scratch0, dest_reg, scratch0); // Multiply high bits and add low bit result. - c.MADD(dest_reg, dest_reg, scratch1, scratch0); + cg.MADD(dest_reg, dest_reg, scratch1, scratch0); // Reload scratches. - c.LDP(scratch0, scratch1, SP, POST_INDEXED, 16); + cg.LDP(scratch0, scratch1, SP, POST_INDEXED, 16); // Jump back to the instruction after the emulated MRS. - this->BranchToModule(module_dest); + if (&cg == &c_pre) + this->BranchToModulePre(module_dest); + else + this->BranchToModule(module_dest); // Scaling factor constant values. - c.l(factorlo); - c.dx(raw_factor[0]); - c.l(factorhi); - c.dx(raw_factor[1]); + cg.l(factorlo); + cg.dx(raw_factor[0]); + cg.l(factorhi); + cg.dx(raw_factor[1]); } -void Patcher::LockContext() { +void Patcher::LockContext(oaknut::VectorCodeGenerator& cg) { oaknut::Label retry; // Save scratches. - c.STP(X0, X1, SP, PRE_INDEXED, -16); + cg.STP(X0, X1, SP, PRE_INDEXED, -16); // Reload lock pointer. - c.l(retry); - c.CLREX(); - c.MRS(X0, oaknut::SystemReg::TPIDR_EL0); - c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock)); + cg.l(retry); + cg.CLREX(); + cg.MRS(X0, oaknut::SystemReg::TPIDR_EL0); + cg.ADD(X0, X0, offsetof(NativeExecutionParameters, lock)); static_assert(SpinLockLocked == 0); // Load-linked with acquire ordering. - c.LDAXR(W1, X0); + cg.LDAXR(W1, X0); // If the value was SpinLockLocked, clear monitor and retry. - c.CBZ(W1, retry); + cg.CBZ(W1, retry); // Store-conditional SpinLockLocked with relaxed ordering. - c.STXR(W1, WZR, X0); + cg.STXR(W1, WZR, X0); // If we failed to store, retry. - c.CBNZ(W1, retry); + cg.CBNZ(W1, retry); // We succeeded! Reload scratches. - c.LDP(X0, X1, SP, POST_INDEXED, 16); + cg.LDP(X0, X1, SP, POST_INDEXED, 16); } -void Patcher::UnlockContext() { +void Patcher::UnlockContext(oaknut::VectorCodeGenerator& cg) { // Save scratches. - c.STP(X0, X1, SP, PRE_INDEXED, -16); + cg.STP(X0, X1, SP, PRE_INDEXED, -16); // Load lock pointer. - c.MRS(X0, oaknut::SystemReg::TPIDR_EL0); - c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock)); + cg.MRS(X0, oaknut::SystemReg::TPIDR_EL0); + cg.ADD(X0, X0, offsetof(NativeExecutionParameters, lock)); // Load SpinLockUnlocked. - c.MOV(W1, SpinLockUnlocked); + cg.MOV(W1, SpinLockUnlocked); // Store value with release ordering. - c.STLR(W1, X0); + cg.STLR(W1, X0); // Load scratches. 
- c.LDP(X0, X1, SP, POST_INDEXED, 16); + cg.LDP(X0, X1, SP, POST_INDEXED, 16); } } // namespace Core::NCE diff --git a/src/core/arm/nce/patcher.h b/src/core/arm/nce/patcher.h index 31b122477f..f2b11fcda0 100644 --- a/src/core/arm/nce/patcher.h +++ b/src/core/arm/nce/patcher.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #pragma once @@ -41,6 +41,7 @@ enum class PatchMode : u32 { None, PreText, ///< Patch section is inserted before .text PostData, ///< Patch section is inserted after .data + Split, ///< Patch sections are inserted before .text and after .data }; using ModuleTextAddress = u64; @@ -63,6 +64,7 @@ public: bool RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code, Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines); size_t GetSectionSize() const noexcept; + size_t GetPreSectionSize() const noexcept; [[nodiscard]] PatchMode GetPatchMode() const noexcept { return mode; @@ -76,15 +78,25 @@ private: uintptr_t module_offset; }; - void WriteLoadContext(); - void WriteSaveContext(); - void LockContext(); - void UnlockContext(); - void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id); - void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, - oaknut::SystemReg src_reg); - void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg); - void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg); + // Core implementations with explicit code generator + void WriteLoadContext(oaknut::VectorCodeGenerator& code); + void WriteSaveContext(oaknut::VectorCodeGenerator& code); + void LockContext(oaknut::VectorCodeGenerator& code); + void UnlockContext(oaknut::VectorCodeGenerator& code); + void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id, oaknut::VectorCodeGenerator& code, oaknut::Label& save_ctx, oaknut::Label& load_ctx); + void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::SystemReg src_reg, oaknut::VectorCodeGenerator& code); + void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg, oaknut::VectorCodeGenerator& code); + void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::VectorCodeGenerator& code); + + // Convenience wrappers using default code generator + void WriteLoadContext() { WriteLoadContext(c); } + void WriteSaveContext() { WriteSaveContext(c); } + void LockContext() { LockContext(c); } + void UnlockContext() { UnlockContext(c); } + void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) { WriteSvcTrampoline(module_dest, svc_id, c, m_save_context, m_load_context); } + void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::SystemReg src_reg) { WriteMrsHandler(module_dest, dest_reg, src_reg, c); } + void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) { WriteMsrHandler(module_dest, src_reg, c); } + void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) { WriteCntpctHandler(module_dest, dest_reg, c); } private: static constexpr size_t CACHE_SIZE = 16384; // Cache size for patch entries @@ -111,19 +123,34 @@ private: } } + void BranchToPatchPre(uintptr_t module_dest) { + curr_patch->m_branch_to_pre_patch_relocations.push_back({c_pre.offset(), module_dest}); + } + void BranchToModule(uintptr_t module_dest) { curr_patch->m_branch_to_module_relocations.push_back({c.offset(), 
module_dest}); c.dw(0); } + void BranchToModulePre(uintptr_t module_dest) { + curr_patch->m_branch_to_module_relocations_pre.push_back({c_pre.offset(), module_dest}); + c_pre.dw(0); + } + void WriteModulePc(uintptr_t module_dest) { curr_patch->m_write_module_pc_relocations.push_back({c.offset(), module_dest}); c.dx(0); } + void WriteModulePcPre(uintptr_t module_dest) { + curr_patch->m_write_module_pc_relocations_pre.push_back({c_pre.offset(), module_dest}); + c_pre.dx(0); + } + private: // List of patch instructions we have generated. std::vector m_patch_instructions{}; + std::vector m_patch_instructions_pre{}; // Relocation type for relative branch from module to patch. struct Relocation { @@ -133,15 +160,22 @@ private: struct ModulePatch { std::vector m_trampolines; + std::vector m_trampolines_pre; std::vector m_branch_to_patch_relocations{}; + std::vector m_branch_to_pre_patch_relocations{}; std::vector m_branch_to_module_relocations{}; + std::vector m_branch_to_module_relocations_pre{}; std::vector m_write_module_pc_relocations{}; + std::vector m_write_module_pc_relocations_pre{}; std::vector m_exclusives{}; }; oaknut::VectorCodeGenerator c; + oaknut::VectorCodeGenerator c_pre; oaknut::Label m_save_context{}; oaknut::Label m_load_context{}; + oaknut::Label m_save_context_pre{}; + oaknut::Label m_load_context_pre{}; PatchMode mode{PatchMode::None}; size_t total_program_size{}; size_t m_relocate_module_index{}; diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h index 4d2d0098e7..1416fc52b1 100644 --- a/src/core/hle/kernel/code_set.h +++ b/src/core/hle/kernel/code_set.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -83,6 +86,14 @@ struct CodeSet final { const Segment& PatchSegment() const { return patch_segment; } + + Segment& PostPatchSegment() { + return post_patch_segment; + } + + const Segment& PostPatchSegment() const { + return post_patch_segment; + } #endif /// The overall data that backs this code set. @@ -93,6 +104,7 @@ struct CodeSet final { #ifdef HAS_NCE Segment patch_segment; + Segment post_patch_segment; #endif /// The entry point address for this code set. 
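As a reading aid for the Split-mode relocation lambdas in patcher.cpp above (ApplyBranchToPatchRelocationPre and ApplyBranchToPatchRelocationPostSplit), here is a minimal sketch of the displacement math they implement, assuming the [pre-patch | module image | post-patch] layout this patch sets up. The function and parameter names below are illustrative and not part of the codebase.

// Sketch only: mirrors the Split-mode branch displacements computed in RelocateAndCopy.
#include <cassert>
#include <cstddef>

constexpr std::ptrdiff_t kMaxRelativeBranch = std::ptrdiff_t{128} << 20; // A64 B reaches +/-128 MiB

// module_offset: offset of the patched instruction inside the module image.
// patch_offset:  offset of its trampoline inside the chosen patch buffer.
// pre_size:      size of the pre-patch section placed in front of the image.
// image_size:    size of the module image without either patch section.
std::ptrdiff_t SplitBranchDisplacement(bool to_pre_section, std::size_t pre_size,
                                       std::size_t image_size, std::size_t patch_offset,
                                       std::size_t module_offset) {
    const auto pre = static_cast<std::ptrdiff_t>(pre_size);
    const auto image = static_cast<std::ptrdiff_t>(image_size);
    const auto patch = static_cast<std::ptrdiff_t>(patch_offset);
    const auto module = static_cast<std::ptrdiff_t>(module_offset);

    // The pre section sits before the image, so the branch goes backwards;
    // the post section is appended after the image, so the branch goes forwards.
    const std::ptrdiff_t displacement =
        to_pre_section ? patch - pre - module : image + patch - module;

    // Either way the target must stay within the relative branch range.
    assert(displacement > -kMaxRelativeBranch && displacement < kMaxRelativeBranch);
    return displacement;
}

PatchText picks the pre buffer for instructions in the first 128 MiB of the module and the post buffer for everything above that, which is what keeps both displacements in range.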
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp
index 082049f957..bd50f74c27 100644
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later

 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
@@ -1258,6 +1258,7 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) {

 #ifdef HAS_NCE
     const auto& patch = code_set.PatchSegment();
+    const auto& post_patch = code_set.PostPatchSegment();
     if (this->IsApplication() && Settings::IsNceEnabled() && patch.size != 0) {
         auto& buffer = m_kernel.System().DeviceMemory().buffer;
         const auto& code = code_set.CodeSegment();
@@ -1265,7 +1266,15 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) {
                        Common::MemoryPermission::Read | Common::MemoryPermission::Execute);
         buffer.Protect(GetInteger(base_addr + patch.addr), patch.size,
                        Common::MemoryPermission::Read | Common::MemoryPermission::Execute);
+        // Protect the post-patch segment, if present, like the patch segment above
+        if (post_patch.size != 0) {
+            buffer.Protect(GetInteger(base_addr + post_patch.addr), post_patch.size,
+                           Common::MemoryPermission::Read | Common::MemoryPermission::Execute);
+        }
         ReprotectSegment(code_set.PatchSegment(), Svc::MemoryPermission::None);
+        if (post_patch.size != 0) {
+            ReprotectSegment(code_set.PostPatchSegment(), Svc::MemoryPermission::None);
+        }
     }
 #endif
 }
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index e3e3f83ca2..458df110b6 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later

 // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
@@ -102,6 +102,8 @@ std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
         auto* patch = &patches->operator[](patch_index);
         if (patch->GetPatchMode() == Core::NCE::PatchMode::PreText) {
             return patch->GetSectionSize();
+        } else if (patch->GetPatchMode() == Core::NCE::PatchMode::Split) {
+            return patch->GetPreSectionSize();
         }
     }
 #endif
@@ -178,12 +180,26 @@ std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
         }
     } else if (patch) {
         // Relocate code patch and copy to the program_image.
+        // Save size before RelocateAndCopy (which may resize)
+        const size_t size_before_relocate = program_image.size();
         if (patch->RelocateAndCopy(load_base, code, program_image, &process.GetPostHandlers())) {
             // Update patch section.
             auto& patch_segment = codeset.PatchSegment();
-            patch_segment.addr =
-                patch->GetPatchMode() == Core::NCE::PatchMode::PreText ? 0 : image_size;
-            patch_segment.size = static_cast(patch->GetSectionSize());
+            auto& post_patch_segment = codeset.PostPatchSegment();
+            const auto patch_mode = patch->GetPatchMode();
+            if (patch_mode == Core::NCE::PatchMode::PreText) {
+                patch_segment.addr = 0;
+                patch_segment.size = static_cast(patch->GetSectionSize());
+            } else if (patch_mode == Core::NCE::PatchMode::Split) {
+                // For Split mode, the pre-patch buffer goes at the start and the post-patch buffer at the end
+                patch_segment.addr = 0;
+                patch_segment.size = static_cast(patch->GetPreSectionSize());
+                post_patch_segment.addr = size_before_relocate;
+                post_patch_segment.size = static_cast(patch->GetSectionSize());
+            } else {
+                patch_segment.addr = image_size;
+                patch_segment.size = static_cast(patch->GetSectionSize());
+            }
         }

         // Refresh image_size to take account the patch section if it was added by RelocateAndCopy
@@ -193,6 +209,18 @@ std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::

     // If we aren't actually loading (i.e. just computing the process code layout), we are done
     if (!load_into_process) {
+#ifdef HAS_NCE
+        // For Split mode we also need to account for the pre-patch and post-patch space that
+        // RelocateAndCopy will add during the second pass; without this the layout is too small,
+        // and loading crashed at the PreText path when tested in Android Studio. There may be a
+        // cleaner way to do this, but it works for now.
+        if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::Split) {
+            return load_base + patch->GetPreSectionSize() + image_size + patch->GetSectionSize();
+        } else if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::PreText) {
+            return load_base + patch->GetSectionSize() + image_size;
+        } else if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::PostData) {
+            return load_base + image_size + patch->GetSectionSize();
+        }
+#endif
         return load_base + image_size;
     }
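For orientation, a rough model of the Split-mode layout that the loader code above reserves and that RelocateAndCopy fills in: the pre-patch section at offset 0, the original image after it, and the post-patch section appended at the end. The struct and function names are illustrative, and image_size here means the module image without either patch section.

// Sketch only: models the Split-mode segment addresses set up in nso.cpp.
#include <cstddef>

struct SplitLayout {
    std::size_t pre_patch_addr;  // pre-patch (patch_segment) starts at 0
    std::size_t image_addr;      // module image follows the pre-patch section
    std::size_t post_patch_addr; // post-patch segment is appended after the image
    std::size_t total_size;      // what must be reserved at load_base
};

SplitLayout ComputeSplitLayout(std::size_t pre_section_size, std::size_t image_size,
                               std::size_t post_section_size) {
    return SplitLayout{
        .pre_patch_addr = 0,
        .image_addr = pre_section_size,
        .post_patch_addr = pre_section_size + image_size,
        .total_size = pre_section_size + image_size + post_section_size,
    };
}

Placing handlers at both ends roughly doubles the amount of code a single Patcher can reach before the 128 MiB limit is hit again.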
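Finally, since the whole feature exists because of the reach of the A64 B instruction, here is a small sketch of the immediate decode that the leftover check in RelocateAndCopy begins (the 0xFC000000 / 0x14000000 test): the branch stores a signed 26-bit instruction count, which is where the 128 MiB limit, and therefore the need for split mode, comes from. The helper name and signature are illustrative.

// Sketch only: decodes the byte displacement of an A64 unconditional branch (B).
#include <cstdint>
#include <optional>

std::optional<std::int64_t> DecodeBranchDisplacement(std::uint32_t insn) {
    if ((insn & 0xFC000000u) != 0x14000000u) {
        return std::nullopt; // not an unconditional B
    }
    auto imm26 = static_cast<std::int32_t>(insn & 0x03FFFFFFu);
    if (imm26 & 0x02000000) {
        imm26 -= 0x04000000; // sign-extend the 26-bit field
    }
    // The immediate counts 4-byte instructions, giving a reach of +/-128 MiB.
    return static_cast<std::int64_t>(imm26) * 4;
}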