diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index 495963eefd..e6aebe002b 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2014 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -31,6 +34,7 @@ using WatchpointArray = std::array args) const = 0; virtual void SetSvcArguments(std::span args) = 0; virtual u32 GetSvcNumber() const = 0; + virtual bool HandleCacheOperation(Kernel::KThread* thread) { + return false; + } void SetWatchpointArray(const WatchpointArray* watchpoints) { m_watchpoints = watchpoints; diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h index 46384f7e6d..79125bb2dc 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project @@ -11,6 +11,7 @@ namespace Core { constexpr Dynarmic::HaltReason StepThread = Dynarmic::HaltReason::Step; +constexpr Dynarmic::HaltReason CacheInvalidation = Dynarmic::HaltReason::CacheInvalidation; constexpr Dynarmic::HaltReason DataAbort = Dynarmic::HaltReason::MemoryAbort; constexpr Dynarmic::HaltReason BreakLoop = Dynarmic::HaltReason::UserDefined2; constexpr Dynarmic::HaltReason SupervisorCall = Dynarmic::HaltReason::UserDefined3; @@ -19,6 +20,7 @@ constexpr Dynarmic::HaltReason PrefetchAbort = Dynarmic::HaltReason::UserDefined constexpr HaltReason TranslateHaltReason(Dynarmic::HaltReason hr) { static_assert(u64(HaltReason::StepThread) == u64(StepThread)); + static_assert(u64(HaltReason::CacheInvalidation) == u64(CacheInvalidation)); static_assert(u64(HaltReason::DataAbort) == 
u64(DataAbort));
     static_assert(u64(HaltReason::BreakLoop) == u64(BreakLoop));
     static_assert(u64(HaltReason::SupervisorCall) == u64(SupervisorCall));
diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp
index 3140a78be3..2a00729338 100644
--- a/src/core/arm/nce/arm_nce.cpp
+++ b/src/core/arm/nce/arm_nce.cpp
@@ -43,6 +43,8 @@ fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
 using namespace Common::Literals;
 
 constexpr u32 StackSize = 128_KiB;
+// Granule, in bytes, that every emulated cache maintenance operation is applied to.
+constexpr u64 CacheLineSize = 64;
 constexpr u64 SplitPageAccessWindow = 64;
 constexpr size_t MaxPreciseAccessPages = 256;
 constexpr u8 MaxPreciseAccessPageWeight = 4;
@@ -378,6 +380,49 @@ void ArmNce::SetSvcArguments(std::span<const uint64_t, 8> args) {
     }
 }
 
+// Services the cache maintenance request recorded in the guest context by patched guest code.
+// Returns false when no request is pending. Otherwise the operation is applied to the one
+// CacheLineSize-aligned line containing the recorded address, the request is cleared, and
+// true is returned.
+bool ArmNce::HandleCacheOperation(Kernel::KThread* thread) {
+    const auto op = static_cast<CacheOperationKind>(m_guest_ctx.cache_operation);
+    if (op == CacheOperationKind::None) {
+        return false;
+    }
+
+    // Round the recorded address down to the start of its cache line.
+    const u64 cache_line_start = m_guest_ctx.cache_operation_address & ~(CacheLineSize - 1);
+    auto& memory = thread->GetOwnerProcess()->GetMemory();
+
+    // The value returned by each memory call is bound to an unused local, i.e. ignored.
+    switch (op) {
+    case CacheOperationKind::DataCacheInvalidate: {
+        [[maybe_unused]] auto invalidate_result =
+            memory.InvalidateDataCache(cache_line_start, CacheLineSize);
+        break;
+    }
+    case CacheOperationKind::DataCacheStore: {
+        [[maybe_unused]] auto store_result = memory.StoreDataCache(cache_line_start, CacheLineSize);
+        break;
+    }
+    case CacheOperationKind::DataCacheFlush: {
+        [[maybe_unused]] auto flush_result = memory.FlushDataCache(cache_line_start, CacheLineSize);
+        break;
+    }
+    case CacheOperationKind::InstructionCacheInvalidate:
+        InvalidateCacheRange(cache_line_start, CacheLineSize);
+        break;
+    case CacheOperationKind::None:
+        // Unreachable: None already returned above.
+        break;
+    }
+
+    // Mark the request as consumed so it is not replayed on the next halt.
+    m_guest_ctx.cache_operation = static_cast<u32>(CacheOperationKind::None);
+    m_guest_ctx.cache_operation_address = 0;
+    return true;
+}
+
 ArmNce::ArmNce(System& system, bool uses_wall_clock, std::size_t core_index)
     : ArmInterface{uses_wall_clock}, m_system{system}, m_core_index{core_index} {
     m_guest_ctx.system = &m_system;
diff --git a/src/core/arm/nce/arm_nce.h b/src/core/arm/nce/arm_nce.h
index 48c82c8437..4c0ffb6517 100644
--- a/src/core/arm/nce/arm_nce.h
+++ b/src/core/arm/nce/arm_nce.h
@@ -41,6 +41,7 @@ public:
     void GetSvcArguments(std::span<uint64_t, 8> args) const override;
     void SetSvcArguments(std::span<const uint64_t, 8> args) override;
     u32 GetSvcNumber() const override;
+    bool HandleCacheOperation(Kernel::KThread* thread) override;
 
     void SignalInterrupt(Kernel::KThread* thread) override;
     void ClearInstructionCache() override;
diff --git a/src/core/arm/nce/guest_context.h b/src/core/arm/nce/guest_context.h
index a7eadccce5..865e883f27 100644
--- a/src/core/arm/nce/guest_context.h
+++ b/src/core/arm/nce/guest_context.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
@@ -38,6 +41,12 @@ struct GuestContext {
     u32 svc{};
     System* system{};
     ArmNce* parent{};
+    // Pending cache maintenance request, stored as a CacheOperationKind value.
+    u32 cache_operation{};
+    // NOTE(review): looks like explicit padding ahead of the u64 below - confirm.
+    u32 cache_operation_reserved{};
+    // Address operand of the pending cache maintenance request.
+    u64 cache_operation_address{};
 };
 
 // Verify assembly offsets.
diff --git a/src/core/arm/nce/patcher.cpp b/src/core/arm/nce/patcher.cpp
index ea77166645..b72e0f4e51 100644
--- a/src/core/arm/nce/patcher.cpp
+++ b/src/core/arm/nce/patcher.cpp
@@ -26,6 +26,25 @@ using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
 constexpr size_t MaxRelativeBranch = 128_MiB;
 constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32);
 
+// Classifies an AArch64 cache maintenance instruction word.
+// The low five bits (the Xt operand register) are masked off before matching, so any operand
+// register is accepted. Returns std::nullopt for every other instruction word.
+[[nodiscard]] std::optional<CacheOperationKind> DecodeCacheOperation(u32 inst) {
+    switch (inst & ~u32{0x1F}) {
+    case 0xD5087620: // DC IVAC, Xt
+        return CacheOperationKind::DataCacheInvalidate;
+    case 0xD50B7A20: // DC CVAC, Xt
+    case 0xD50B7B20: // DC CVAU, Xt
+        return CacheOperationKind::DataCacheStore;
+    case 0xD50B7E20: // DC CIVAC, Xt
+        return CacheOperationKind::DataCacheFlush;
+    case 0xD50B7520: // IC IVAU, Xt
+        return CacheOperationKind::InstructionCacheInvalidate;
+    default:
+        return std::nullopt;
+    }
+}
+
 Patcher::Patcher() : c(m_patch_instructions), c_pre(m_patch_instructions_pre) {
     // The first word of the patch section is always a branch to the first instruction of the
     // module.
@@ -160,6 +179,21 @@ bool Patcher::PatchText(std::span<const u8> program_image, const Kernel::CodeSet
             continue;
         }
 
+        // Cache maintenance by address: replace with a trampoline that hands it to the host.
+        if (auto cache_op = DecodeCacheOperation(inst); cache_op.has_value()) {
+            bool pre_buffer = false;
+            auto ret = AddRelocations(pre_buffer);
+            const auto src_reg = oaknut::XReg{static_cast<int>(inst & 0x1F)};
+            if (pre_buffer) {
+                WriteCacheOperationTrampoline(ret, *cache_op, src_reg, c_pre, m_save_context_pre,
+                                              m_load_context_pre);
+            } else {
+                WriteCacheOperationTrampoline(ret, *cache_op, src_reg, c, m_save_context,
+                                              m_load_context);
+            }
+            continue;
+        }
+
         if (auto exclusive = Exclusive{inst}; exclusive.Verify()) {
             curr_patch->m_exclusives.push_back(i);
         }
@@ -542,6 +576,133 @@ void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id, oaknut
     this->WriteModulePc(module_dest);
 }
 
+// Emits the trampoline that stands in for one patched cache maintenance instruction.
+//
+// First half (runs when the guest reaches the patched instruction): saves the guest context,
+// records op_kind and the operand register's value in GuestContext, then restores the host
+// stack/registers and returns to the host with HaltReason::CacheInvalidation set in X0.
+// Second half (registered as the trampoline for module_dest): reloads the guest context and
+// branches back into the module.
+void Patcher::WriteCacheOperationTrampoline(ModuleDestLabel module_dest,
+                                            CacheOperationKind op_kind, oaknut::XReg src_reg,
+                                            oaknut::VectorCodeGenerator& cg,
+                                            oaknut::Label& save_ctx,
+                                            oaknut::Label& load_ctx) {
+    // Determine if we're writing to the pre-patch buffer.
+    const bool is_pre = (&cg == &c_pre);
+
+    // Guest state is about to be saved, so take the context lock first.
+    this->LockContext(cg);
+
+    // Park guest X30 on the stack across the call, since BL overwrites X30.
+    cg.STR(X30, SP, PRE_INDEXED, -16);
+    cg.BL(save_ctx);
+    cg.LDR(X30, SP, POST_INDEXED, 16);
+
+    // X1 = GuestContext. Record the PC to resume at (literal emitted at pc_after_cache_op).
+    oaknut::Label pc_after_cache_op;
+    cg.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
+    cg.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
+    cg.LDR(X2, pc_after_cache_op);
+    cg.STR(X2, X1, offsetof(GuestContext, pc));
+
+    // Record which operation the host should perform.
+    cg.MOV(X2, static_cast<u32>(op_kind));
+    cg.STR(W2, X1, offsetof(GuestContext, cache_operation));
+
+    // Record the operand. X1 and X2 have already been overwritten above, so the live register
+    // cannot be stored directly; read the value from the saved register file instead.
+    // NOTE(review): relies on save_ctx having written X0-X30 to cpu_registers - confirm.
+    if (src_reg.index() == 31) {
+        // Register number 31 is XZR here; cpu_registers has no slot for it.
+        cg.STR(XZR, X1, offsetof(GuestContext, cache_operation_address));
+    } else {
+        cg.LDR(X2, X1,
+               offsetof(GuestContext, cpu_registers) +
+                   sizeof(u64) * static_cast<size_t>(src_reg.index()));
+        cg.STR(X2, X1, offsetof(GuestContext, cache_operation_address));
+    }
+
+    // Atomically fetch and clear esr_el1 (the pending halt reasons) into X0.
+    static_assert(std::is_same_v<std::underlying_type_t<HaltReason>, u64>);
+    oaknut::Label retry;
+    cg.ADD(X2, X1, offsetof(GuestContext, esr_el1));
+    cg.l(retry);
+    cg.LDAXR(X0, X2);
+    cg.STLXR(W3, XZR, X2);
+    cg.CBNZ(W3, retry);
+
+    // Add the cache operation flag. X0 is the value returned to the host.
+    cg.ORR(X0, X0, static_cast<u64>(HaltReason::CacheInvalidation));
+
+    // X1 = HostContext.
+    cg.ADD(X1, X1, offsetof(GuestContext, host_ctx));
+
+    // Restore the host stack pointer and TPIDR_EL0 with a single paired load.
+    static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0));
+    cg.LDP(X2, X3, X1, offsetof(HostContext, host_sp));
+    cg.MOV(SP, X2);
+    cg.MSR(oaknut::SystemReg::TPIDR_EL0, X3);
+
+    // Restore the host's saved X19-X30 and Q8-Q15, then return to the host.
+    static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs);
+    static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs);
+    cg.LDP(X19, X20, X1, HOST_REGS_OFF);
+    cg.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64));
+    cg.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64));
+    cg.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64));
+    cg.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64));
+    cg.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64));
+    cg.LDP(Q8, Q9, X1, HOST_VREGS_OFF);
+    cg.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128));
+    cg.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128));
+    cg.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128));
+    cg.RET();
+
+    // Everything emitted from here on is the re-entry trampoline for module_dest.
+    if (is_pre) {
+        curr_patch->m_trampolines_pre.push_back({cg.offset(), module_dest});
+    } else {
+        curr_patch->m_trampolines.push_back({cg.offset(), module_dest});
+    }
+
+    // Store the incoming X30 into the last host_saved_regs slot, which is the slot X30 is
+    // restored from before the RET above.
+    cg.MRS(X2, oaknut::SystemReg::TPIDR_EL0);
+    cg.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context));
+    cg.ADD(X0, X2, offsetof(GuestContext, host_ctx));
+    cg.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64));
+
+    // Reload the guest context, again parking X30 on the stack across the call.
+    cg.STR(X30, SP, PRE_INDEXED, -16);
+    cg.BL(load_ctx);
+    cg.LDR(X30, SP, POST_INDEXED, 16);
+
+    // Restore guest X30 from the saved registers, borrowing X1 as a scratch register.
+    cg.STR(X1, SP, PRE_INDEXED, -16);
+    cg.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
+    cg.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
+    cg.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30);
+    cg.LDR(X1, SP, POST_INDEXED, 16);
+
+    this->UnlockContext(cg);
+
+    // Branch back into the module at module_dest.
+    if (is_pre) {
+        this->BranchToModulePre(module_dest);
+    } else {
+        this->BranchToModule(module_dest);
+    }
+
+    // Emit the literal holding the resume PC that the first half loads.
+    cg.l(pc_after_cache_op);
+    if (is_pre) {
+        this->WriteModulePcPre(module_dest);
+    } else {
+        this->WriteModulePc(module_dest);
+    }
+}
+
 void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
                               oaknut::SystemReg src_reg, oaknut::VectorCodeGenerator& cg) {
     // Retrieve emulated TLS register from GuestContext.
diff --git a/src/core/arm/nce/patcher.h b/src/core/arm/nce/patcher.h
index 499c98c901..534e15119c 100644
--- a/src/core/arm/nce/patcher.h
+++ b/src/core/arm/nce/patcher.h
@@ -78,6 +78,12 @@ private:
     void LockContext(oaknut::VectorCodeGenerator& code);
     void UnlockContext(oaknut::VectorCodeGenerator& code);
     void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id, oaknut::VectorCodeGenerator& code, oaknut::Label& save_ctx, oaknut::Label& load_ctx);
+    // Emits the trampoline for a patched cache maintenance instruction into `code`.
+    void WriteCacheOperationTrampoline(ModuleDestLabel module_dest, CacheOperationKind op_kind,
+                                       oaknut::XReg src_reg,
+                                       oaknut::VectorCodeGenerator& code,
+                                       oaknut::Label& save_ctx,
+                                       oaknut::Label& load_ctx);
     void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::SystemReg src_reg, oaknut::VectorCodeGenerator& code);
     void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg, oaknut::VectorCodeGenerator& code);
     void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::VectorCodeGenerator& code);
@@ -88,6 +94,12 @@ private:
     void LockContext() { LockContext(c); }
     void UnlockContext() { UnlockContext(c); }
     void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) { WriteSvcTrampoline(module_dest, svc_id, c, m_save_context, m_load_context); }
+    // Overload that targets `c` with m_save_context / m_load_context.
+    void WriteCacheOperationTrampoline(ModuleDestLabel module_dest, CacheOperationKind op_kind,
+                                       oaknut::XReg src_reg) {
+        WriteCacheOperationTrampoline(module_dest, op_kind, src_reg, c, m_save_context,
+                                      m_load_context);
+    }
     void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::SystemReg src_reg) { WriteMrsHandler(module_dest, dest_reg, src_reg, c); }
     void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) { WriteMsrHandler(module_dest, src_reg, c); }
     void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) { WriteCntpctHandler(module_dest, dest_reg, c); }
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp
index 77cdab76d7..2078472e3d 100644
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later
 
 // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
@@ -97,6 +97,7 @@ void PhysicalCore::RunThread(Kernel::KThread* thread) {
         }
 
         // Determine why we stopped.
+        const bool cache_invalidation = True(hr & Core::HaltReason::CacheInvalidation);
         const bool supervisor_call = True(hr & Core::HaltReason::SupervisorCall);
         const bool prefetch_abort = True(hr & Core::HaltReason::PrefetchAbort);
         const bool breakpoint = True(hr & Core::HaltReason::InstructionBreakpoint);
@@ -151,6 +152,13 @@ void PhysicalCore::RunThread(Kernel::KThread* thread) {
             return;
         }
 
+        // Service a pending guest cache maintenance request. There is intentionally no
+        // `continue` here: hr can carry BreakLoop at the same time, and the interrupt /
+        // single-core check below must still get the chance to return.
+        if (cache_invalidation) {
+            interface->HandleCacheOperation(thread);
+        }
+
         // Handle external interrupt sources.
         if (interrupt || m_is_single_core) {
             return;