From 84cf3e8c84b785363fd89464ae9bb22fd3cf007e Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 8 Nov 2025 00:42:10 +0100 Subject: [PATCH] [nce] remove software prefetching instances (#2857) May be a complete hit or miss on performance with NCE Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2857 Reviewed-by: crueter Reviewed-by: Caio Oliveira Co-authored-by: lizzie Co-committed-by: lizzie --- src/core/arm/nce/arm_nce.cpp | 22 ---------------- src/core/arm/nce/interpreter_visitor.cpp | 32 ------------------------ 2 files changed, 54 deletions(-) diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp index 0e0d72fc8a..f0b61f8042 100644 --- a/src/core/arm/nce/arm_nce.cpp +++ b/src/core/arm/nce/arm_nce.cpp @@ -388,14 +388,6 @@ void ArmNce::SignalInterrupt(Kernel::KThread* thread) { const std::size_t CACHE_PAGE_SIZE = 4096; void ArmNce::ClearInstructionCache() { -#if defined(__GNUC__) || defined(__clang__) - void* start = (void*)((uintptr_t)__builtin_return_address(0) & ~(CACHE_PAGE_SIZE - 1)); - void* end = - (void*)((uintptr_t)start + CACHE_PAGE_SIZE * 2); // Clear two pages for better coverage - // Prefetch next likely pages - __builtin_prefetch((void*)((uintptr_t)end), 1, 3); - __builtin___clear_cache(static_cast(start), static_cast(end)); -#endif #ifdef __aarch64__ // Ensure all previous memory operations complete asm volatile("dmb ish" ::: "memory"); @@ -405,20 +397,6 @@ void ArmNce::ClearInstructionCache() { } void ArmNce::InvalidateCacheRange(u64 addr, std::size_t size) { - #if defined(__GNUC__) || defined(__clang__) - // Align the start address to cache line boundary for better performance - const size_t CACHE_LINE_SIZE = 64; - addr &= ~(CACHE_LINE_SIZE - 1); - - // Round up size to nearest cache line - size = (size + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1); - - // Prefetch the range to be invalidated - for (size_t offset = 0; offset < size; offset += CACHE_LINE_SIZE) { - __builtin_prefetch((void*)(addr + offset), 1, 3); - } - #endif - this->ClearInstructionCache(); } diff --git a/src/core/arm/nce/interpreter_visitor.cpp b/src/core/arm/nce/interpreter_visitor.cpp index aa796ed929..78b78f8067 100644 --- a/src/core/arm/nce/interpreter_visitor.cpp +++ b/src/core/arm/nce/interpreter_visitor.cpp @@ -181,14 +181,6 @@ bool InterpreterVisitor::Ordered(size_t size, bool L, bool o0, Reg Rn, Reg Rt) { const size_t dbytes = datasize / 8; u64 address = (Rn == Reg::SP) ? this->GetSp() : this->GetReg(Rn); - - // Conservative prefetch for atomic ops - if (memop == MemOp::Load) { - __builtin_prefetch(reinterpret_cast(address), 0, 1); - } else { - __builtin_prefetch(reinterpret_cast(address), 1, 1); - } - switch (memop) { case MemOp::Store: { std::atomic_thread_fence(std::memory_order_seq_cst); @@ -435,21 +427,6 @@ bool InterpreterVisitor::RegisterImmediate(bool wback, bool postindex, size_t sc if (!postindex) address += offset; - // Optimized prefetch for loads - if (memop == MemOp::Load) { - const size_t access_size = datasize / 8; - const bool is_aligned = (address % access_size) == 0; - - if (is_aligned) { - __builtin_prefetch(reinterpret_cast(address), 0, 3); - if (access_size >= 8 && access_size <= 32) { - __builtin_prefetch(reinterpret_cast(address + PREFETCH_STRIDE), 0, 3); - } - } else { - __builtin_prefetch(reinterpret_cast(address), 0, 1); - } - } - switch (memop) { case MemOp::Store: { u64 data = this->GetReg(Rt); @@ -516,15 +493,6 @@ bool InterpreterVisitor::SIMDImmediate(bool wback, bool postindex, size_t scale, if (!postindex) address += offset; - // Aggressive prefetch for SIMD - if (memop == MemOp::Load) { - __builtin_prefetch(reinterpret_cast(address), 0, 3); - __builtin_prefetch(reinterpret_cast(address + CACHE_LINE_SIZE), 0, 3); - if (datasize >= SIMD_PREFETCH_THRESHOLD) { - __builtin_prefetch(reinterpret_cast(address + PREFETCH_STRIDE), 0, 3); - } - } - switch (memop) { case MemOp::Store: { u128 data = VectorGetElement(this->GetVec(Vt), datasize);