From 754883db977216f7cf3d1fdd290e397eed04bad6 Mon Sep 17 00:00:00 2001
From: lizzie
Date: Sat, 29 Nov 2025 10:29:10 +0000
Subject: [PATCH] [core] pin core threads to logical CPUs 0-3

this basically allows the threads to exist in these logical CPUs, undisturbed, and without thrashing each other's cache
this could improve performance, very tricky thing to pull off correctly, but again, this is mostly an experiment
will mainly benefit: Linux, Android, FreeBSD, Windows (not ARM)
Additionally, this means no context thrashing :)

Signed-off-by: lizzie
---
NOTE(review): this copy of the patch appears to have lost everything that was
written inside angle brackets (header names after `#include`, the template
argument of `std::make_unique`, the author e-mail); restore those from the
target tree before applying. Line breaks and indentation were reconstructed
from a whitespace-collapsed copy, so verify context lines against the tree.

 src/common/thread.cpp    | 56 ++++++++++++++++++++++++++++------------
 src/common/thread.h      |  2 +-
 src/core/cpu_manager.cpp | 16 ++++--------
 3 files changed, 46 insertions(+), 28 deletions(-)

diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index 482848b244..465ee64d11 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -5,9 +5,13 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include
+#include
+#include
+#include
 
 #include "common/error.h"
 #include "common/logging/log.h"
+#include "common/assert.h"
 #include "common/thread.h"
 #ifdef __APPLE__
 #include
@@ -28,7 +32,7 @@
 #endif
 
 #ifdef __FreeBSD__
-#define cpu_set_t cpuset_t
+# define cpu_set_t cpuset_t
 #endif
 
 namespace Common {
@@ -77,22 +81,12 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) {
 #endif
 }
 
-#ifdef _MSC_VER
-
-// Sets the debugger-visible name of the current thread.
 void SetCurrentThreadName(const char* name) {
-    static auto pf = (decltype(&SetThreadDescription))(void*)GetProcAddress(GetModuleHandle(TEXT("KernelBase.dll")), "SetThreadDescription");
-    if (pf)
+#ifdef _MSC_VER
+    // Sets the debugger-visible name of the current thread.
+    if (auto pf = (decltype(&SetThreadDescription))(void*)GetProcAddress(GetModuleHandle(TEXT("KernelBase.dll")), "SetThreadDescription"); pf)
         pf(GetCurrentThread(), UTF8ToUTF16W(name).data()); // Windows 10+
-}
-
-#else // !MSVC_VER, so must be POSIX threads
-
-// MinGW with the POSIX threading model does not support pthread_setname_np
-void SetCurrentThreadName(const char* name) {
-    // See for reference
-    // https://gitlab.freedesktop.org/mesa/mesa/-/blame/main/src/util/u_thread.c?ref_type=heads#L75
-#ifdef __APPLE__
+#elif defined(__APPLE__)
     pthread_setname_np(name);
 #elif defined(__HAIKU__)
     rename_thread(find_thread(NULL), name);
@@ -112,13 +106,43 @@ void SetCurrentThreadName(const char* name) {
         pthread_setname_np(pthread_self(), buf);
     }
 #elif defined(_WIN32)
-    // mingw stub
+    // MinGW with the POSIX threading model does not support pthread_setname_np
+    // See for reference
+    // https://gitlab.freedesktop.org/mesa/mesa/-/blame/main/src/util/u_thread.c?ref_type=heads#L75
     (void)name;
 #else
     pthread_setname_np(pthread_self(), name);
 #endif
 }
 
+// Pins the calling thread to the logical CPU with index core_id (best-effort).
+// NOTE(review): the index is used as-is; nothing here checks that this CPU is a
+// performance core or a distinct physical core -- confirm on hybrid/SMT hosts.
+void PinCurrentThreadToPerformanceCore(size_t core_id) {
+    // core_id is unsigned, so only the upper bound needs checking.
+    ASSERT(core_id < 4);
+    // If we set a flag for a CPU that doesn't exist, the thread may not be allowed to
+    // run in ANY processor! hardware_concurrency() may return 0 when the count is
+    // unknown; no pinning is attempted then either.
+    auto const total_cores = std::thread::hardware_concurrency();
+    if (core_id >= total_cores)
+        return;
+#if defined(__linux__) || defined(__FreeBSD__)
+    cpu_set_t set;
+    CPU_ZERO(&set);
+    CPU_SET(core_id, &set);
+    // Report a failed pin instead of silently dropping it.
+    if (int const rc = pthread_setaffinity_np(pthread_self(), sizeof(set), &set); rc != 0)
+        LOG_WARNING(Common, "Failed to pin thread to CPU {} (error {})", core_id, rc);
+#elif defined(_WIN32)
+    // The affinity mask parameter is a DWORD_PTR, so build the bit in that type.
+    DWORD_PTR const mask = DWORD_PTR{1} << core_id;
+    // SetThreadAffinityMask returns 0 on failure.
+    if (SetThreadAffinityMask(GetCurrentThread(), mask) == 0)
+        LOG_WARNING(Common, "Failed to pin thread to CPU {} (error {})", core_id, GetLastError());
+#else
+    // No pin functionality implemented
 #endif
+}
 
 } // namespace Common
diff --git a/src/common/thread.h b/src/common/thread.h
index 5ab495baaa..82b1372e7e 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -106,7 +106,7 @@ enum class ThreadPriority : u32 {
 };
 
 void SetCurrentThreadPriority(ThreadPriority new_priority);
-
 void SetCurrentThreadName(const char* name);
+void PinCurrentThreadToPerformanceCore(size_t core_id);
 
 } // namespace Common
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 1a80fcfaa4..e7e4566f97 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -7,6 +7,7 @@
 #include "common/fiber.h"
 #include "common/scope_exit.h"
 #include "common/thread.h"
+#include "common/settings.h"
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/cpu_manager.h"
@@ -25,11 +26,8 @@ CpuManager::~CpuManager() = default;
 void CpuManager::Initialize() {
     num_cores = is_multicore ? Core::Hardware::NUM_CPU_CORES : 1;
     gpu_barrier = std::make_unique(num_cores + 1);
-
-    for (std::size_t core = 0; core < num_cores; core++) {
-        core_data[core].host_thread =
-            std::jthread([this, core](std::stop_token token) { RunThread(token, core); });
-    }
+    for (std::size_t core = 0; core < num_cores; core++)
+        core_data[core].host_thread = std::jthread([this, core](std::stop_token token) { RunThread(token, core); });
 }
 
 void CpuManager::Shutdown() {
@@ -188,14 +186,10 @@ void CpuManager::ShutdownThread() {
 void CpuManager::RunThread(std::stop_token token, std::size_t core) {
     /// Initialization
     system.RegisterCoreThread(core);
-    std::string name;
-    if (is_multicore) {
-        name = "CPUCore_" + std::to_string(core);
-    } else {
-        name = "CPUThread";
-    }
+    std::string name = is_multicore ? ("CPUCore_" + std::to_string(core)) : std::string{"CPUThread"};
     Common::SetCurrentThreadName(name.c_str());
     Common::SetCurrentThreadPriority(Common::ThreadPriority::Critical);
+    Common::PinCurrentThreadToPerformanceCore(core);
     auto& data = core_data[core];
     data.host_context = Common::Fiber::ThreadToFiber();
 