[core/hle/kernel] coalesce TLS from KernelCore to reduce query times (#3283)

- each time you reference a TLS object that needs a destructor, the compiler emits guard checks and registers the destructor (via `__cxa_thread_atexit()` on the Itanium C++ ABI)
- every lookup also goes through a segment-prefixed load (`mov %fs:...` on x86_64); these are EXPENSIVE compared to plain loads since they break pipelining
- coalescing everything into one struct occupies fewer TLS slots on Windows :) (a minimal sketch of the pattern follows below)
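
Roughly, the pattern is to replace several standalone `thread_local` variables with a single `thread_local` struct. A minimal standalone sketch of the before/after (hypothetical stand-in types and names, not the committed code):

```cpp
#include <cstdint>

// Before: one TLS slot per variable. Each access is its own %fs-relative
// load, and any variable with a non-trivial destructor would additionally
// be registered via __cxa_thread_atexit on first use:
//
//   static inline thread_local void* current_thread = nullptr;
//   static inline thread_local std::uint8_t host_thread_id = UINT8_MAX;
//   static inline thread_local bool is_phantom_mode = false;

// After: a single TLS slot, and at most one destructor registration for
// the whole struct instead of one per variable.
struct ThreadLocalData {
    void* current_thread = nullptr;  // stand-in for KThread*
    std::uint8_t host_thread_id = UINT8_MAX;
    bool is_phantom_mode = false;
};
static thread_local ThreadLocalData tls_data = {};

int main() {
    // Take the reference once; subsequent accesses are plain loads/stores
    // through it rather than repeated TLS lookups.
    auto& tls = tls_data;
    tls.host_thread_id = 0;
    tls.is_phantom_mode = false;
    return tls.host_thread_id;
}
```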

Signed-off-by: lizzie <lizzie@eden-emu.dev>

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3283
Reviewed-by: DraVee <dravee@eden-emu.dev>
Reviewed-by: Maufeat <sahyno1996@gmail.com>
Co-authored-by: lizzie <lizzie@eden-emu.dev>
Co-committed-by: lizzie <lizzie@eden-emu.dev>
commit c21f92340b, committed by crueter
src/core/hle/kernel/kernel.cpp (161 lines changed)
@@ -50,13 +50,38 @@
namespace Kernel {
// Can only be used by a single implementation PER THREAD
struct ThreadLocalData {
std::optional<KThread> raw_thread;
KThread* current_thread = nullptr;
KThread* thread = nullptr;
u8 host_thread_id = UINT8_MAX;
bool is_phantom_mode_for_singlecore = false;
bool lock = false;
};
struct KernelCore::Impl {
static constexpr size_t ApplicationMemoryBlockSlabHeapSize = 20000;
static constexpr size_t SystemMemoryBlockSlabHeapSize = 10000;
static constexpr size_t BlockInfoSlabHeapSize = 4000;
static constexpr size_t ReservedDynamicPageCount = 64;
explicit Impl(Core::System& system_, KernelCore& kernel_) : system{system_} {}
// Be very careful when handling TLS data
// We do not want to concern ourselves with the appropriate way to manage them
// across **all** threads, we just need these for a few spare threads (+host/guest threads)
//
// Do not just read straight from here, use a reference beforehand, the cost of reading
// from TLS is greater than the cost of reading normal variables.
// But account that this Impl() is instanced once per program, and shared across threads
// so we can't use a reference for now.
//
// And we have the guarantee that the data won't move out of the way so we can safely
// take a reference to it. This isn't always universally true but this is "global" data
// so it will be statically given a TLS slot anyways.
static inline thread_local ThreadLocalData tls_data = {};
explicit Impl(Core::System& system_, KernelCore& kernel_) : system{system_} {
tls_data.lock = true;
}
void SetMulticore(bool is_multi) {
is_multicore = is_multi;
@@ -69,8 +94,6 @@ struct KernelCore::Impl {
global_object_list_container = std::make_unique<KAutoObjectWithListContainer>(kernel);
global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel);
is_phantom_mode_for_singlecore = false;
// Derive the initial memory layout from the emulated board
Init::InitializeSlabResourceCounts(kernel);
DeriveInitialMemoryLayout();
@@ -88,9 +111,7 @@ struct KernelCore::Impl {
{
const auto& pt_heap_region = memory_layout->GetPageTableHeapRegion();
ASSERT(pt_heap_region.GetEndAddress() != 0);
InitializeResourceManagers(kernel, pt_heap_region.GetAddress(),
pt_heap_region.GetSize());
InitializeResourceManagers(kernel, pt_heap_region.GetAddress(), pt_heap_region.GetSize());
}
InitializeHackSharedMemory(kernel);
@@ -222,17 +243,11 @@ struct KernelCore::Impl {
const auto kernel_size{sizes.second};
// If setting the default system values fails, then something seriously wrong has occurred.
ASSERT(
system_resource_limit->SetLimitValue(LimitableResource::PhysicalMemoryMax, total_size)
.IsSuccess());
ASSERT(system_resource_limit->SetLimitValue(LimitableResource::ThreadCountMax, 800)
.IsSuccess());
ASSERT(system_resource_limit->SetLimitValue(LimitableResource::EventCountMax, 900)
.IsSuccess());
ASSERT(system_resource_limit->SetLimitValue(LimitableResource::TransferMemoryCountMax, 200)
.IsSuccess());
ASSERT(system_resource_limit->SetLimitValue(LimitableResource::SessionCountMax, 1133)
.IsSuccess());
ASSERT(system_resource_limit->SetLimitValue(LimitableResource::PhysicalMemoryMax, total_size).IsSuccess());
ASSERT(system_resource_limit->SetLimitValue(LimitableResource::ThreadCountMax, 800).IsSuccess());
ASSERT(system_resource_limit->SetLimitValue(LimitableResource::EventCountMax, 900).IsSuccess());
ASSERT(system_resource_limit->SetLimitValue(LimitableResource::TransferMemoryCountMax, 200).IsSuccess());
ASSERT(system_resource_limit->SetLimitValue(LimitableResource::SessionCountMax, 1133).IsSuccess());
system_resource_limit->Reserve(LimitableResource::PhysicalMemoryMax, kernel_size);
// Reserve secure applet memory, introduced in firmware 5.0.0
@@ -242,16 +257,13 @@ struct KernelCore::Impl {
}
void InitializePreemption(KernelCore& kernel) {
preemption_event = Core::Timing::CreateEvent(
"PreemptionCallback",
[this, &kernel](s64 time,
std::chrono::nanoseconds) -> std::optional<std::chrono::nanoseconds> {
{
KScopedSchedulerLock lock(kernel);
global_scheduler_context->PreemptThreads();
}
return std::nullopt;
});
preemption_event = Core::Timing::CreateEvent("PreemptionCallback", [this, &kernel](s64 time, std::chrono::nanoseconds) -> std::optional<std::chrono::nanoseconds> {
{
KScopedSchedulerLock lock(kernel);
global_scheduler_context->PreemptThreads();
}
return std::nullopt;
});
const auto time_interval = std::chrono::nanoseconds{std::chrono::milliseconds(10)};
system.CoreTiming().ScheduleLoopingEvent(time_interval, time_interval, preemption_event);
@@ -263,15 +275,13 @@ struct KernelCore::Impl {
ASSERT(Common::IsAligned(size, PageSize));
// Ensure that we have space for our reference counts.
const size_t rc_size =
Common::AlignUp(KPageTableSlabHeap::CalculateReferenceCountSize(size), PageSize);
const size_t rc_size = Common::AlignUp(KPageTableSlabHeap::CalculateReferenceCountSize(size), PageSize);
ASSERT(rc_size < size);
size -= rc_size;
// Initialize the resource managers' shared page manager.
resource_manager_page_manager = std::make_unique<KDynamicPageManager>();
resource_manager_page_manager->Initialize(
address, size, std::max<size_t>(PageSize, KPageBufferSlabHeap::BufferSize));
resource_manager_page_manager->Initialize(address, size, std::max<size_t>(PageSize, KPageBufferSlabHeap::BufferSize));
// Initialize the KPageBuffer slab heap.
page_buffer_slab_heap.Initialize(system);
@@ -280,16 +290,12 @@ struct KernelCore::Impl {
app_memory_block_heap = std::make_unique<KMemoryBlockSlabHeap>();
sys_memory_block_heap = std::make_unique<KMemoryBlockSlabHeap>();
block_info_heap = std::make_unique<KBlockInfoSlabHeap>();
app_memory_block_heap->Initialize(resource_manager_page_manager.get(),
ApplicationMemoryBlockSlabHeapSize);
sys_memory_block_heap->Initialize(resource_manager_page_manager.get(),
SystemMemoryBlockSlabHeapSize);
app_memory_block_heap->Initialize(resource_manager_page_manager.get(), ApplicationMemoryBlockSlabHeapSize);
sys_memory_block_heap->Initialize(resource_manager_page_manager.get(), SystemMemoryBlockSlabHeapSize);
block_info_heap->Initialize(resource_manager_page_manager.get(), BlockInfoSlabHeapSize);
// Reserve all but a fixed number of remaining pages for the page table heap.
const size_t num_pt_pages = resource_manager_page_manager->GetCount() -
resource_manager_page_manager->GetUsed() -
ReservedDynamicPageCount;
const size_t num_pt_pages = resource_manager_page_manager->GetCount() - resource_manager_page_manager->GetUsed() - ReservedDynamicPageCount;
page_table_heap = std::make_unique<KPageTableSlabHeap>();
// TODO(bunnei): Pass in address once we support kernel virtual memory allocations.
@@ -301,8 +307,8 @@ struct KernelCore::Impl {
KDynamicPageManager* const app_dynamic_page_manager = nullptr;
KDynamicPageManager* const sys_dynamic_page_manager =
/*KTargetSystem::IsDynamicResourceLimitsEnabled()*/ true
? resource_manager_page_manager.get()
: nullptr;
? resource_manager_page_manager.get()
: nullptr;
app_memory_block_manager = std::make_unique<KMemoryBlockSlabManager>();
sys_memory_block_manager = std::make_unique<KMemoryBlockSlabManager>();
app_block_info_manager = std::make_unique<KBlockInfoManager>();
@@ -320,9 +326,7 @@ struct KernelCore::Impl {
sys_page_table_manager->Initialize(sys_dynamic_page_manager, page_table_heap.get());
// Check that we have the correct number of dynamic pages available.
ASSERT(resource_manager_page_manager->GetCount() -
resource_manager_page_manager->GetUsed() ==
ReservedDynamicPageCount);
ASSERT(resource_manager_page_manager->GetCount() - resource_manager_page_manager->GetUsed() == ReservedDynamicPageCount);
// Create the system page table managers.
app_system_resource = std::make_unique<KSystemResource>(kernel);
@@ -331,18 +335,15 @@ struct KernelCore::Impl {
KAutoObject::Create(std::addressof(*sys_system_resource));
// Set the managers for the system resources.
app_system_resource->SetManagers(*app_memory_block_manager, *app_block_info_manager,
*app_page_table_manager);
sys_system_resource->SetManagers(*sys_memory_block_manager, *sys_block_info_manager,
*sys_page_table_manager);
app_system_resource->SetManagers(*app_memory_block_manager, *app_block_info_manager, *app_page_table_manager);
sys_system_resource->SetManagers(*sys_memory_block_manager, *sys_block_info_manager, *sys_page_table_manager);
}
void InitializeShutdownThreads() {
for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
shutdown_threads[core_id] = KThread::Create(system.Kernel());
ASSERT(KThread::InitializeHighPriorityThread(system, shutdown_threads[core_id], {}, {},
core_id)
.IsSuccess());
ASSERT(KThread::InitializeHighPriorityThread(system, shutdown_threads[core_id], {}, {}, core_id)
.IsSuccess());
KThread::Register(system.Kernel(), shutdown_threads[core_id]);
}
}
@@ -356,83 +357,77 @@ struct KernelCore::Impl {
application_process->Open();
}
static inline thread_local u8 host_thread_id = UINT8_MAX;
/// Sets the host thread ID for the caller.
u32 SetHostThreadId(std::size_t core_id) {
// This should only be called during core init.
ASSERT(host_thread_id == UINT8_MAX);
ASSERT(tls_data.host_thread_id == UINT8_MAX);
// The first four slots are reserved for CPU core threads
ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
host_thread_id = static_cast<u8>(core_id);
return host_thread_id;
tls_data.host_thread_id = u8(core_id);
return tls_data.host_thread_id;
}
/// Gets the host thread ID for the caller
u32 GetHostThreadId() const {
return host_thread_id;
return tls_data.host_thread_id;
}
// Gets the dummy KThread for the caller, allocating a new one if this is the first time
KThread* GetHostDummyThread(KThread* existing_thread) {
const auto initialize{[](KThread* thread) {
ASSERT(KThread::InitializeDummyThread(thread, nullptr).IsSuccess());
return thread;
}};
thread_local KThread raw_thread{system.Kernel()};
thread_local KThread* thread = existing_thread ? existing_thread : initialize(&raw_thread);
return thread;
if (tls_data.thread == nullptr) {
auto const initialize{[](KThread* thread) {
ASSERT(KThread::InitializeDummyThread(thread, nullptr).IsSuccess());
return thread;
}};
tls_data.raw_thread.emplace(system.Kernel());
tls_data.thread = existing_thread ? existing_thread : initialize(&*tls_data.raw_thread);
ASSERT(tls_data.thread != nullptr);
}
return tls_data.thread;
}
/// Registers a CPU core thread by allocating a host thread ID for it
void RegisterCoreThread(std::size_t core_id) {
ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
const auto this_id = SetHostThreadId(core_id);
if (!is_multicore) {
if (!is_multicore)
single_core_thread_id = this_id;
}
}
/// Registers a new host thread by allocating a host thread ID for it
void RegisterHostThread(KThread* existing_thread) {
[[maybe_unused]] const auto dummy_thread = GetHostDummyThread(existing_thread);
(void)GetHostDummyThread(existing_thread);
}
[[nodiscard]] u32 GetCurrentHostThreadID() {
const auto this_id = GetHostThreadId();
if (!is_multicore && single_core_thread_id == this_id) {
return static_cast<u32>(system.GetCpuManager().CurrentCore());
}
auto const this_id = GetHostThreadId();
if (!is_multicore && single_core_thread_id == this_id)
return u32(system.GetCpuManager().CurrentCore());
return this_id;
}
static inline thread_local bool is_phantom_mode_for_singlecore{false};
// Forces singlecore
bool IsPhantomModeForSingleCore() const {
return is_phantom_mode_for_singlecore;
return tls_data.is_phantom_mode_for_singlecore;
}
void SetIsPhantomModeForSingleCore(bool value) {
ASSERT(!is_multicore);
is_phantom_mode_for_singlecore = value;
tls_data.is_phantom_mode_for_singlecore = value;
}
bool IsShuttingDown() const {
return is_shutting_down.load(std::memory_order_relaxed);
}
static inline thread_local KThread* current_thread{nullptr};
KThread* GetCurrentEmuThread() {
if (!current_thread) {
current_thread = GetHostDummyThread(nullptr);
}
return current_thread;
if (!tls_data.current_thread)
tls_data.current_thread = GetHostDummyThread(nullptr);
return tls_data.current_thread;
}
void SetCurrentEmuThread(KThread* thread) {
current_thread = thread;
tls_data.current_thread = thread;
}
void DeriveInitialMemoryLayout() {

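
The in-diff comment advises taking a reference to `tls_data` before reading it repeatedly, and the reworked `GetHostDummyThread()` lazily constructs the per-thread dummy thread on first use. A runnable sketch of that access pattern, with a hypothetical `Thread` standing in for `KThread`:

```cpp
#include <cstdio>
#include <optional>

struct Thread { int id = -1; };  // hypothetical stand-in for KThread

struct ThreadLocalData {
    std::optional<Thread> raw_thread;  // storage for the lazily-built dummy
    Thread* thread = nullptr;          // set once, reused afterwards
};
static thread_local ThreadLocalData tls_data = {};

// Mirrors the shape of GetHostDummyThread(): build the dummy thread
// in-place the first time this host thread asks for one.
Thread* GetHostDummyThread(Thread* existing_thread) {
    auto& tls = tls_data;  // one TLS lookup; the rest are plain loads/stores
    if (tls.thread == nullptr) {
        tls.raw_thread.emplace();
        tls.thread = existing_thread ? existing_thread : &*tls.raw_thread;
    }
    return tls.thread;
}

int main() {
    Thread* a = GetHostDummyThread(nullptr);
    Thread* b = GetHostDummyThread(nullptr);
    std::printf("cached: %s\n", a == b ? "yes" : "no");  // prints "cached: yes"
}
```

The real code additionally asserts that the cached pointer is non-null; the point of the sketch is just that the TLS slot is touched once per call rather than once per member access.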