Browse Source
[core/core_timing] better MWAITX and WAITPKG delays (#3984)
[core/core_timing] better MWAITX and WAITPKG delays (#3984)
This implements support for the MWAITX and WAITPKG extensions (umonitor, mwait) on CPUs that provide them. It reduces wait times and bypasses the OS timing facilities, which are slow (notably on Windows). Generally it should answer within 0.2 to 0.5 microseconds (since most requests wait for that long). It also does a general rework of static constructors and related code. Signed-off-by: lizzie <lizzie@eden-emu.dev> Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3984 Reviewed-by: MaranBr <maranbr@eden-emu.dev> Reviewed-by: crueter <crueter@eden-emu.dev> pull/4029/head
committed by
crueter
No known key found for this signature in database
GPG Key ID: 425ACD2D4830EBC6
19 changed files with 472 additions and 528 deletions
-
11src/common/CMakeLists.txt
-
316src/common/cpu_features.cpp
-
90src/common/cpu_features.h
-
106src/common/thread.cpp
-
14src/common/thread.h
-
197src/common/wall_clock.cpp
-
82src/common/x64/cpu_detect.h
-
75src/common/x64/cpu_wait.cpp
-
10src/common/x64/cpu_wait.h
-
2src/core/arm/nce/patcher.cpp
-
38src/core/core_timing.cpp
-
27src/core/core_timing.h
-
4src/core/hle/service/nvnflinger/buffer_queue_producer.cpp
-
3src/core/hle/service/nvnflinger/buffer_queue_producer.h
-
4src/core/hle/service/psc/time/common.h
-
7src/qt_common/qt_common.cpp
-
7src/video_core/host1x/vic.cpp
-
3src/video_core/shader_environment.cpp
-
4src/yuzu/main_window.cpp
@ -1,197 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|||
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
|||
|
|||
#include "common/steady_clock.h"
|
|||
#include "common/uint128.h"
|
|||
#include "common/wall_clock.h"
|
|||
|
|||
#ifdef __ANDROID__
|
|||
#include <sys/system_properties.h>
|
|||
#endif
|
|||
#ifdef ARCHITECTURE_x86_64
|
|||
#include "common/x64/cpu_detect.h"
|
|||
#include "common/x64/rdtsc.h"
|
|||
#endif
|
|||
|
|||
namespace Common { |
|||
|
|||
#if defined(ARCHITECTURE_x86_64)
|
|||
WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept |
|||
: invariant{invariant_} |
|||
, rdtsc_frequency{rdtsc_frequency_} |
|||
, ns_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency_)} |
|||
, us_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency_)} |
|||
, ms_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency_)} |
|||
, cntpct_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency_)} |
|||
, gputick_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency_)} |
|||
{} |
|||
|
|||
std::chrono::nanoseconds WallClock::GetTimeNS() const { |
|||
if (invariant) |
|||
return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now().time_since_epoch()); |
|||
return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_rdtsc_factor)}; |
|||
} |
|||
|
|||
std::chrono::microseconds WallClock::GetTimeUS() const { |
|||
if (invariant) |
|||
return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()); |
|||
return std::chrono::microseconds{MultiplyHigh(GetUptime(), us_rdtsc_factor)}; |
|||
} |
|||
|
|||
std::chrono::milliseconds WallClock::GetTimeMS() const { |
|||
if (invariant) |
|||
return std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()); |
|||
return std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_rdtsc_factor)}; |
|||
} |
|||
|
|||
/// Uptime converted to CNTPCT units.
///
/// With `invariant` set, GetUptime() (nanoseconds in that mode) is rescaled by
/// NsToCNTPCTRatio; otherwise the raw reading is scaled by cntpct_rdtsc_factor.
s64 WallClock::GetCNTPCT() const {
    if (!invariant) {
        return MultiplyHigh(GetUptime(), cntpct_rdtsc_factor);
    }
    // Multiply before dividing, exactly as the ratio is defined (num / den).
    return GetUptime() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den;
}
|||
|
|||
/// Uptime converted to GPU tick units.
///
/// With `invariant` set, GetUptime() (nanoseconds in that mode) is rescaled by
/// NsToGPUTickRatio; otherwise the raw reading is scaled by gputick_rdtsc_factor.
s64 WallClock::GetGPUTick() const {
    if (!invariant) {
        return MultiplyHigh(GetUptime(), gputick_rdtsc_factor);
    }
    // Multiply before dividing, exactly as the ratio is defined (num / den).
    return GetUptime() * NsToGPUTickRatio::num / NsToGPUTickRatio::den;
}
|||
|
|||
/// Raw monotonic reading backing the other accessors.
///
/// With `invariant` set this is std::chrono::steady_clock's time since its
/// epoch in nanoseconds; otherwise it is the value of FencedRDTSC().
s64 WallClock::GetUptime() const {
    if (!invariant) {
        return static_cast<s64>(Common::X64::FencedRDTSC());
    }
    const auto elapsed = std::chrono::steady_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
}
|||
|
|||
bool WallClock::IsNative() const { |
|||
if (invariant) |
|||
return false; |
|||
return true; |
|||
} |
|||
#elif defined(HAS_NCE)
|
|||
namespace { |
|||
|
|||
/// Computes (num << 64) / den in WallClock::FactorType precision.
/// `den` must be non-zero.
[[nodiscard]] WallClock::FactorType GetFixedPointFactor(u64 num, u64 den) noexcept {
    const auto shifted_numerator = static_cast<WallClock::FactorType>(num) << 64;
    return shifted_numerator / den;
}
|||
|
|||
/// Multiplies `m` by `factor` and returns the product shifted right by 64 bits,
/// truncated to u64.
[[nodiscard]] u64 MultiplyHigh(u64 m, WallClock::FactorType factor) noexcept {
    const auto wide_product = m * factor;
    return static_cast<u64>(wide_product >> 64);
}
|||
|
|||
/// Returns the host counter frequency in Hz.
///
/// On Android a few boards, identified by the `ro.product.board` system
/// property, get a hard-coded frequency instead of the register value
/// (NOTE(review): presumably because CNTFRQ_EL0 is unreliable on those SoCs —
/// confirm). Every other host reads CNTFRQ_EL0 directly.
[[nodiscard]] s64 GetHostCNTFRQ() noexcept {
#ifdef __ANDROID__
    // Guarded by __ANDROID__ to match the guard around <sys/system_properties.h>,
    // which declares PROP_VALUE_MAX and __system_property_get.
    char buffer[PROP_VALUE_MAX]{};
    const int len = __system_property_get("ro.product.board", buffer);
    // Treat a non-positive length as "property not available".
    const std::string_view board{buffer, static_cast<size_t>(len > 0 ? len : 0)};
    if (board == "s5e9925") { // Exynos 2200
        return 25600000;
    }
    // Exynos 2100, Exynos 9810 and Exynos 1280 share the same override.
    if (board == "exynos2100" || board == "exynos9810" || board == "s5e8825") {
        return 26000000;
    }
#endif
    u64 cntfrq_el0 = 0;
    asm volatile("mrs %[cntfrq_el0], cntfrq_el0" : [cntfrq_el0] "=r"(cntfrq_el0));
    return static_cast<s64>(cntfrq_el0);
}
|||
|
|||
} // namespace
|
|||
|
|||
/// NCE clock constructor.
///
/// Both parameters are unused here; every factor is derived from the host
/// counter frequency returned by GetHostCNTFRQ().
WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept {
    // Clamp to at least 1 so the divisions in GetFixedPointFactor never see 0.
    const u64 host_hz = std::max<u64>(GetHostCNTFRQ(), 1);
    const auto factor_for = [host_hz](u64 target_hz) {
        return GetFixedPointFactor(target_hz, host_hz);
    };

    ns_cntfrq_factor = factor_for(NsRatio::den);
    us_cntfrq_factor = factor_for(UsRatio::den);
    ms_cntfrq_factor = factor_for(MsRatio::den);
    guest_cntfrq_factor = factor_for(CNTFRQ);
    gputick_cntfrq_factor = factor_for(GPUTickFreq);
}
|||
|
|||
std::chrono::nanoseconds WallClock::GetTimeNS() const { |
|||
return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_cntfrq_factor)}; |
|||
} |
|||
|
|||
std::chrono::microseconds WallClock::GetTimeUS() const { |
|||
return std::chrono::microseconds{MultiplyHigh(GetUptime(), us_cntfrq_factor)}; |
|||
} |
|||
|
|||
std::chrono::milliseconds WallClock::GetTimeMS() const { |
|||
return std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_cntfrq_factor)}; |
|||
} |
|||
|
|||
/// GetUptime() scaled by guest_cntfrq_factor (built from CNTFRQ in the constructor).
s64 WallClock::GetCNTPCT() const {
    const u64 guest_ticks = MultiplyHigh(GetUptime(), guest_cntfrq_factor);
    return guest_ticks;
}
|||
|
|||
/// GetUptime() scaled by gputick_cntfrq_factor (built from GPUTickFreq in the constructor).
s64 WallClock::GetGPUTick() const {
    const u64 gpu_ticks = MultiplyHigh(GetUptime(), gputick_cntfrq_factor);
    return gpu_ticks;
}
|||
|
|||
/// Reads the CNTVCT_EL0 system register and returns its value.
/// The read is bracketed by `dsb ish` barriers on both sides.
s64 WallClock::GetUptime() const {
    s64 counter = 0;
    asm volatile("dsb ish\n\t"
                 "mrs %[cntvct_el0], cntvct_el0\n\t"
                 "dsb ish\n\t"
                 : [cntvct_el0] "=r"(counter));
    return counter;
}
|||
|
|||
/// Always true in this build: GetUptime() reads the hardware counter register directly.
bool WallClock::IsNative() const {
    constexpr bool uses_hardware_counter = true;
    return uses_hardware_counter;
}
|||
#else
|
|||
/// Generic-build constructor: both arguments are ignored because every accessor
/// in this branch reads std::chrono clocks directly.
WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept {
    // Intentionally empty.
}
|||
|
|||
/// std::chrono::system_clock's time since its epoch, in nanoseconds.
std::chrono::nanoseconds WallClock::GetTimeNS() const {
    const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::nanoseconds>(since_epoch);
}
|||
|
|||
/// std::chrono::system_clock's time since its epoch, in microseconds.
std::chrono::microseconds WallClock::GetTimeUS() const {
    const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::microseconds>(since_epoch);
}
|||
|
|||
/// std::chrono::system_clock's time since its epoch, in milliseconds.
std::chrono::milliseconds WallClock::GetTimeMS() const {
    const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::milliseconds>(since_epoch);
}
|||
|
|||
/// GetUptime() (nanoseconds in this build) rescaled by NsToCNTPCTRatio.
s64 WallClock::GetCNTPCT() const {
    const auto uptime_ns = GetUptime();
    return uptime_ns * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den;
}
|||
|
|||
/// GetUptime() (nanoseconds in this build) rescaled by NsToGPUTickRatio.
s64 WallClock::GetGPUTick() const {
    const auto uptime_ns = GetUptime();
    return uptime_ns * NsToGPUTickRatio::num / NsToGPUTickRatio::den;
}
|||
|
|||
/// std::chrono::steady_clock's time since its epoch, as a nanosecond count.
s64 WallClock::GetUptime() const {
    const auto elapsed = std::chrono::steady_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
}
|||
|
|||
/// Always false in this build: all readings come from std::chrono clocks.
bool WallClock::IsNative() const {
    constexpr bool uses_hardware_counter = false;
    return uses_hardware_counter;
}
|||
#endif
|
|||
|
|||
/// Builds the WallClock variant suited to the current build target.
///
/// - x86-64: the RDTSC path is selected only when the CPU reports an invariant
///   TSC and a TSC frequency of at least std::nano::den Hz; otherwise the clock
///   is constructed in its std::chrono mode (first argument true).
/// - NCE: constructed with (false, 1).
/// - Anything else: constructed with (true, 1).
WallClock CreateOptimalClock() noexcept {
#if defined(ARCHITECTURE_x86_64)
    const auto& caps = GetCPUCaps();
    const bool tsc_usable = caps.invariant_tsc && caps.tsc_frequency >= std::nano::den;
    // The constructor flag is the inverse of "TSC is usable".
    return WallClock(!tsc_usable, caps.tsc_frequency);
#elif defined(HAS_NCE)
    return WallClock(false, 1);
#else
    return WallClock(true, 1);
#endif
}
|||
|
|||
} // namespace Common
|
|||
@ -1,82 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project |
|||
// SPDX-License-Identifier: GPL-3.0-or-later |
|||
|
|||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project |
|||
// SPDX-FileCopyrightText: Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project |
|||
// SPDX-License-Identifier: GPL-2.0-or-later |
|||
|
|||
#pragma once |
|||
|
|||
#include <optional> |
|||
#include <string_view> |
|||
#include "common/common_types.h" |
|||
|
|||
namespace Common { |
|||
|
|||
/// x86/x64 CPU capabilities that may be detected by this module |
|||
struct CPUCaps { |
|||
|
|||
enum class Manufacturer : u8 { |
|||
Unknown = 0, |
|||
Intel = 1, |
|||
AMD = 2, |
|||
Hygon = 3, |
|||
}; |
|||
|
|||
static Manufacturer ParseManufacturer(std::string_view brand_string); |
|||
|
|||
Manufacturer manufacturer; |
|||
char brand_string[13]; |
|||
|
|||
char cpu_string[48]; |
|||
|
|||
u32 base_frequency; |
|||
u32 max_frequency; |
|||
u32 bus_frequency; |
|||
|
|||
u32 tsc_crystal_ratio_denominator; |
|||
u32 tsc_crystal_ratio_numerator; |
|||
u32 crystal_frequency; |
|||
u64 tsc_frequency; // Derived from the above three values |
|||
|
|||
bool sse3 : 1; |
|||
bool ssse3 : 1; |
|||
bool sse4_1 : 1; |
|||
bool sse4_2 : 1; |
|||
|
|||
bool avx : 1; |
|||
bool avx2 : 1; |
|||
bool avx512f : 1; |
|||
bool avx512dq : 1; |
|||
bool avx512cd : 1; |
|||
bool avx512bw : 1; |
|||
bool avx512vl : 1; |
|||
bool avx512vbmi : 1; |
|||
bool avx512bitalg : 1; |
|||
|
|||
bool aes : 1; |
|||
bool bmi1 : 1; |
|||
bool bmi2 : 1; |
|||
bool f16c : 1; |
|||
bool fma : 1; |
|||
bool gfni : 1; |
|||
bool invariant_tsc : 1; |
|||
bool lzcnt : 1; |
|||
bool monitorx : 1; |
|||
bool movbe : 1; |
|||
bool pclmulqdq : 1; |
|||
bool popcnt : 1; |
|||
bool sha : 1; |
|||
bool waitpkg : 1; |
|||
}; |
|||
|
|||
/** |
|||
* Gets the supported capabilities of the host CPU |
|||
* @return Reference to a CPUCaps struct with the detected host CPU capabilities |
|||
*/ |
|||
const CPUCaps& GetCPUCaps(); |
|||
|
|||
/// Detects CPU core count |
|||
std::optional<int> GetProcessorCount(); |
|||
|
|||
} // namespace Common |
|||
@ -1,75 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|||
|
|||
#include <thread>
|
|||
|
|||
#ifdef _MSC_VER
|
|||
#include <intrin.h>
|
|||
#endif
|
|||
|
|||
#include "common/x64/cpu_detect.h"
|
|||
#include "common/x64/cpu_wait.h"
|
|||
#include "common/x64/rdtsc.h"
|
|||
|
|||
namespace Common::X64 { |
|||
|
|||
namespace {

// Cycle budget handed to the timed-wait instructions used in this file.
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
// For reference:
//   1 GHz -> 100K cycles = 100us
//   2 GHz -> 100K cycles =  50us
//   4 GHz -> 100K cycles =  25us
constexpr unsigned int PauseCycles = 100'000U;

} // Anonymous namespace
|
|||
|
|||
#if defined(_MSC_VER) && !defined(__clang__)
|
|||
/// Issues `_tpause` with control value 0 and a deadline of the current
/// FencedRDTSC() reading plus PauseCycles.
__forceinline static void TPAUSE() {
    constexpr unsigned int RequestC02State = 0U;
    const auto deadline = FencedRDTSC() + PauseCycles;
    _tpause(RequestC02State, deadline);
}
|||
|
|||
/// Arms a monitor on a local dummy variable, then calls `_mm_mwaitx` with the
/// timer-enable extension flag, hint value 0 and PauseCycles as the timer value.
__forceinline static void MWAITX() {
    constexpr unsigned int EnableWaitTimeFlag = 1U << 1;
    constexpr unsigned int RequestC1State = 0U;

    // The monitored variable should sit on its own cache line.
    alignas(64) u64 monitored{};
    _mm_monitorx(&monitored, 0, 0);
    _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles);
}
|||
#else
|
|||
static void TPAUSE() { |
|||
static constexpr auto RequestC02State = 0U; |
|||
const auto tsc = FencedRDTSC() + PauseCycles; |
|||
const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); |
|||
const auto edx = static_cast<u32>(tsc >> 32); |
|||
asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax)); |
|||
} |
|||
|
|||
static void MWAITX() { |
|||
static constexpr auto EnableWaitTimeFlag = 1U << 1; |
|||
static constexpr auto RequestC1State = 0U; |
|||
|
|||
// monitor_var should be aligned to a cache line.
|
|||
alignas(64) u64 monitor_var{}; |
|||
asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0)); |
|||
asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag)); |
|||
} |
|||
#endif
|
|||
|
|||
void MicroSleep() { |
|||
static const bool has_waitpkg = GetCPUCaps().waitpkg; |
|||
static const bool has_monitorx = GetCPUCaps().monitorx; |
|||
|
|||
if (has_waitpkg) { |
|||
TPAUSE(); |
|||
} else if (has_monitorx) { |
|||
MWAITX(); |
|||
} else { |
|||
std::this_thread::yield(); |
|||
} |
|||
} |
|||
|
|||
} // namespace Common::X64
|
|||
@ -1,10 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project |
|||
// SPDX-License-Identifier: GPL-3.0-or-later |
|||
|
|||
#pragma once |
|||
|
|||
namespace Common::X64 { |
|||
|
|||
void MicroSleep(); |
|||
|
|||
} // namespace Common::X64 |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue