|
|
@ -13,36 +13,60 @@ |
|
|
|
|
|
|
|
|
namespace Common::X64 { |
|
|
namespace Common::X64 { |
|
|
|
|
|
|
|
|
|
|
|
namespace { |
|
|
|
|
|
|
|
|
|
|
|
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
|
|
|
|
|
|
// For reference:
|
|
|
|
|
|
// At 1 GHz, 100K cycles is 100us
|
|
|
|
|
|
// At 2 GHz, 100K cycles is 50us
|
|
|
|
|
|
// At 4 GHz, 100K cycles is 25us
|
|
|
|
|
|
constexpr auto PauseCycles = 100'000U; |
|
|
|
|
|
|
|
|
|
|
|
} // Anonymous namespace
|
|
|
|
|
|
|
|
|
#ifdef _MSC_VER
|
|
|
#ifdef _MSC_VER
|
|
|
__forceinline static void TPAUSE() { |
|
|
__forceinline static void TPAUSE() { |
|
|
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
|
|
|
|
|
|
// For reference:
|
|
|
|
|
|
// At 1 GHz, 100K cycles is 100us
|
|
|
|
|
|
// At 2 GHz, 100K cycles is 50us
|
|
|
|
|
|
// At 4 GHz, 100K cycles is 25us
|
|
|
|
|
|
static constexpr auto PauseCycles = 100'000; |
|
|
|
|
|
_tpause(0, FencedRDTSC() + PauseCycles); |
|
|
|
|
|
|
|
|
static constexpr auto RequestC02State = 0U; |
|
|
|
|
|
_tpause(RequestC02State, FencedRDTSC() + PauseCycles); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
__forceinline static void MWAITX() { |
|
|
|
|
|
static constexpr auto EnableWaitTimeFlag = 1U << 1; |
|
|
|
|
|
static constexpr auto RequestC1State = 0U; |
|
|
|
|
|
|
|
|
|
|
|
// monitor_var should be aligned to a cache line.
|
|
|
|
|
|
alignas(64) u64 monitor_var{}; |
|
|
|
|
|
_mm_monitorx(&monitor_var, 0, 0); |
|
|
|
|
|
_mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles); |
|
|
} |
|
|
} |
|
|
#else
|
|
|
#else
|
|
|
static void TPAUSE() { |
|
|
static void TPAUSE() { |
|
|
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
|
|
|
|
|
|
// For reference:
|
|
|
|
|
|
// At 1 GHz, 100K cycles is 100us
|
|
|
|
|
|
// At 2 GHz, 100K cycles is 50us
|
|
|
|
|
|
// At 4 GHz, 100K cycles is 25us
|
|
|
|
|
|
static constexpr auto PauseCycles = 100'000; |
|
|
|
|
|
|
|
|
static constexpr auto RequestC02State = 0U; |
|
|
const auto tsc = FencedRDTSC() + PauseCycles; |
|
|
const auto tsc = FencedRDTSC() + PauseCycles; |
|
|
const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); |
|
|
const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); |
|
|
const auto edx = static_cast<u32>(tsc >> 32); |
|
|
const auto edx = static_cast<u32>(tsc >> 32); |
|
|
asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax)); |
|
|
|
|
|
|
|
|
asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax)); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static void MWAITX() { |
|
|
|
|
|
static constexpr auto EnableWaitTimeFlag = 1U << 1; |
|
|
|
|
|
static constexpr auto RequestC1State = 0U; |
|
|
|
|
|
|
|
|
|
|
|
// monitor_var should be aligned to a cache line.
|
|
|
|
|
|
alignas(64) u64 monitor_var{}; |
|
|
|
|
|
asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0)); |
|
|
|
|
|
asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag)); |
|
|
} |
|
|
} |
|
|
#endif
|
|
|
#endif
|
|
|
|
|
|
|
|
|
void MicroSleep() { |
|
|
void MicroSleep() { |
|
|
static const bool has_waitpkg = GetCPUCaps().waitpkg; |
|
|
static const bool has_waitpkg = GetCPUCaps().waitpkg; |
|
|
|
|
|
static const bool has_monitorx = GetCPUCaps().monitorx; |
|
|
|
|
|
|
|
|
if (has_waitpkg) { |
|
|
if (has_waitpkg) { |
|
|
TPAUSE(); |
|
|
TPAUSE(); |
|
|
|
|
|
} else if (has_monitorx) { |
|
|
|
|
|
MWAITX(); |
|
|
} else { |
|
|
} else { |
|
|
std::this_thread::yield(); |
|
|
std::this_thread::yield(); |
|
|
} |
|
|
} |
|
|
|