|
|
@ -2,12 +2,17 @@ |
|
|
// Licensed under GPLv2 or any later version
|
|
|
// Licensed under GPLv2 or any later version
|
|
|
// Refer to the license.txt file included.
|
|
|
// Refer to the license.txt file included.
|
|
|
|
|
|
|
|
|
|
|
|
#include <array>
|
|
|
#include <chrono>
|
|
|
#include <chrono>
|
|
|
|
|
|
#include <limits>
|
|
|
#include <mutex>
|
|
|
#include <mutex>
|
|
|
#include <thread>
|
|
|
#include <thread>
|
|
|
|
|
|
|
|
|
#ifdef _MSC_VER
|
|
|
#ifdef _MSC_VER
|
|
|
#include <intrin.h>
|
|
|
#include <intrin.h>
|
|
|
|
|
|
|
|
|
|
|
|
#pragma intrinsic(__umulh)
|
|
|
|
|
|
#pragma intrinsic(_udiv128)
|
|
|
#else
|
|
|
#else
|
|
|
#include <x86intrin.h>
|
|
|
#include <x86intrin.h>
|
|
|
#endif
|
|
|
#endif
|
|
|
@ -15,6 +20,55 @@ |
|
|
#include "common/uint128.h"
|
|
|
#include "common/uint128.h"
|
|
|
#include "common/x64/native_clock.h"
|
|
|
#include "common/x64/native_clock.h"
|
|
|
|
|
|
|
|
|
|
|
|
namespace { |
|
|
|
|
|
|
|
|
|
|
|
[[nodiscard]] u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) { |
|
|
|
|
|
#ifdef __SIZEOF_INT128__
|
|
|
|
|
|
const auto base = static_cast<unsigned __int128>(numerator) << 64ULL; |
|
|
|
|
|
return static_cast<u64>(base / divisor); |
|
|
|
|
|
#elif defined(_M_X64) || defined(_M_ARM64)
|
|
|
|
|
|
std::array<u64, 2> r = {0, numerator}; |
|
|
|
|
|
u64 remainder; |
|
|
|
|
|
#if _MSC_VER < 1923
|
|
|
|
|
|
return udiv128(r[1], r[0], divisor, &remainder); |
|
|
|
|
|
#else
|
|
|
|
|
|
return _udiv128(r[1], r[0], divisor, &remainder); |
|
|
|
|
|
#endif
|
|
|
|
|
|
#else
|
|
|
|
|
|
// This one is bit more inaccurate.
|
|
|
|
|
|
return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor); |
|
|
|
|
|
#endif
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
[[nodiscard]] u64 MultiplyHigh(u64 a, u64 b) { |
|
|
|
|
|
#ifdef __SIZEOF_INT128__
|
|
|
|
|
|
return (static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b)) >> 64; |
|
|
|
|
|
#elif defined(_M_X64) || defined(_M_ARM64)
|
|
|
|
|
|
return __umulh(a, b); // MSVC
|
|
|
|
|
|
#else
|
|
|
|
|
|
// Generic fallback
|
|
|
|
|
|
const u64 a_lo = u32(a); |
|
|
|
|
|
const u64 a_hi = a >> 32; |
|
|
|
|
|
const u64 b_lo = u32(b); |
|
|
|
|
|
const u64 b_hi = b >> 32; |
|
|
|
|
|
|
|
|
|
|
|
const u64 a_x_b_hi = a_hi * b_hi; |
|
|
|
|
|
const u64 a_x_b_mid = a_hi * b_lo; |
|
|
|
|
|
const u64 b_x_a_mid = b_hi * a_lo; |
|
|
|
|
|
const u64 a_x_b_lo = a_lo * b_lo; |
|
|
|
|
|
|
|
|
|
|
|
const u64 carry_bit = (static_cast<u64>(static_cast<u32>(a_x_b_mid)) + |
|
|
|
|
|
static_cast<u64>(static_cast<u32>(b_x_a_mid)) + (a_x_b_lo >> 32)) >> |
|
|
|
|
|
32; |
|
|
|
|
|
|
|
|
|
|
|
const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; |
|
|
|
|
|
|
|
|
|
|
|
return multhi; |
|
|
|
|
|
#endif
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
|
namespace Common { |
|
|
namespace Common { |
|
|
|
|
|
|
|
|
u64 EstimateRDTSCFrequency() { |
|
|
u64 EstimateRDTSCFrequency() { |
|
|
@ -50,6 +104,11 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen |
|
|
_mm_mfence(); |
|
|
_mm_mfence(); |
|
|
last_measure = __rdtsc(); |
|
|
last_measure = __rdtsc(); |
|
|
accumulated_ticks = 0U; |
|
|
accumulated_ticks = 0U; |
|
|
|
|
|
ns_rtsc_factor = GetFixedPoint64Factor(1000000000, rtsc_frequency); |
|
|
|
|
|
us_rtsc_factor = GetFixedPoint64Factor(1000000, rtsc_frequency); |
|
|
|
|
|
ms_rtsc_factor = GetFixedPoint64Factor(1000, rtsc_frequency); |
|
|
|
|
|
clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency); |
|
|
|
|
|
cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
u64 NativeClock::GetRTSC() { |
|
|
u64 NativeClock::GetRTSC() { |
|
|
@ -75,27 +134,27 @@ void NativeClock::Pause(bool is_paused) { |
|
|
|
|
|
|
|
|
std::chrono::nanoseconds NativeClock::GetTimeNS() { |
|
|
std::chrono::nanoseconds NativeClock::GetTimeNS() { |
|
|
const u64 rtsc_value = GetRTSC(); |
|
|
const u64 rtsc_value = GetRTSC(); |
|
|
return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)}; |
|
|
|
|
|
|
|
|
return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)}; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
std::chrono::microseconds NativeClock::GetTimeUS() { |
|
|
std::chrono::microseconds NativeClock::GetTimeUS() { |
|
|
const u64 rtsc_value = GetRTSC(); |
|
|
const u64 rtsc_value = GetRTSC(); |
|
|
return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)}; |
|
|
|
|
|
|
|
|
return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)}; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
std::chrono::milliseconds NativeClock::GetTimeMS() { |
|
|
std::chrono::milliseconds NativeClock::GetTimeMS() { |
|
|
const u64 rtsc_value = GetRTSC(); |
|
|
const u64 rtsc_value = GetRTSC(); |
|
|
return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)}; |
|
|
|
|
|
|
|
|
return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)}; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
u64 NativeClock::GetClockCycles() { |
|
|
u64 NativeClock::GetClockCycles() { |
|
|
const u64 rtsc_value = GetRTSC(); |
|
|
const u64 rtsc_value = GetRTSC(); |
|
|
return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency); |
|
|
|
|
|
|
|
|
return MultiplyHigh(rtsc_value, clock_rtsc_factor); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
u64 NativeClock::GetCPUCycles() { |
|
|
u64 NativeClock::GetCPUCycles() { |
|
|
const u64 rtsc_value = GetRTSC(); |
|
|
const u64 rtsc_value = GetRTSC(); |
|
|
return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); |
|
|
|
|
|
|
|
|
return MultiplyHigh(rtsc_value, cpu_rtsc_factor); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
} // namespace X64
|
|
|
} // namespace X64
|
|
|
|