Browse Source

[dynarmic] backport WAITPKG based spinlocks

Signed-off-by: lizzie <lizzie@eden-emu.dev>
liz-dynarmic-backport-waitpkg
lizzie 3 months ago
committed by Caio Oliveira
parent
commit
e19ca201e3
No known key found for this signature in database GPG Key ID: AAAE6C7FD4186B0C
  1. 2
      src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp
  2. 3
      src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc
  3. 2
      src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h
  4. 3
      src/dynarmic/src/dynarmic/backend/x64/host_feature.h
  5. 35
      src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp
  6. 5
      src/dynarmic/src/dynarmic/common/spin_lock_x64.h

2
src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp

@ -194,6 +194,8 @@ HostFeature GetHostFeatures() {
features |= HostFeature::LZCNT; features |= HostFeature::LZCNT;
if (cpu_info.has(Cpu::tGFNI)) if (cpu_info.has(Cpu::tGFNI))
features |= HostFeature::GFNI; features |= HostFeature::GFNI;
if (cpu_info.has(Cpu::tWAITPKG))
features |= HostFeature::WAITPKG;
if (cpu_info.has(Cpu::tBMI2)) { if (cpu_info.has(Cpu::tBMI2)) {
// BMI2 instructions such as pdep and pext have been very slow up until Zen 3. // BMI2 instructions such as pdep and pext have been very slow up until Zen 3.

3
src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc

@ -420,10 +420,11 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]); const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]);
const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr(code).cvt32(); const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code); const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(code);
const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())]; const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())];
EmitExclusiveLock(code, conf, tmp, eax);
EmitExclusiveLock(code, conf, tmp, tmp2);
SharedLabel end = GenSharedLabel(); SharedLabel end = GenSharedLabel();

2
src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h

@ -346,7 +346,7 @@ void EmitExclusiveLock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 p
} }
code.mov(pointer, std::bit_cast<u64>(GetExclusiveMonitorLockPointer(conf.global_monitor))); code.mov(pointer, std::bit_cast<u64>(GetExclusiveMonitorLockPointer(conf.global_monitor)));
EmitSpinLockLock(code, pointer, tmp);
EmitSpinLockLock(code, pointer, tmp, code.HasHostFeature(HostFeature::WAITPKG));
} }
template<typename UserConfig> template<typename UserConfig>

3
src/dynarmic/src/dynarmic/backend/x64/host_feature.h

@ -35,9 +35,10 @@ enum class HostFeature : u64 {
BMI2 = 1ULL << 19, BMI2 = 1ULL << 19,
LZCNT = 1ULL << 20, LZCNT = 1ULL << 20,
GFNI = 1ULL << 21, GFNI = 1ULL << 21,
WAITPKG = 1ULL << 22,
// Zen-based BMI2 // Zen-based BMI2
FastBMI2 = 1ULL << 22,
FastBMI2 = 1ULL << 23,
// Orthographic AVX512 features on 128 and 256 vectors // Orthographic AVX512 features on 128 and 256 vectors
AVX512_Ortho = AVX512F | AVX512VL, AVX512_Ortho = AVX512F | AVX512VL,

35
src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp

@ -22,17 +22,46 @@ static const auto default_cg_mode = nullptr; //Allow RWE
namespace Dynarmic { namespace Dynarmic {
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg) {
// TODO: eax/ebx/edx are saved and restored by hand because this emitter has no regalloc - so better to be safe :(
if (waitpkg) {
code.push(Xbyak::util::eax);
code.push(Xbyak::util::ebx);
code.push(Xbyak::util::edx);
}
Xbyak::Label start, loop; Xbyak::Label start, loop;
code.jmp(start, code.T_NEAR); code.jmp(start, code.T_NEAR);
code.L(loop); code.L(loop);
if (waitpkg) {
// TODO: This clobbers EAX and EDX - did we tell the regalloc?
// Arm address monitoring on the lock word at ptr
code.umonitor(ptr);
// ebx.bit[0] = 0: C0.2 | Slower Wakeup | Better Savings
// ebx.bit[0] = 1: C0.1 | Faster Wakeup | Lesser Savings
// edx:eax is implicitly used as a 64-bit deadline timestamp
// Use the maximum so that we use the operating system's maximum
// allowed wait time within the IA32_UMWAIT_CONTROL register
// Enter the power state designated by ebx and wait for a write to ptr
code.mov(Xbyak::util::eax, 0xFFFFFFFF);
code.mov(Xbyak::util::edx, Xbyak::util::eax);
// TODO: We can only be here because tmp is 1 already - however we repeatedly overwrite it...
code.mov(Xbyak::util::ebx, 1);
code.umwait(Xbyak::util::ebx);
// CF == 1 if we hit the OS-timeout in IA32_UMWAIT_CONTROL without a write
// CF == 0 if we exited the wait for any other reason
} else {
code.pause(); code.pause();
}
code.L(start); code.L(start);
code.mov(tmp, 1); code.mov(tmp, 1);
/*code.lock();*/ code.xchg(code.dword[ptr], tmp); /*code.lock();*/ code.xchg(code.dword[ptr], tmp);
code.test(tmp, tmp); code.test(tmp, tmp);
code.jnz(loop, code.T_NEAR); code.jnz(loop, code.T_NEAR);
if (waitpkg) {
code.pop(Xbyak::util::edx);
code.pop(Xbyak::util::ebx);
code.pop(Xbyak::util::eax);
}
} }
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) { void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
@ -60,7 +89,7 @@ void SpinLockImpl::Initialize() noexcept {
Xbyak::Reg64 const ABI_PARAM1 = Backend::X64::HostLocToReg64(Backend::X64::ABI_PARAM1); Xbyak::Reg64 const ABI_PARAM1 = Backend::X64::HostLocToReg64(Backend::X64::ABI_PARAM1);
code.align(); code.align();
lock = code.getCurr<void (*)(volatile int*)>(); lock = code.getCurr<void (*)(volatile int*)>();
EmitSpinLockLock(code, ABI_PARAM1, code.eax);
EmitSpinLockLock(code, ABI_PARAM1, code.eax, false);
code.ret(); code.ret();
code.align(); code.align();
unlock = code.getCurr<void (*)(volatile int*)>(); unlock = code.getCurr<void (*)(volatile int*)>();

5
src/dynarmic/src/dynarmic/common/spin_lock_x64.h

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project. /* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage * Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
@ -9,7 +12,7 @@
namespace Dynarmic { namespace Dynarmic {
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp);
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg);
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp); void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp);
} // namespace Dynarmic } // namespace Dynarmic
Loading…
Cancel
Save