Browse Source

[dynarmic] force devirtualisation of terminal handlers (#3033)

Should result in marginal performance gains. Basically, this removes the vtable dereference for the EmitX64 terminal handlers on LTO builds, so in theory it should be better than having to dereference the vtable for every terminal handler.

In other words, we inline them all into one big function and keep the CPU away from dereference hell.

Signed-off-by: lizzie <lizzie@eden-emu.dev>

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3033
Reviewed-by: Caio Oliveira <caiooliveirafarias0@gmail.com>
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
Co-authored-by: lizzie <lizzie@eden-emu.dev>
Co-committed-by: lizzie <lizzie@eden-emu.dev>
pull/3102/head
lizzie 4 weeks ago
committed by crueter
parent
commit
41af6ea645
No known key found for this signature in database GPG Key ID: 425ACD2D4830EBC6
  1. 156
      src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp
  2. 23
      src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h
  3. 139
      src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp
  4. 15
      src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h
  5. 24
      src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp
  6. 18
      src/dynarmic/src/dynarmic/backend/x64/emit_x64.h

156
src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp

@ -162,7 +162,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
if (conf.enable_cycle_counting) { if (conf.enable_cycle_counting) {
EmitAddCycles(block.CycleCount()); EmitAddCycles(block.CycleCount());
} }
EmitX64::EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
code.int3(); code.int3();
for (auto& deferred_emit : ctx.deferred_emits) { for (auto& deferred_emit : ctx.deferred_emits) {
@ -1124,26 +1124,9 @@ std::string A32EmitX64::LocationDescriptorToFriendlyName(const IR::LocationDescr
descriptor.FPSCR().Value()); descriptor.FPSCR().Value());
} }
void A32EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool) {
ASSERT(A32::LocationDescriptor{terminal.next}.TFlag() == A32::LocationDescriptor{initial_location}.TFlag() && "Unimplemented");
ASSERT(A32::LocationDescriptor{terminal.next}.EFlag() == A32::LocationDescriptor{initial_location}.EFlag() && "Unimplemented");
ASSERT(terminal.num_instructions == 1 && "Unimplemented");
code.mov(code.ABI_PARAM2.cvt32(), A32::LocationDescriptor{terminal.next}.PC());
code.mov(code.ABI_PARAM3.cvt32(), 1);
code.mov(MJitStateReg(A32::Reg::PC), code.ABI_PARAM2.cvt32());
code.SwitchMxcsrOnExit();
Devirtualize<&A32::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code);
code.ReturnFromRunCode(true); // TODO: Check cycles
}
void A32EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) {
code.ReturnFromRunCode();
}
void A32EmitX64::EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) { void A32EmitX64::EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) {
auto get_upper = [](const IR::LocationDescriptor& desc) -> u32 { auto get_upper = [](const IR::LocationDescriptor& desc) -> u32 {
return static_cast<u32>(A32::LocationDescriptor{desc}.SetSingleStepping(false).UniqueHash() >> 32);
return u32(A32::LocationDescriptor{desc}.SetSingleStepping(false).UniqueHash() >> 32);
}; };
const u32 old_upper = get_upper(old_location); const u32 old_upper = get_upper(old_location);
@ -1157,90 +1140,115 @@ void A32EmitX64::EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_locat
} }
} }
void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
EmitSetUpperLocationDescriptor(terminal.next, initial_location);
namespace {
void EmitTerminalImpl(A32EmitX64& e, IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool) {
ASSERT(A32::LocationDescriptor{terminal.next}.TFlag() == A32::LocationDescriptor{initial_location}.TFlag() && "Unimplemented");
ASSERT(A32::LocationDescriptor{terminal.next}.EFlag() == A32::LocationDescriptor{initial_location}.EFlag() && "Unimplemented");
ASSERT(terminal.num_instructions == 1 && "Unimplemented");
if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
code.ReturnFromRunCode();
e.code.mov(e.code.ABI_PARAM2.cvt32(), A32::LocationDescriptor{terminal.next}.PC());
e.code.mov(e.code.ABI_PARAM3.cvt32(), 1);
e.code.mov(MJitStateReg(A32::Reg::PC), e.code.ABI_PARAM2.cvt32());
e.code.SwitchMxcsrOnExit();
Devirtualize<&A32::UserCallbacks::InterpreterFallback>(e.conf.callbacks).EmitCall(e.code);
e.code.ReturnFromRunCode(true); // TODO: Check cycles
}
void EmitTerminalImpl(A32EmitX64& e, IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) {
e.code.ReturnFromRunCode();
}
void EmitTerminalImpl(A32EmitX64& e, IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
e.EmitSetUpperLocationDescriptor(terminal.next, initial_location);
if (!e.conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
e.code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
e.code.ReturnFromRunCode();
} else { } else {
if (conf.enable_cycle_counting) {
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
patch_information[terminal.next].jg.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJg(terminal.next, next_bb->entrypoint);
if (e.conf.enable_cycle_counting) {
e.code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
e.patch_information[terminal.next].jg.push_back(e.code.getCurr());
if (const auto next_bb = e.GetBasicBlock(terminal.next)) {
e.EmitPatchJg(terminal.next, next_bb->entrypoint);
} else { } else {
EmitPatchJg(terminal.next);
e.EmitPatchJg(terminal.next);
} }
} else { } else {
code.cmp(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], 0);
patch_information[terminal.next].jz.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJz(terminal.next, next_bb->entrypoint);
e.code.cmp(dword[e.code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], 0);
e.patch_information[terminal.next].jz.push_back(e.code.getCurr());
if (const auto next_bb = e.GetBasicBlock(terminal.next)) {
e.EmitPatchJz(terminal.next, next_bb->entrypoint);
} else { } else {
EmitPatchJz(terminal.next);
e.EmitPatchJz(terminal.next);
} }
} }
code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
PushRSBHelper(rax, rbx, terminal.next);
code.ForceReturnFromRunCode();
e.code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
e.PushRSBHelper(rax, rbx, terminal.next);
e.code.ForceReturnFromRunCode();
} }
} }
void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
EmitSetUpperLocationDescriptor(terminal.next, initial_location);
if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
code.ReturnFromRunCode();
void EmitTerminalImpl(A32EmitX64& e, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
e.EmitSetUpperLocationDescriptor(terminal.next, initial_location);
if (!e.conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
e.code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
e.code.ReturnFromRunCode();
} else { } else {
patch_information[terminal.next].jmp.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJmp(terminal.next, next_bb->entrypoint);
e.patch_information[terminal.next].jmp.push_back(e.code.getCurr());
if (const auto next_bb = e.GetBasicBlock(terminal.next)) {
e.EmitPatchJmp(terminal.next, next_bb->entrypoint);
} else { } else {
EmitPatchJmp(terminal.next);
e.EmitPatchJmp(terminal.next);
} }
} }
} }
void A32EmitX64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) {
if (!conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) || is_single_step) {
code.ReturnFromRunCode();
return;
void EmitTerminalImpl(A32EmitX64& e, IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) {
if (!e.conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) || is_single_step) {
e.code.ReturnFromRunCode();
} else {
e.code.jmp(e.terminal_handler_pop_rsb_hint);
}
} }
code.jmp(terminal_handler_pop_rsb_hint);
void EmitTerminalImpl(A32EmitX64& e, IR::Term::FastDispatchHint, IR::LocationDescriptor, bool is_single_step) {
if (!e.conf.HasOptimization(OptimizationFlag::FastDispatch) || is_single_step) {
e.code.ReturnFromRunCode();
} else {
e.code.jmp(e.terminal_handler_fast_dispatch_hint);
}
} }
void A32EmitX64::EmitTerminalImpl(IR::Term::FastDispatchHint, IR::LocationDescriptor, bool is_single_step) {
if (!conf.HasOptimization(OptimizationFlag::FastDispatch) || is_single_step) {
code.ReturnFromRunCode();
return;
void EmitTerminalImpl(A32EmitX64& e, IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
Xbyak::Label pass = e.EmitCond(terminal.if_);
e.EmitTerminal(terminal.else_, initial_location, is_single_step);
e.code.L(pass);
e.EmitTerminal(terminal.then_, initial_location, is_single_step);
} }
code.jmp(terminal_handler_fast_dispatch_hint);
void EmitTerminalImpl(A32EmitX64& e, IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
Xbyak::Label fail;
e.code.cmp(e.code.byte[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, check_bit)], u8(0));
e.code.jz(fail);
e.EmitTerminal(terminal.then_, initial_location, is_single_step);
e.code.L(fail);
e.EmitTerminal(terminal.else_, initial_location, is_single_step);
} }
void A32EmitX64::EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
Xbyak::Label pass = EmitCond(terminal.if_);
EmitTerminal(terminal.else_, initial_location, is_single_step);
code.L(pass);
EmitTerminal(terminal.then_, initial_location, is_single_step);
void EmitTerminalImpl(A32EmitX64& e, IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
e.code.cmp(dword[e.code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], 0);
e.code.jne(e.code.GetForceReturnFromRunCodeAddress());
e.EmitTerminal(terminal.else_, initial_location, is_single_step);
} }
void A32EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
Xbyak::Label fail;
code.cmp(code.byte[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, check_bit)], u8(0));
code.jz(fail);
EmitTerminal(terminal.then_, initial_location, is_single_step);
code.L(fail);
EmitTerminal(terminal.else_, initial_location, is_single_step);
void EmitTerminalImpl(A32EmitX64&, IR::Term::Invalid, IR::LocationDescriptor, bool) {
UNREACHABLE();
}
} }
void A32EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
code.cmp(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], 0);
code.jne(code.GetForceReturnFromRunCodeAddress());
EmitTerminal(terminal.else_, initial_location, is_single_step);
void A32EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) noexcept {
boost::apply_visitor([this, initial_location, is_single_step](auto x) {
EmitTerminalImpl(*this, x, initial_location, is_single_step);
}, terminal);
} }
void A32EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { void A32EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {

23
src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.h

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project. /* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage * Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
@ -55,7 +58,7 @@ public:
void InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges); void InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges);
protected:
//protected:
void EmitCondPrelude(const A32EmitContext& ctx); void EmitCondPrelude(const A32EmitContext& ctx);
struct FastDispatchEntry { struct FastDispatchEntry {
@ -109,15 +112,7 @@ protected:
// Terminal instruction emitters // Terminal instruction emitters
void EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location); void EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location);
void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) noexcept override;
// Patching // Patching
void Unpatch(const IR::LocationDescriptor& target_desc) override; void Unpatch(const IR::LocationDescriptor& target_desc) override;
@ -131,10 +126,10 @@ protected:
BlockRangeInformation<u32> block_ranges; BlockRangeInformation<u32> block_ranges;
std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table; std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
ankerl::unordered_dense::map<u64, FastmemPatchInfo> fastmem_patch_info; ankerl::unordered_dense::map<u64, FastmemPatchInfo> fastmem_patch_info;
std::set<DoNotFastmemMarker> do_not_fastmem;
std::map<std::tuple<bool, size_t, int, int>, void (*)()> read_fallbacks;
std::map<std::tuple<bool, size_t, int, int>, void (*)()> write_fallbacks;
std::map<std::tuple<bool, size_t, int, int>, void (*)()> exclusive_write_fallbacks;
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> read_fallbacks;
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> write_fallbacks;
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> exclusive_write_fallbacks;
ankerl::unordered_dense::set<DoNotFastmemMarker> do_not_fastmem;
void (*memory_read_128)() = nullptr; // Dummy void (*memory_read_128)() = nullptr; // Dummy
void (*memory_write_128)() = nullptr; // Dummy void (*memory_write_128)() = nullptr; // Dummy
const void* terminal_handler_pop_rsb_hint; const void* terminal_handler_pop_rsb_hint;

139
src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp

@ -149,7 +149,7 @@ finish_this_inst:
if (conf.enable_cycle_counting) { if (conf.enable_cycle_counting) {
EmitAddCycles(block.CycleCount()); EmitAddCycles(block.CycleCount());
} }
EmitX64::EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
code.int3(); code.int3();
for (auto& deferred_emit : ctx.deferred_emits) { for (auto& deferred_emit : ctx.deferred_emits) {
@ -615,110 +615,121 @@ std::string A64EmitX64::LocationDescriptorToFriendlyName(const IR::LocationDescr
descriptor.FPCR().Value()); descriptor.FPCR().Value());
} }
void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor, bool) {
code.SwitchMxcsrOnExit();
Devirtualize<&A64::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code, [&](RegList param) {
code.mov(param[0], A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], param[0]);
code.mov(param[1].cvt32(), terminal.num_instructions);
namespace {
void EmitTerminalImpl(A64EmitX64& e, IR::Term::Interpret terminal, IR::LocationDescriptor, bool) {
e.code.SwitchMxcsrOnExit();
Devirtualize<&A64::UserCallbacks::InterpreterFallback>(e.conf.callbacks).EmitCall(e.code, [&](RegList param) {
e.code.mov(param[0], A64::LocationDescriptor{terminal.next}.PC());
e.code.mov(qword[e.code.ABI_JIT_PTR + offsetof(A64JitState, pc)], param[0]);
e.code.mov(param[1].cvt32(), terminal.num_instructions);
}); });
code.ReturnFromRunCode(true); // TODO: Check cycles
e.code.ReturnFromRunCode(true); // TODO: Check cycles
} }
void A64EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) {
code.ReturnFromRunCode();
void EmitTerminalImpl(A64EmitX64& e, IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) {
e.code.ReturnFromRunCode();
} }
void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor, bool is_single_step) {
void EmitTerminalImpl(A64EmitX64& e, IR::Term::LinkBlock terminal, IR::LocationDescriptor, bool is_single_step) {
// Used for patches and linking // Used for patches and linking
if (conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
if (conf.enable_cycle_counting) {
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
patch_information[terminal.next].jg.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJg(terminal.next, next_bb->entrypoint);
if (e.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
if (e.conf.enable_cycle_counting) {
e.code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
e.patch_information[terminal.next].jg.push_back(e.code.getCurr());
if (const auto next_bb = e.GetBasicBlock(terminal.next)) {
e.EmitPatchJg(terminal.next, next_bb->entrypoint);
} else { } else {
EmitPatchJg(terminal.next);
e.EmitPatchJg(terminal.next);
} }
} else { } else {
code.cmp(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], 0);
patch_information[terminal.next].jz.push_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJz(terminal.next, next_bb->entrypoint);
e.code.cmp(dword[e.code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], 0);
e.patch_information[terminal.next].jz.push_back(e.code.getCurr());
if (const auto next_bb = e.GetBasicBlock(terminal.next)) {
e.EmitPatchJz(terminal.next, next_bb->entrypoint);
} else { } else {
EmitPatchJz(terminal.next);
e.EmitPatchJz(terminal.next);
} }
} }
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ForceReturnFromRunCode();
e.code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
e.code.mov(qword[e.code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
e.code.ForceReturnFromRunCode();
} else { } else {
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ReturnFromRunCode();
e.code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
e.code.mov(qword[e.code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
e.code.ReturnFromRunCode();
} }
} }
void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) {
if (conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
patch_information[terminal.next].jmp.push_back(code.getCurr());
if (auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJmp(terminal.next, next_bb->entrypoint);
void EmitTerminalImpl(A64EmitX64& e, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) {
if (e.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
e.patch_information[terminal.next].jmp.push_back(e.code.getCurr());
if (auto next_bb = e.GetBasicBlock(terminal.next)) {
e.EmitPatchJmp(terminal.next, next_bb->entrypoint);
} else { } else {
EmitPatchJmp(terminal.next);
e.EmitPatchJmp(terminal.next);
} }
} else { } else {
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
code.ReturnFromRunCode();
e.code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
e.code.mov(qword[e.code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
e.code.ReturnFromRunCode();
} }
} }
void A64EmitX64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) {
if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) && !is_single_step) {
code.jmp(terminal_handler_pop_rsb_hint);
void EmitTerminalImpl(A64EmitX64& e, IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) {
if (e.conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) && !is_single_step) {
e.code.jmp(e.terminal_handler_pop_rsb_hint);
} else { } else {
code.ReturnFromRunCode();
e.code.ReturnFromRunCode();
} }
} }
void A64EmitX64::EmitTerminalImpl(IR::Term::FastDispatchHint, IR::LocationDescriptor, bool is_single_step) {
if (!conf.HasOptimization(OptimizationFlag::FastDispatch) || is_single_step) {
code.ReturnFromRunCode();
return;
void EmitTerminalImpl(A64EmitX64& e, IR::Term::FastDispatchHint, IR::LocationDescriptor, bool is_single_step) {
if (!e.conf.HasOptimization(OptimizationFlag::FastDispatch) || is_single_step) {
e.code.ReturnFromRunCode();
} else {
e.code.jmp(e.terminal_handler_fast_dispatch_hint);
} }
code.jmp(terminal_handler_fast_dispatch_hint);
} }
void A64EmitX64::EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
void EmitTerminalImpl(A64EmitX64& e, IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
switch (terminal.if_) { switch (terminal.if_) {
case IR::Cond::AL: case IR::Cond::AL:
case IR::Cond::NV: case IR::Cond::NV:
EmitTerminal(terminal.then_, initial_location, is_single_step);
e.EmitTerminal(terminal.then_, initial_location, is_single_step);
break; break;
default: default:
Xbyak::Label pass = EmitCond(terminal.if_);
EmitTerminal(terminal.else_, initial_location, is_single_step);
code.L(pass);
EmitTerminal(terminal.then_, initial_location, is_single_step);
Xbyak::Label pass = e.EmitCond(terminal.if_);
e.EmitTerminal(terminal.else_, initial_location, is_single_step);
e.code.L(pass);
e.EmitTerminal(terminal.then_, initial_location, is_single_step);
break; break;
} }
} }
void A64EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
void EmitTerminalImpl(A64EmitX64& e, IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
Xbyak::Label fail; Xbyak::Label fail;
code.cmp(code.byte[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, check_bit)], u8(0));
code.jz(fail);
EmitTerminal(terminal.then_, initial_location, is_single_step);
code.L(fail);
EmitTerminal(terminal.else_, initial_location, is_single_step);
e.code.cmp(e.code.byte[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, check_bit)], u8(0));
e.code.jz(fail);
e.EmitTerminal(terminal.then_, initial_location, is_single_step);
e.code.L(fail);
e.EmitTerminal(terminal.else_, initial_location, is_single_step);
}
void EmitTerminalImpl(A64EmitX64& e, IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
e.code.cmp(dword[e.code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], 0);
e.code.jne(e.code.GetForceReturnFromRunCodeAddress());
e.EmitTerminal(terminal.else_, initial_location, is_single_step);
}
void EmitTerminalImpl(A64EmitX64&, IR::Term::Invalid, IR::LocationDescriptor, bool) {
UNREACHABLE();
}
} }
void A64EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
code.cmp(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], 0);
code.jne(code.GetForceReturnFromRunCodeAddress());
EmitTerminal(terminal.else_, initial_location, is_single_step);
void A64EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) noexcept {
boost::apply_visitor([this, initial_location, is_single_step](auto x) {
EmitTerminalImpl(*this, x, initial_location, is_single_step);
}, terminal);
} }
void A64EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { void A64EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {

15
src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.h

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project. /* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage * Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
@ -50,7 +53,7 @@ public:
void InvalidateCacheRanges(const boost::icl::interval_set<u64>& ranges); void InvalidateCacheRanges(const boost::icl::interval_set<u64>& ranges);
protected:
//protected:
struct FastDispatchEntry { struct FastDispatchEntry {
u64 location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull; u64 location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;
const void* code_ptr = nullptr; const void* code_ptr = nullptr;
@ -104,15 +107,7 @@ protected:
void EmitExclusiveWriteMemoryInline(A64EmitContext& ctx, IR::Inst* inst); void EmitExclusiveWriteMemoryInline(A64EmitContext& ctx, IR::Inst* inst);
// Terminal instruction emitters // Terminal instruction emitters
void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) override;
void EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) noexcept override;
// Patching // Patching
void Unpatch(const IR::LocationDescriptor& target_desc) override; void Unpatch(const IR::LocationDescriptor& target_desc) override;

24
src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp

@ -346,17 +346,6 @@ EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& de
return block_desc; return block_desc;
} }
void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
boost::apply_visitor([this, initial_location, is_single_step](auto x) {
using T = std::decay_t<decltype(x)>;
if constexpr (!std::is_same_v<T, IR::Term::Invalid>) {
this->EmitTerminalImpl(x, initial_location, is_single_step);
} else {
ASSERT(false && "Invalid terminal");
}
}, terminal);
}
void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
const CodePtr save_code_ptr = code.getCurr(); const CodePtr save_code_ptr = code.getCurr();
const PatchInformation& patch_info = patch_information[target_desc]; const PatchInformation& patch_info = patch_information[target_desc];
@ -399,20 +388,13 @@ void EmitX64::ClearCache() {
void EmitX64::InvalidateBasicBlocks(const ankerl::unordered_dense::set<IR::LocationDescriptor>& locations) { void EmitX64::InvalidateBasicBlocks(const ankerl::unordered_dense::set<IR::LocationDescriptor>& locations) {
code.EnableWriting(); code.EnableWriting();
SCOPE_EXIT {
code.DisableWriting();
};
for (const auto& descriptor : locations) { for (const auto& descriptor : locations) {
const auto it = block_descriptors.find(descriptor);
if (it == block_descriptors.end()) {
continue;
}
if (auto const it = block_descriptors.find(descriptor); it != block_descriptors.end()) {
Unpatch(descriptor); Unpatch(descriptor);
block_descriptors.erase(it); block_descriptors.erase(it);
} }
} }
code.DisableWriting();
}
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

18
src/dynarmic/src/dynarmic/backend/x64/emit_x64.h

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project. /* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage * Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD * SPDX-License-Identifier: 0BSD
@ -92,7 +95,7 @@ public:
/// Invalidates a selection of basic blocks. /// Invalidates a selection of basic blocks.
void InvalidateBasicBlocks(const ankerl::unordered_dense::set<IR::LocationDescriptor>& locations); void InvalidateBasicBlocks(const ankerl::unordered_dense::set<IR::LocationDescriptor>& locations);
protected:
//protected:
// Microinstruction emitters // Microinstruction emitters
#define OPCODE(name, type, ...) void Emit##name(EmitContext& ctx, IR::Inst* inst); #define OPCODE(name, type, ...) void Emit##name(EmitContext& ctx, IR::Inst* inst);
#define A32OPC(...) #define A32OPC(...)
@ -111,18 +114,7 @@ protected:
void PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, IR::LocationDescriptor target); void PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, IR::LocationDescriptor target);
void EmitVerboseDebuggingOutput(RegAlloc& reg_alloc); void EmitVerboseDebuggingOutput(RegAlloc& reg_alloc);
// Terminal instruction emitters
void EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step);
virtual void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::FastDispatchHint terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) = 0;
virtual void EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) noexcept = 0;
// Patching // Patching
struct PatchInformation { struct PatchInformation {

Loading…
Cancel
Save