@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#include <numeric>
@@ -20,9 +20,13 @@ namespace Core::NCE {
Patcher::Patcher(Patcher&& other) noexcept
    : patch_cache(std::move(other.patch_cache)),
      m_patch_instructions(std::move(other.m_patch_instructions)),
      m_patch_instructions_pre(std::move(other.m_patch_instructions_pre)),
      c(m_patch_instructions),
      c_pre(m_patch_instructions_pre),
      m_save_context(other.m_save_context),
      m_load_context(other.m_load_context),
      m_save_context_pre(other.m_save_context_pre),
      m_load_context_pre(other.m_load_context_pre),
      mode(other.mode),
      total_program_size(other.total_program_size),
      m_relocate_module_index(other.m_relocate_module_index),
@@ -42,20 +46,25 @@ using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
constexpr size_t MaxRelativeBranch = 128_MiB;
constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32);
Patcher::Patcher() : c(m_patch_instructions) {
Patcher::Patcher() : c(m_patch_instructions), c_pre(m_patch_instructions_pre) {
    LOG_WARNING(Core_ARM, "Patcher initialized with LRU cache {}",
                patch_cache.isEnabled() ? "enabled" : "disabled");
    // The first word of the patch section is always a branch to the first instruction of the
    // module.
    c.dw(0);
    c_pre.dw(0);
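    // Both the pre- and post-patch sections reserve their first word here; PatchText records a
    // branch-to-module relocation at offset 0 so it is later rewritten to jump to the module's
    // first instruction.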
    // Write save context helper function.
    c.l(m_save_context);
    WriteSaveContext();
    c_pre.l(m_save_context_pre);
    WriteSaveContext(c_pre);
    // Write load context helper function.
    c.l(m_load_context);
    WriteLoadContext();
    c_pre.l(m_load_context_pre);
    WriteLoadContext(c_pre);
}
Patcher::~Patcher() = default;
@@ -64,7 +73,16 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
                        const Kernel::CodeSet::Segment& code) {
    // If we have patched modules but cannot reach the new module, then it needs its own patcher.
    const size_t image_size = program_image.size();
    if (total_program_size + image_size > MaxRelativeBranch && total_program_size > 0) {
    // Check whether this module needs split mode. An AArch64 relative branch can only reach
    // 128 MiB, and testing showed that some modules are larger once their update is applied
    // (208 MiB in one case).
    bool use_split = false;
    if (image_size > MaxRelativeBranch) {
        if (total_program_size > 0) {
            return false;
        }
        use_split = true;
    } else if (total_program_size + image_size > MaxRelativeBranch && total_program_size > 0) {
        return false;
    }
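    // In split mode the patch code is divided in two: a pre-patch section placed in front of the
    // module's text and a post-patch section appended after it, so every instruction of an
    // oversized module stays within branch range of one of them (see RelocateAndCopy for the
    // final layout).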
@@ -74,7 +92,12 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
    // The first word of the patch section is always a branch to the first instruction of the
    // module.
    curr_patch->m_branch_to_module_relocations.push_back({0, 0});
    if (use_split) {
        // curr_patch->m_branch_to_module_relocations.push_back({0, 0});
        curr_patch->m_branch_to_module_relocations_pre.push_back({0, 0});
    } else {
        curr_patch->m_branch_to_module_relocations.push_back({0, 0});
    }
    // Retrieve text segment data.
    const auto text = std::span{program_image}.subspan(code.offset, code.size);
@@ -85,12 +108,18 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
    for (u32 i = ModuleCodeIndex; i < static_cast<u32>(text_words.size()); i++) {
        const u32 inst = text_words[i];
        const auto AddRelocations = [&] {
        const auto AddRelocations = [&](bool& pre_buffer) {
            const uintptr_t this_offset = i * sizeof(u32);
            const uintptr_t next_offset = this_offset + sizeof(u32);
            // Relocate from here to patch.
            this->BranchToPatch(this_offset);
            pre_buffer = use_split && (this_offset < MaxRelativeBranch);
            // Relocate to pre- or post-patch
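            // Only offsets within the first 128 MiB of the text can reach back to the pre-patch
            // section in front of it; later offsets use the post-patch section at the end of the
            // image instead (this assumes the module stays within roughly twice the branch range).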
            if (pre_buffer) {
                this->BranchToPatchPre(this_offset);
            } else {
                this->BranchToPatch(this_offset);
            }
            // Relocate from patch to next instruction.
            return next_offset;
@@ -98,7 +127,13 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
        // SVC
        if (auto svc = SVC{inst}; svc.Verify()) {
            WriteSvcTrampoline(AddRelocations(), svc.GetValue());
            bool pre_buffer = false;
            auto ret = AddRelocations(pre_buffer);
            if (pre_buffer) {
                WriteSvcTrampoline(ret, svc.GetValue(), c_pre, m_save_context_pre,
                                   m_load_context_pre);
            } else {
                WriteSvcTrampoline(ret, svc.GetValue(), c, m_save_context, m_load_context);
            }
            continue;
        }
@@ -109,13 +144,25 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
            const auto src_reg = mrs.GetSystemReg() == TpidrroEl0 ? oaknut::SystemReg::TPIDRRO_EL0
                                                                  : oaknut::SystemReg::TPIDR_EL0;
            const auto dest_reg = oaknut::XReg{static_cast<int>(mrs.GetRt())};
            WriteMrsHandler(AddRelocations(), dest_reg, src_reg);
            bool pre_buffer = false;
            auto ret = AddRelocations(pre_buffer);
            if (pre_buffer) {
                WriteMrsHandler(ret, dest_reg, src_reg, c_pre);
            } else {
                WriteMrsHandler(ret, dest_reg, src_reg, c);
            }
            continue;
        }
        // MRS Xn, CNTPCT_EL0
        if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntpctEl0) {
            WriteCntpctHandler(AddRelocations(), oaknut::XReg{static_cast<int>(mrs.GetRt())});
            bool pre_buffer = false;
            auto ret = AddRelocations(pre_buffer);
            if (pre_buffer) {
                WriteCntpctHandler(ret, oaknut::XReg{static_cast<int>(mrs.GetRt())}, c_pre);
            } else {
                WriteCntpctHandler(ret, oaknut::XReg{static_cast<int>(mrs.GetRt())}, c);
            }
            continue;
        }
@@ -126,7 +173,13 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
        // MSR TPIDR_EL0, Xn
        if (auto msr = MSR{inst}; msr.Verify() && msr.GetSystemReg() == TpidrEl0) {
            WriteMsrHandler(AddRelocations(), oaknut::XReg{static_cast<int>(msr.GetRt())});
            bool pre_buffer = false;
            auto ret = AddRelocations(pre_buffer);
            if (pre_buffer) {
                WriteMsrHandler(ret, oaknut::XReg{static_cast<int>(msr.GetRt())}, c_pre);
            } else {
                WriteMsrHandler(ret, oaknut::XReg{static_cast<int>(msr.GetRt())}, c);
            }
            continue;
        }
@@ -137,7 +190,11 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
    // Determine patching mode for the final relocation step
    total_program_size += image_size;
    this->mode = image_size > MaxRelativeBranch ? PatchMode::PreText : PatchMode::PostData;
    if (use_split) {
        this->mode = PatchMode::Split;
    } else {
        this->mode = image_size > MaxRelativeBranch ? PatchMode::PreText : PatchMode::PostData;
    }
    return true;
}
@@ -146,7 +203,9 @@ bool Patcher::RelocateAndCopy(Common::ProcessAddress load_base,
                              Kernel::PhysicalMemory& program_image,
                              EntryTrampolines* out_trampolines) {
    const size_t patch_size = GetSectionSize();
    const size_t image_size = program_image.size();
    const size_t pre_patch_size = GetPreSectionSize();
    const size_t image_size =
        (mode == PatchMode::Split) ? program_image.size() - pre_patch_size : program_image.size();
    // Retrieve text segment data.
    const auto text = std::span{program_image}.subspan(code.offset, code.size);
@@ -162,6 +221,16 @@ bool Patcher::RelocateAndCopy(Common::ProcessAddress load_base,
        }
    };
    const auto ApplyBranchToPatchRelocationPre = [&](u32* target, const Relocation& rel) {
        oaknut::CodeGenerator rc{target};
        rc.B(static_cast<ptrdiff_t>(rel.patch_offset) - static_cast<ptrdiff_t>(pre_patch_size) -
             static_cast<ptrdiff_t>(rel.module_offset));
    };
    const auto ApplyBranchToPatchRelocationPostSplit = [&](u32* target, const Relocation& rel) {
        oaknut::CodeGenerator rc{target};
        rc.B(static_cast<ptrdiff_t>(image_size) + static_cast<ptrdiff_t>(rel.patch_offset) -
             static_cast<ptrdiff_t>(rel.module_offset));
    };
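    // In split mode an instruction at module_offset lives at (pre_patch_size + module_offset) in
    // the final image, the pre-patch section starts at offset 0, and the post-patch section starts
    // at (pre_patch_size + image_size); those positions yield the two displacements above.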
    const auto ApplyBranchToModuleRelocation = [&](u32* target, const Relocation& rel) {
        oaknut::CodeGenerator rc{target};
        if (mode == PatchMode::PreText) {
@@ -171,6 +240,16 @@ bool Patcher::RelocateAndCopy(Common::ProcessAddress load_base,
        }
    };
    const auto ApplyBranchToModuleRelocationPre = [&](u32* target, const Relocation& rel) {
        oaknut::CodeGenerator rc{target};
        rc.B(static_cast<ptrdiff_t>(pre_patch_size) + static_cast<ptrdiff_t>(rel.module_offset) -
             static_cast<ptrdiff_t>(rel.patch_offset));
    };
    const auto ApplyBranchToModuleRelocationPostSplit = [&](u32* target, const Relocation& rel) {
        oaknut::CodeGenerator rc{target};
        rc.B(static_cast<ptrdiff_t>(rel.module_offset) - static_cast<ptrdiff_t>(image_size) -
             static_cast<ptrdiff_t>(rel.patch_offset));
    };
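    // The return branches are the mirror image: from the pre-patch section the target instruction
    // is (pre_patch_size + module_offset - patch_offset) ahead, and from the post-patch section it
    // is (image_size + patch_offset - module_offset) behind.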
    const auto RebasePatch = [&](ptrdiff_t patch_offset) {
        if (mode == PatchMode::PreText) {
            return GetInteger(load_base) + patch_offset;
@@ -182,28 +261,87 @@ bool Patcher::RelocateAndCopy(Common::ProcessAddress load_base,
    const auto RebasePc = [&](uintptr_t module_offset) {
        if (mode == PatchMode::PreText) {
            return GetInteger(load_base) + patch_size + module_offset;
        } else {
            return GetInteger(load_base) + module_offset;
        }
        if (mode == PatchMode::Split) {
            return GetInteger(load_base) + pre_patch_size + module_offset;
        }
        return GetInteger(load_base) + module_offset;
    };
    // We are now ready to relocate!
    auto& patch = modules[m_relocate_module_index++];
    for (const Relocation& rel : patch.m_branch_to_patch_relocations) {
        ApplyBranchToPatchRelocation(text_words.data() + rel.module_offset / sizeof(u32), rel);
    }
    for (const Relocation& rel : patch.m_branch_to_module_relocations) {
        ApplyBranchToModuleRelocation(m_patch_instructions.data() + rel.patch_offset / sizeof(u32),
                                      rel);
    }
    // Rewrite PC constants and record post trampolines
    for (const Relocation& rel : patch.m_write_module_pc_relocations) {
        oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)};
        rc.dx(RebasePc(rel.module_offset));
    if (mode == PatchMode::Split) {
        for (const Relocation& rel : patch.m_branch_to_pre_patch_relocations) {
            ApplyBranchToPatchRelocationPre(text_words.data() + rel.module_offset / sizeof(u32),
                                            rel);
        }
        LOG_DEBUG(Core_ARM, "applied Pre: {}", patch.m_branch_to_pre_patch_relocations.size());
        for (const Relocation& rel : patch.m_branch_to_patch_relocations) {
            ApplyBranchToPatchRelocationPostSplit(
                text_words.data() + rel.module_offset / sizeof(u32), rel);
        }
        LOG_DEBUG(Core_ARM, "applied Post: {}", patch.m_branch_to_patch_relocations.size());
        for (const Relocation& rel : patch.m_branch_to_module_relocations_pre) {
            ApplyBranchToModuleRelocationPre(
                m_patch_instructions_pre.data() + rel.patch_offset / sizeof(u32), rel);
        }
        LOG_DEBUG(Core_ARM, "applied Pre-module {}",
                  patch.m_branch_to_module_relocations_pre.size());
        for (const Relocation& rel : patch.m_branch_to_module_relocations) {
            ApplyBranchToModuleRelocationPostSplit(
                m_patch_instructions.data() + rel.patch_offset / sizeof(u32), rel);
        }
        LOG_DEBUG(Core_ARM, "applied Post-module {}", patch.m_branch_to_module_relocations.size());
        // Pre
        for (const Relocation& rel : patch.m_write_module_pc_relocations_pre) {
            oaknut::CodeGenerator rc{m_patch_instructions_pre.data() +
                                     rel.patch_offset / sizeof(u32)};
            rc.dx(RebasePc(rel.module_offset));
        }
        // Post
        for (const Relocation& rel : patch.m_write_module_pc_relocations) {
            oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)};
            rc.dx(RebasePc(rel.module_offset));
        }
        // Trampolines (split pre + post)
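        // Pre trampolines live at the very start of the image, so their patch offsets rebase from
        // load_base directly; post trampolines sit after both the pre-patch section and the
        // module, hence the extra pre_patch_size + image_size below.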
        for (const Trampoline& rel : patch.m_trampolines_pre) {
            out_trampolines->insert(
                {RebasePc(rel.module_offset), GetInteger(load_base) + rel.patch_offset});
        }
        for (const Trampoline& rel : patch.m_trampolines) {
            out_trampolines->insert(
                {RebasePc(rel.module_offset),
                 GetInteger(load_base) + pre_patch_size + image_size + rel.patch_offset});
        }
        // Decode the leading B instruction of the pre-patch section (sign-extending its imm26
        // field); the decoded value is currently unused.
        if (!m_patch_instructions_pre.empty()) {
            u32 insn = m_patch_instructions_pre[0];
            if ((insn & 0xFC000000) == 0x14000000) {
                s32 imm26 = insn & 0x3FFFFFF;
                // Sign extend
                if (imm26 & 0x2000000) {
                    imm26 |= 0xFC000000;
                }
            }
        }
    } else {
        for (const Relocation& rel : patch.m_branch_to_patch_relocations) {
            ApplyBranchToPatchRelocation(text_words.data() + rel.module_offset / sizeof(u32), rel);
        }
        for (const Relocation& rel : patch.m_branch_to_module_relocations) {
            ApplyBranchToModuleRelocation(
                m_patch_instructions.data() + rel.patch_offset / sizeof(u32), rel);
        }
        // Rewrite PC constants
        for (const Relocation& rel : patch.m_write_module_pc_relocations) {
            oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)};
            rc.dx(RebasePc(rel.module_offset));
        }
    }
    for (const Trampoline& rel : patch.m_trampolines) {
        out_trampolines->insert({RebasePc(rel.module_offset), RebasePatch(rel.patch_offset)});
    if (mode != PatchMode::Split) {
        for (const Trampoline& rel : patch.m_trampolines) {
            out_trampolines->insert({RebasePc(rel.module_offset), RebasePatch(rel.patch_offset)});
        }
    }
    // Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not.
@@ -223,6 +361,15 @@ bool Patcher::RelocateAndCopy(Common::ProcessAddress load_base,
        ASSERT(image_size == total_program_size);
        std::memcpy(program_image.data(), m_patch_instructions.data(),
                    m_patch_instructions.size() * sizeof(u32));
    } else if (this->mode == PatchMode::Split) {
        const size_t current_size = program_image.size();
        program_image.resize(current_size + patch_size);
        // Copy pre-patch buffer to the beginning
        std::memcpy(program_image.data(), m_patch_instructions_pre.data(),
                    m_patch_instructions_pre.size() * sizeof(u32));
        // Same for post-patch buffer to the end
        std::memcpy(program_image.data() + current_size, m_patch_instructions.data(),
                    m_patch_instructions.size() * sizeof(u32));
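        // Resulting layout: [pre-patch | module | post-patch]. image_size was computed above as
        // program_image.size() - pre_patch_size, i.e. the incoming image already accounts for the
        // space the pre-patch section occupies at its start.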
    } else {
        program_image.resize(image_size + patch_size);
        std::memcpy(program_image.data() + image_size, m_patch_instructions.data(),
@@ -238,202 +385,225 @@ size_t Patcher::GetSectionSize() const noexcept {
    return Common::AlignUp(m_patch_instructions.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE);
}
void Patcher::WriteLoadContext() {
size_t Patcher::GetPreSectionSize() const noexcept {
    return Common::AlignUp(m_patch_instructions_pre.size() * sizeof(u32),
                           Core::Memory::YUZU_PAGESIZE);
}
void Patcher::WriteLoadContext(oaknut::VectorCodeGenerator& cg) {
    // This function was called, which modifies X30, so use that as a scratch register.
    // SP contains the guest X30, so save our return X30 to SP + 8, since we have allocated 16 bytes
    // of stack.
    c.STR(X30, SP, 8);
    c.MRS(X30, oaknut::SystemReg::TPIDR_EL0);
    c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context));
    cg.STR(X30, SP, 8);
    cg.MRS(X30, oaknut::SystemReg::TPIDR_EL0);
    cg.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context));
    // Load system registers.
    c.LDR(W0, X30, offsetof(GuestContext, fpsr));
    c.MSR(oaknut::SystemReg::FPSR, X0);
    c.LDR(W0, X30, offsetof(GuestContext, fpcr));
    c.MSR(oaknut::SystemReg::FPCR, X0);
    c.LDR(W0, X30, offsetof(GuestContext, nzcv));
    c.MSR(oaknut::SystemReg::NZCV, X0);
    cg.LDR(W0, X30, offsetof(GuestContext, fpsr));
    cg.MSR(oaknut::SystemReg::FPSR, X0);
    cg.LDR(W0, X30, offsetof(GuestContext, fpcr));
    cg.MSR(oaknut::SystemReg::FPCR, X0);
    cg.LDR(W0, X30, offsetof(GuestContext, nzcv));
    cg.MSR(oaknut::SystemReg::NZCV, X0);
    // Load all vector registers.
    static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers);
    for (int i = 0; i <= 30; i += 2) {
        c.LDP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i);
        cg.LDP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i);
    }
    // Load all general-purpose registers except X30.
    for (int i = 0; i <= 28; i += 2) {
        c.LDP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i);
        cg.LDP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i);
    }
    // Reload our return X30 from the stack and return.
    // The patch code will reload the guest X30 for us.
    c.LDR(X30, SP, 8);
    c.RET();
    cg.LDR(X30, SP, 8);
    cg.RET();
}
void Patcher::WriteSaveContext() {
void Patcher::WriteSaveContext(oaknut::VectorCodeGenerator& cg) {
    // This function was called, which modifies X30, so use that as a scratch register.
    // SP contains the guest X30, so save our X30 to SP + 8, since we have allocated 16 bytes of
    // stack.
    c.STR(X30, SP, 8);
    c.MRS(X30, oaknut::SystemReg::TPIDR_EL0);
    c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context));
    cg.STR(X30, SP, 8);
    cg.MRS(X30, oaknut::SystemReg::TPIDR_EL0);
    cg.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context));
    // Store all general-purpose registers except X30.
    for (int i = 0; i <= 28; i += 2) {
        c.STP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i);
        cg.STP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i);
    }
    // Store all vector registers.
    static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers);
    for (int i = 0; i <= 30; i += 2) {
        c.STP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i);
        cg.STP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i);
    }
    // Store guest system registers, X30 and SP, using X0 as a scratch register.
    c.STR(X0, SP, PRE_INDEXED, -16);
    c.LDR(X0, SP, 16);
    c.STR(X0, X30, 8 * 30);
    c.ADD(X0, SP, 32);
    c.STR(X0, X30, offsetof(GuestContext, sp));
    c.MRS(X0, oaknut::SystemReg::FPSR);
    c.STR(W0, X30, offsetof(GuestContext, fpsr));
    c.MRS(X0, oaknut::SystemReg::FPCR);
    c.STR(W0, X30, offsetof(GuestContext, fpcr));
    c.MRS(X0, oaknut::SystemReg::NZCV);
    c.STR(W0, X30, offsetof(GuestContext, nzcv));
    c.LDR(X0, SP, POST_INDEXED, 16);
    cg.STR(X0, SP, PRE_INDEXED, -16);
    cg.LDR(X0, SP, 16);
    cg.STR(X0, X30, 8 * 30);
    cg.ADD(X0, SP, 32);
    cg.STR(X0, X30, offsetof(GuestContext, sp));
    cg.MRS(X0, oaknut::SystemReg::FPSR);
    cg.STR(W0, X30, offsetof(GuestContext, fpsr));
    cg.MRS(X0, oaknut::SystemReg::FPCR);
    cg.STR(W0, X30, offsetof(GuestContext, fpcr));
    cg.MRS(X0, oaknut::SystemReg::NZCV);
    cg.STR(W0, X30, offsetof(GuestContext, nzcv));
    cg.LDR(X0, SP, POST_INDEXED, 16);
    // Reload our return X30 from the stack, and return.
    c.LDR(X30, SP, 8);
    c.RET();
    cg.LDR(X30, SP, 8);
    cg.RET();
}
void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) {
void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id,
                                 oaknut::VectorCodeGenerator& cg, oaknut::Label& save_ctx,
                                 oaknut::Label& load_ctx) {
    // Determine if we're writing to the pre-patch buffer
    const bool is_pre = (&cg == &c_pre);
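    // Trampolines recorded through the pre-patch generator go into m_trampolines_pre and are
    // rebased from the start of the image in RelocateAndCopy rather than from the end.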
    // We are about to start saving state, so we need to lock the context.
    this->LockContext();
    this->LockContext(cg);
    // Store guest X30 to the stack. Then, save the context and restore the stack.
    // This will save all registers except PC, but we know PC at patch time.
    c.STR(X30, SP, PRE_INDEXED, -16);
    c.BL(m_save_context);
    c.LDR(X30, SP, POST_INDEXED, 16);
    cg.STR(X30, SP, PRE_INDEXED, -16);
    cg.BL(save_ctx);
    cg.LDR(X30, SP, POST_INDEXED, 16);
    // Now that we've saved all registers, we can use any registers as scratch.
    // Store PC + 4 to arm interface, since we know the instruction offset from the entry point.
    oaknut::Label pc_after_svc;
    c.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
    c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
    c.LDR(X2, pc_after_svc);
    c.STR(X2, X1, offsetof(GuestContext, pc));
    cg.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
    cg.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
    cg.LDR(X2, pc_after_svc);
    cg.STR(X2, X1, offsetof(GuestContext, pc));
    // Store SVC number to execute when we return
    c.MOV(X2, svc_id);
    c.STR(W2, X1, offsetof(GuestContext, svc));
    cg.MOV(X2, svc_id);
    cg.STR(W2, X1, offsetof(GuestContext, svc));
    // We are calling a SVC. Clear esr_el1 and return it.
    static_assert(std::is_same_v<std::underlying_type_t<HaltReason>, u64>);
    oaknut::Label retry;
    c.ADD(X2, X1, offsetof(GuestContext, esr_el1));
    c.l(retry);
    c.LDAXR(X0, X2);
    c.STLXR(W3, XZR, X2);
    c.CBNZ(W3, retry);
    cg.ADD(X2, X1, offsetof(GuestContext, esr_el1));
    cg.l(retry);
    cg.LDAXR(X0, X2);
    cg.STLXR(W3, XZR, X2);
    cg.CBNZ(W3, retry);
    // Add "calling SVC" flag. Since this is X0, this is now our return value.
    c.ORR(X0, X0, static_cast<u64>(HaltReason::SupervisorCall));
    cg.ORR(X0, X0, static_cast<u64>(HaltReason::SupervisorCall));
    // Offset the GuestContext pointer to the HostContext member.
    // STP has limited range of [-512, 504] which we can't reach otherwise
    // NB: Due to this all offsets below are from the start of HostContext.
    c.ADD(X1, X1, offsetof(GuestContext, host_ctx));
    cg.ADD(X1, X1, offsetof(GuestContext, host_ctx));
    // Reload host TPIDR_EL0 and SP.
    static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0));
    c.LDP(X2, X3, X1, offsetof(HostContext, host_sp));
    c.MOV(SP, X2);
    c.MSR(oaknut::SystemReg::TPIDR_EL0, X3);
    cg.LDP(X2, X3, X1, offsetof(HostContext, host_sp));
    cg.MOV(SP, X2);
    cg.MSR(oaknut::SystemReg::TPIDR_EL0, X3);
    // Load callee-saved host registers and return to host.
    static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs);
    static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs);
    c.LDP(X19, X20, X1, HOST_REGS_OFF);
    c.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64));
    c.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64));
    c.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64));
    c.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64));
    c.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64));
    c.LDP(Q8, Q9, X1, HOST_VREGS_OFF);
    c.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128));
    c.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128));
    c.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128));
    c.RET();
    cg.LDP(X19, X20, X1, HOST_REGS_OFF);
    cg.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64));
    cg.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64));
    cg.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64));
    cg.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64));
    cg.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64));
    cg.LDP(Q8, Q9, X1, HOST_VREGS_OFF);
    cg.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128));
    cg.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128));
    cg.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128));
    cg.RET();
    // Write the post-SVC trampoline address, which will jump back to the guest after restoring its
    // state.
    curr_patch->m_trampolines.push_back({c.offset(), module_dest});
    if (is_pre) {
        curr_patch->m_trampolines_pre.push_back({cg.offset(), module_dest});
    } else {
        curr_patch->m_trampolines.push_back({cg.offset(), module_dest});
    }
    // Host called this location. Save the return address so we can
    // unwind the stack properly when jumping back.
    c.MRS(X2, oaknut::SystemReg::TPIDR_EL0);
    c.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context));
    c.ADD(X0, X2, offsetof(GuestContext, host_ctx));
    c.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64));
    cg.MRS(X2, oaknut::SystemReg::TPIDR_EL0);
    cg.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context));
    cg.ADD(X0, X2, offsetof(GuestContext, host_ctx));
    cg.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64));
    // Reload all guest registers except X30 and PC.
    // The function also expects 16 bytes of stack already allocated.
    c.STR(X30, SP, PRE_INDEXED, -16);
    c.BL(m_load_context);
    c.LDR(X30, SP, POST_INDEXED, 16);
    cg.STR(X30, SP, PRE_INDEXED, -16);
    cg.BL(load_ctx);
    cg.LDR(X30, SP, POST_INDEXED, 16);
    // Use X1 as a scratch register to restore X30.
    c.STR(X1, SP, PRE_INDEXED, -16);
    c.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
    c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
    c.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30);
    c.LDR(X1, SP, POST_INDEXED, 16);
    cg.STR(X1, SP, PRE_INDEXED, -16);
    cg.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
    cg.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
    cg.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30);
    cg.LDR(X1, SP, POST_INDEXED, 16);
    // Unlock the context.
    this->UnlockContext();
    this->UnlockContext(cg);
    // Jump back to the instruction after the emulated SVC.
    this->BranchToModule(module_dest);
    if (&cg == &c_pre) {
        this->BranchToModulePre(module_dest);
    } else {
        this->BranchToModule(module_dest);
    }
    // Store PC after call.
    c.l(pc_after_svc);
    this->WriteModulePc(module_dest);
    cg.l(pc_after_svc);
    if (&cg == &c_pre) {
        this->WriteModulePcPre(module_dest);
    } else {
        this->WriteModulePc(module_dest);
    }
}
void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
                              oaknut::SystemReg src_reg) {
                              oaknut::SystemReg src_reg, oaknut::VectorCodeGenerator& cg) {
    // Retrieve emulated TLS register from GuestContext.
    c.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0);
    cg.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0);
    if (src_reg == oaknut::SystemReg::TPIDRRO_EL0) {
        c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidrro_el0));
        cg.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidrro_el0));
    } else {
        c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidr_el0));
        cg.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidr_el0));
    }
    // Jump back to the instruction after the emulated MRS.
    this->BranchToModule(module_dest);
    if (&cg == &c_pre) {
        this->BranchToModulePre(module_dest);
    } else {
        this->BranchToModule(module_dest);
    }
}
void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) {
void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg,
                              oaknut::VectorCodeGenerator& cg) {
    const auto scratch_reg = src_reg.index() == 0 ? X1 : X0;
    c.STR(scratch_reg, SP, PRE_INDEXED, -16);
    cg.STR(scratch_reg, SP, PRE_INDEXED, -16);
    // Save guest value to NativeExecutionParameters::tpidr_el0.
    c.MRS(scratch_reg, oaknut::SystemReg::TPIDR_EL0);
    c.STR(src_reg, scratch_reg, offsetof(NativeExecutionParameters, tpidr_el0));
    cg.MRS(scratch_reg, oaknut::SystemReg::TPIDR_EL0);
    cg.STR(src_reg, scratch_reg, offsetof(NativeExecutionParameters, tpidr_el0));
    // Restore scratch register.
    c.LDR(scratch_reg, SP, POST_INDEXED, 16);
    cg.LDR(scratch_reg, SP, POST_INDEXED, 16);
    // Jump back to the instruction after the emulated MSR.
    this->BranchToModule(module_dest);
    if (&cg == &c_pre) {
        this->BranchToModulePre(module_dest);
    } else {
        this->BranchToModule(module_dest);
    }
}
void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) {
void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
                                 oaknut::VectorCodeGenerator& cg) {
    static Common::Arm64::NativeClock clock{};
    const auto factor = clock.GetGuestCNTFRQFactor();
    const auto raw_factor = std::bit_cast<std::array<u64, 2>>(factor);
@@ -446,80 +616,83 @@ void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_
    oaknut::Label factorhi;
    // Save scratches.
    c.STP(scratch0, scratch1, SP, PRE_INDEXED, -16);
    cg.STP(scratch0, scratch1, SP, PRE_INDEXED, -16);
    // Load counter value.
    c.MRS(dest_reg, oaknut::SystemReg::CNTVCT_EL0);
    cg.MRS(dest_reg, oaknut::SystemReg::CNTVCT_EL0);
    // Load scaling factor.
    c.LDR(scratch0, factorlo);
    c.LDR(scratch1, factorhi);
    cg.LDR(scratch0, factorlo);
    cg.LDR(scratch1, factorhi);
    // Multiply low bits and get result.
    c.UMULH(scratch0, dest_reg, scratch0);
    cg.UMULH(scratch0, dest_reg, scratch0);
    // Multiply high bits and add low bit result.
    c.MADD(dest_reg, dest_reg, scratch1, scratch0);
    cg.MADD(dest_reg, dest_reg, scratch1, scratch0);
    // Reload scratches.
    c.LDP(scratch0, scratch1, SP, POST_INDEXED, 16);
    cg.LDP(scratch0, scratch1, SP, POST_INDEXED, 16);
    // Jump back to the instruction after the emulated MRS.
    this->BranchToModule(module_dest);
    if (&cg == &c_pre) {
        this->BranchToModulePre(module_dest);
    } else {
        this->BranchToModule(module_dest);
    }
    // Scaling factor constant values.
    c.l(factorlo);
    c.dx(raw_factor[0]);
    c.l(factorhi);
    c.dx(raw_factor[1]);
    cg.l(factorlo);
    cg.dx(raw_factor[0]);
    cg.l(factorhi);
    cg.dx(raw_factor[1]);
}
void Patcher::LockContext() {
void Patcher::LockContext(oaknut::VectorCodeGenerator& cg) {
    oaknut::Label retry;
    // Save scratches.
    c.STP(X0, X1, SP, PRE_INDEXED, -16);
    cg.STP(X0, X1, SP, PRE_INDEXED, -16);
    // Reload lock pointer.
    c.l(retry);
    c.CLREX();
    c.MRS(X0, oaknut::SystemReg::TPIDR_EL0);
    c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock));
    cg.l(retry);
    cg.CLREX();
    cg.MRS(X0, oaknut::SystemReg::TPIDR_EL0);
    cg.ADD(X0, X0, offsetof(NativeExecutionParameters, lock));
    static_assert(SpinLockLocked == 0);
    // Load-linked with acquire ordering.
    c.LDAXR(W1, X0);
    cg.LDAXR(W1, X0);
    // If the value was SpinLockLocked, clear monitor and retry.
    c.CBZ(W1, retry);
    cg.CBZ(W1, retry);
    // Store-conditional SpinLockLocked with relaxed ordering.
    c.STXR(W1, WZR, X0);
    cg.STXR(W1, WZR, X0);
    // If we failed to store, retry.
    c.CBNZ(W1, retry);
    cg.CBNZ(W1, retry);
    // We succeeded! Reload scratches.
    c.LDP(X0, X1, SP, POST_INDEXED, 16);
    cg.LDP(X0, X1, SP, POST_INDEXED, 16);
}
void Patcher::UnlockContext() {
void Patcher::UnlockContext(oaknut::VectorCodeGenerator& cg) {
    // Save scratches.
    c.STP(X0, X1, SP, PRE_INDEXED, -16);
    cg.STP(X0, X1, SP, PRE_INDEXED, -16);
    // Load lock pointer.
    c.MRS(X0, oaknut::SystemReg::TPIDR_EL0);
    c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock));
    cg.MRS(X0, oaknut::SystemReg::TPIDR_EL0);
    cg.ADD(X0, X0, offsetof(NativeExecutionParameters, lock));
    // Load SpinLockUnlocked.
    c.MOV(W1, SpinLockUnlocked);
    cg.MOV(W1, SpinLockUnlocked);
    // Store value with release ordering.
    c.STLR(W1, X0);
    cg.STLR(W1, X0);
    // Load scratches.
    c.LDP(X0, X1, SP, POST_INDEXED, 16);
    cg.LDP(X0, X1, SP, POST_INDEXED, 16);
}
} // namespace Core::NCE