diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 163772d8d5..81942a3dc8 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -210,9 +210,12 @@ std::shared_ptr<Dynarmic::A32::Jit> ArmDynarmic32::MakeJit(Common::PageTable* pa
     config.wall_clock_cntpct = m_uses_wall_clock;
     config.enable_cycle_counting = !m_uses_wall_clock;
 
-    // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
-    // Solaris doesn't support kPageSize >= 512MiB
+    // Code cache size
+#ifdef ARCHITECTURE_arm64
     config.code_cache_size = std::uint32_t(128_MiB);
+#else
+    config.code_cache_size = std::uint32_t(512_MiB);
+#endif
 
     // Allow memory fault handling to work
     if (m_system.DebuggerEnabled()) {
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 1d74215971..b16e3eb20f 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -269,9 +269,12 @@ std::shared_ptr<Dynarmic::A64::Jit> ArmDynarmic64::MakeJit(Common::PageTable* pa
     config.wall_clock_cntpct = m_uses_wall_clock;
     config.enable_cycle_counting = !m_uses_wall_clock;
 
-    // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB
-    // Solaris doesn't support kPageSize >= 512MiB
+    // Code cache size
+#ifdef ARCHITECTURE_arm64
     config.code_cache_size = std::uint32_t(128_MiB);
+#else
+    config.code_cache_size = std::uint32_t(512_MiB);
+#endif
 
     // Allow memory fault handling to work
     if (m_system.DebuggerEnabled()) {
diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt
index 5d52637ec3..db319b66e6 100644
--- a/src/dynarmic/src/dynarmic/CMakeLists.txt
+++ b/src/dynarmic/src/dynarmic/CMakeLists.txt
@@ -58,11 +58,14 @@ add_library(dynarmic STATIC
     common/lut_from_list.h
     common/math_util.cpp
     common/math_util.h
+    common/memory_pool.cpp
+    common/memory_pool.h
     common/safe_ops.h
     common/spin_lock.h
     common/string_util.h
     common/u128.cpp
     common/u128.h
+    common/variant_util.h
     frontend/A32/a32_types.cpp
     frontend/A32/a32_types.h
     frontend/A64/a64_types.cpp
@@ -77,6 +80,7 @@ add_library(dynarmic STATIC
     ir/basic_block.cpp
     ir/basic_block.h
     ir/cond.h
+    ir/ir_emitter.cpp
     ir/ir_emitter.h
     ir/location_descriptor.cpp
     ir/location_descriptor.h
diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler.h b/src/dynarmic/src/dynarmic/backend/exception_handler.h
index cd274b111f..173949628c 100644
--- a/src/dynarmic/src/dynarmic/backend/exception_handler.h
+++ b/src/dynarmic/src/dynarmic/backend/exception_handler.h
@@ -15,15 +15,15 @@
 #include
 #include "dynarmic/common/common_types.h"
 
-#if defined(ARCHITECTURE_x86_64)
+#if defined(MCL_ARCHITECTURE_X86_64)
 namespace Dynarmic::Backend::X64 {
 class BlockOfCode;
 }  // namespace Dynarmic::Backend::X64
-#elif defined(ARCHITECTURE_arm64)
+#elif defined(MCL_ARCHITECTURE_ARM64)
 namespace oaknut {
 class CodeBlock;
 }  // namespace oaknut
-#elif defined(ARCHITECTURE_riscv64)
+#elif defined(MCL_ARCHITECTURE_RISCV)
 namespace Dynarmic::Backend::RV64 {
 class CodeBlock;
 }  // namespace Dynarmic::Backend::RV64
@@ -33,16 +33,16 @@
 
 namespace Dynarmic::Backend {
 
-#if defined(ARCHITECTURE_x86_64)
+#if defined(MCL_ARCHITECTURE_X86_64)
 struct FakeCall {
     u64 call_rip;
     u64 ret_rip;
 };
-#elif defined(ARCHITECTURE_arm64)
+#elif defined(MCL_ARCHITECTURE_ARM64)
 struct FakeCall {
     u64 call_pc;
 };
-#elif defined(ARCHITECTURE_riscv64)
+#elif defined(MCL_ARCHITECTURE_RISCV)
 struct FakeCall {
 };
 #else
@@ -54,11 +54,11 @@ public:
    ExceptionHandler();
    ~ExceptionHandler();
 
-#if defined(ARCHITECTURE_x86_64)
+#if defined(MCL_ARCHITECTURE_X86_64)
    void Register(X64::BlockOfCode& code);
-#elif defined(ARCHITECTURE_arm64)
+#elif defined(MCL_ARCHITECTURE_ARM64)
    void Register(oaknut::CodeBlock& mem, std::size_t mem_size);
-#elif defined(ARCHITECTURE_riscv64)
+#elif defined(MCL_ARCHITECTURE_RISCV)
    void Register(RV64::CodeBlock& mem, std::size_t mem_size);
 #else
 #    error "Invalid architecture"
diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp
index 8cde0049d8..cac27b77cd 100644
--- a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp
+++ b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp
@@ -28,7 +28,19 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)
 
 IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
     IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
-    Optimization::Optimize(ir_block, conf, {});
+
+    Optimization::PolyfillPass(ir_block, {});
+    if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
+        Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true});
+        Optimization::DeadCodeElimination(ir_block);
+    }
+    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
+        Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
+        Optimization::ConstantPropagation(ir_block);
+        Optimization::DeadCodeElimination(ir_block);
+    }
+    Optimization::VerificationPass(ir_block);
+
     return ir_block;
 }
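Note on the gated pipeline above: each pass now runs only when the matching OptimizationFlag bit is set on the user config. A minimal sketch of how a caller might exercise this gating; the `optimizations` field and flag names follow upstream dynarmic's UserConfig and are assumptions about this fork:

    #include <dynarmic/interface/A32/config.h>
    #include <dynarmic/interface/optimization_flags.h>

    // Hypothetical setup: enable only GetSetElimination, so GenerateIR above
    // would skip A32ConstantMemoryReads/ConstantPropagation entirely.
    Dynarmic::A32::UserConfig MakeConfig() {
        Dynarmic::A32::UserConfig conf{};
        conf.optimizations = Dynarmic::OptimizationFlag::GetSetElimination;
        return conf;
    }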
diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp
index 1191e443d1..b3faa8830e 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp
@@ -28,6 +28,7 @@
 #include "dynarmic/backend/x64/nzcv_util.h"
 #include "dynarmic/backend/x64/perf_map.h"
 #include "dynarmic/backend/x64/stack_layout.h"
+#include "dynarmic/common/variant_util.h"
 #include "dynarmic/frontend/A32/a32_location_descriptor.h"
 #include "dynarmic/frontend/A32/a32_types.h"
 #include "dynarmic/interface/A32/coprocessor.h"
diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp
index 79ee56b3dd..bc5218c564 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp
@@ -122,9 +122,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
     auto const opcode = inst.GetOpcode();
     // Call the relevant Emit* member function.
     switch (opcode) {
-#define OPCODE(name, type, ...) case IR::Opcode::name: goto opcode_branch;
+#define OPCODE(name, type, ...) [[likely]] case IR::Opcode::name: goto opcode_branch;
 #define A32OPC(name, type, ...)
-#define A64OPC(name, type, ...) case IR::Opcode::A64##name: goto a64_branch;
+#define A64OPC(name, type, ...) [[likely]] case IR::Opcode::A64##name: goto a64_branch;
 #include "dynarmic/ir/opcodes.inc"
 #undef OPCODE
 #undef A32OPC
@@ -762,7 +762,7 @@ void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) {
         target_code_ptr = code.GetReturnFromRunCodeAddress();
     }
     const CodePtr patch_location = code.getCurr();
-    code.mov(code.rcx, u64(target_code_ptr));
+    code.mov(code.rcx, reinterpret_cast<u64>(target_code_ptr));
     code.EnsurePatchLocationSize(patch_location, 10);
 }
diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp
index b895e42251..e89674c9cf 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp
@@ -80,16 +80,16 @@ public:
     };
 
     // TODO: Check code alignment
-    const CodePtr aligned_code_ptr = CodePtr((uintptr_t(GetCurrentBlock()) + 15) & ~uintptr_t(15));
-    const CodePtr current_code_ptr = [this, aligned_code_ptr] {
+
+    const CodePtr current_code_ptr = [this] {
         // RSB optimization
         const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask;
         if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
             jit_state.rsb_ptr = new_rsb_ptr;
-            return CodePtr(jit_state.rsb_codeptrs[new_rsb_ptr]);
+            return reinterpret_cast<CodePtr>(jit_state.rsb_codeptrs[new_rsb_ptr]);
         }
-        return aligned_code_ptr;
-        //return GetCurrentBlock();
+
+        return GetCurrentBlock();
     }();
 
     const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr);
diff --git a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp
index 299bf1d1d6..a9bbab3d10 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp
@@ -10,6 +10,7 @@
 
 #include
 
+#include
 #include "dynarmic/common/common_types.h"
 
 #include
@@ -75,8 +76,7 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
     const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);
 
     size_t xmm_offset = frame_info.xmm_offset + (num_xmms * XMM_SIZE);
-    for (auto it = regs.rbegin(); it != regs.rend(); ++it) {
-        auto const xmm = *it;
+    for (auto const xmm : mcl::iterator::reverse(regs)) {
         if (HostLocIsXMM(xmm)) {
             xmm_offset -= XMM_SIZE;
             if (code.HasHostFeature(HostFeature::AVX)) {
@@ -88,11 +88,9 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
     }
     if (frame_info.stack_subtraction != 0)
         code.add(rsp, u32(frame_info.stack_subtraction));
-    for (auto it = regs.rbegin(); it != regs.rend(); ++it) {
-        auto const gpr = *it;
+    for (auto const gpr : mcl::iterator::reverse(regs))
         if (HostLocIsGPR(gpr))
             code.pop(HostLocToReg64(gpr));
-    }
 }
 
 void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) {
diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp
index 8dfe84b37c..333becb5d4 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp
@@ -364,7 +364,8 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
     cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
     jne(return_to_caller_mxcsr_already_exited, T_NEAR);
 
-    lock(); or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
+    lock();
+    or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
 
     SwitchMxcsrOnEntry();
     jmp(ABI_PARAM2);
@@ -414,6 +415,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
     }
 
     xor_(eax, eax);
+    lock();
     xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax);
 
     ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp
index 0160e8ab78..8a27c6586f 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp
@@ -11,7 +11,6 @@
 #include
 
 #include "dynarmic/common/assert.h"
-#include
 #include
 #include
 #include "dynarmic/common/common_types.h"
@@ -22,6 +21,7 @@
 #include "dynarmic/backend/x64/perf_map.h"
 #include "dynarmic/backend/x64/stack_layout.h"
 #include "dynarmic/backend/x64/verbose_debugging_output.h"
+#include "dynarmic/common/variant_util.h"
 #include "dynarmic/ir/basic_block.h"
 #include "dynarmic/ir/microinstruction.h"
 #include "dynarmic/ir/opcodes.h"
@@ -347,14 +347,14 @@ EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& de
 }
 
 void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
-    boost::apply_visitor([this, initial_location, is_single_step](auto x) {
+    Common::VisitVariant<void>(terminal, [this, initial_location, is_single_step](auto x) {
         using T = std::decay_t<decltype(x)>;
         if constexpr (!std::is_same_v<T, IR::Term::Invalid>) {
             this->EmitTerminalImpl(x, initial_location, is_single_step);
         } else {
             ASSERT(false && "Invalid terminal");
         }
-    }, terminal);
+    });
 }
 
 void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp
index 67d0e06808..7ec478dadd 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp
@@ -78,10 +78,13 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list t
                                                  FpFixup::Norm_Src,
                                                  FpFixup::Norm_Src,
                                                  FpFixup::Norm_Src);
-        const Xbyak::Xmm tmp = xmm0;
+
+        const Xbyak::Xmm tmp = xmm16;
         FCODE(vmovap)(tmp, code.BConst<fsize>(xword, denormal_to_zero));
-        for (const Xbyak::Xmm& xmm : to_daz)
+
+        for (const Xbyak::Xmm& xmm : to_daz) {
             FCODE(vfixupimms)(xmm, xmm, tmp, u8(0));
+        }
         return;
     }
diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc
index 71437dfe8f..8f0ac986d8 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc
+++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc
@@ -270,31 +270,34 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const bool ordered = IsOrdered(args[3].GetImmediateAccType());
 
-    if constexpr (bitsize == 128) {
+    if constexpr (bitsize != 128) {
+        ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]);
+    } else {
         ctx.reg_alloc.Use(args[1], ABI_PARAM2);
         ctx.reg_alloc.Use(args[2], HostLoc::XMM1);
         ctx.reg_alloc.EndOfAllocScope();
         ctx.reg_alloc.HostCall(inst);
-    } else {
-        ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]);
     }
-    const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
 
     Xbyak::Label end;
+
     code.mov(code.ABI_RETURN, u32(1));
-    code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
-    code.test(tmp.cvt8(), tmp.cvt8());
+    code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
     code.je(end);
-    code.xor_(tmp.cvt32(), tmp.cvt32());
-    code.xchg(tmp.cvt8(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
-    code.mov(code.ABI_PARAM1, u64(&conf));
+    code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
+    code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
     if constexpr (bitsize != 128) {
         using T = mcl::unsigned_integer_of_size<bitsize>;
-        code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, T value) -> u32 {
-            return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr, [&](T expected) -> bool {
-                return (conf.callbacks->*callback)(vaddr, value, expected);
-            }) ? 0 : 1;
-        });
+
+        code.CallLambda(
+            [](AxxUserConfig& conf, Axx::VAddr vaddr, T value) -> u32 {
+                return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
+                                                                    [&](T expected) -> bool {
+                                                                        return (conf.callbacks->*callback)(vaddr, value, expected);
+                                                                    })
+                         ? 0
+                         : 1;
+            });
         if (ordered) {
             code.mfence();
         }
@@ -302,11 +305,15 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
         ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
         code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
         code.movaps(xword[code.ABI_PARAM3], xmm1);
-        code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 {
-            return conf.global_monitor->DoExclusiveOperation<Vector>(conf.processor_id, vaddr, [&](Vector expected) -> bool {
-                return (conf.callbacks->*callback)(vaddr, value, expected);
-            }) ? 0 : 1;
-        });
+        code.CallLambda(
+            [](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 {
+                return conf.global_monitor->DoExclusiveOperation<Vector>(conf.processor_id, vaddr,
+                                                                         [&](Vector expected) -> bool {
+                                                                             return (conf.callbacks->*callback)(vaddr, value, expected);
+                                                                         })
+                         ? 0
+                         : 1;
+            });
         if (ordered) {
             code.mfence();
         }
@@ -427,11 +434,10 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
 
     SharedLabel end = GenSharedLabel();
 
+    code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
     code.mov(status, u32(1));
-    code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
-    code.test(tmp.cvt8(), tmp.cvt8());
+    code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
     code.je(*end, code.T_NEAR);
-    code.mov(tmp, std::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
     code.cmp(qword[tmp], vaddr);
     code.jne(*end, code.T_NEAR);
@@ -465,29 +471,30 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
 
     const auto location = code.getCurr();
 
-    switch (bitsize) {
-    case 8:
-        code.lock();
-        code.cmpxchg(code.byte[dest_ptr], value.cvt8());
-        break;
-    case 16:
-        code.lock();
-        code.cmpxchg(word[dest_ptr], value.cvt16());
-        break;
-    case 32:
-        code.lock();
-        code.cmpxchg(dword[dest_ptr], value.cvt32());
-        break;
-    case 64:
-        code.lock();
-        code.cmpxchg(qword[dest_ptr], value.cvt64());
-        break;
-    case 128:
+    if constexpr (bitsize == 128) {
         code.lock();
         code.cmpxchg16b(ptr[dest_ptr]);
-        break;
-    default:
-        UNREACHABLE();
+    } else {
+        switch (bitsize) {
+        case 8:
+            code.lock();
+            code.cmpxchg(code.byte[dest_ptr], value.cvt8());
+            break;
+        case 16:
+            code.lock();
+            code.cmpxchg(word[dest_ptr], value.cvt16());
+            break;
+        case 32:
+            code.lock();
+            code.cmpxchg(dword[dest_ptr], value.cvt32());
+            break;
+        case 64:
+            code.lock();
+            code.cmpxchg(qword[dest_ptr], value.cvt64());
+            break;
+        default:
+            UNREACHABLE();
+        }
     }
 
     code.setnz(status.cvt8());
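For readers tracing the inline exclusive-store path: the cmp/je/lock-cmpxchg sequence emitted above implements the same contract as the following portable sketch, where the JIT's exclusive_state byte plays the role of the local monitor. Names here are invented for illustration; the real code operates on raw guest memory:

    #include <atomic>
    #include <cstdint>

    // Succeeds only if the monitor is still armed and memory still holds the
    // value observed by the exclusive load; lock cmpxchg's ZF feeds setnz(status).
    bool ExclusiveStore(std::atomic<std::uint64_t>& mem, std::uint64_t expected,
                        std::uint64_t desired, std::uint8_t& exclusive_state) {
        if (exclusive_state == 0)  // cmp byte[...exclusive_state], 0; je end
            return false;
        exclusive_state = 0;       // mov byte[...exclusive_state], 0
        return mem.compare_exchange_strong(expected, desired);  // lock cmpxchg
    }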
diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp
index ae7594aed5..4fd458958d 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp
@@ -609,8 +609,8 @@ void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) {
 
     const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
     const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
-    const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
-    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm right_shift = xmm16;
+    const Xbyak::Xmm tmp = xmm17;
 
     code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
     code.vpxord(right_shift, right_shift, right_shift);
@@ -674,8 +674,8 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) {
 
     const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
     const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
-    const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
-    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm right_shift = xmm16;
+    const Xbyak::Xmm tmp = xmm17;
 
     code.vmovdqa32(tmp, code.Const(xword, 0x00000000000000FF, 0x00000000000000FF));
     code.vpxorq(right_shift, right_shift, right_shift);
@@ -1955,8 +1955,8 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) {
 
     const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
     const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]);
-    const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
-    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm right_shift = xmm16;
+    const Xbyak::Xmm tmp = xmm17;
 
     code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
     code.vpxord(right_shift, right_shift, right_shift);
@@ -2737,7 +2737,7 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst)
     const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
 
     if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
-        const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm c = xmm16;
         code.vpsraq(c, a, 32);
         code.vpsllq(a, a, 32);
         code.vpsraq(a, a, 32);
@@ -5460,7 +5460,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) {
     if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) {
         const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]);
         const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
-        const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm masked = xmm16;
 
         code.vpandd(masked, indicies, code.Const(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));
diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
index 4012f5cb4d..bbeb59a375 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
@@ -9,7 +9,6 @@
 #include "dynarmic/backend/x64/reg_alloc.h"
 
 #include
-#include
 #include
 #include
@@ -104,7 +103,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept {
     values.push_back(inst);
     ASSERT(size_t(total_uses) + inst->UseCount() < (std::numeric_limits::max)());
     total_uses += inst->UseCount();
-    max_bit_width = std::max(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType())));
+    max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType()));
 }
 
 void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept {
@@ -138,19 +137,19 @@ bool Argument::GetImmediateU1() const noexcept {
 
 u8 Argument::GetImmediateU8() const noexcept {
     const u64 imm = value.GetImmediateAsU64();
-    ASSERT(imm <= u64(std::numeric_limits<u8>::max()));
+    ASSERT(imm < 0x100);
     return u8(imm);
 }
 
 u16 Argument::GetImmediateU16() const noexcept {
     const u64 imm = value.GetImmediateAsU64();
-    ASSERT(imm <= u64(std::numeric_limits<u16>::max()));
+    ASSERT(imm < 0x10000);
     return u16(imm);
 }
 
 u32 Argument::GetImmediateU32() const noexcept {
     const u64 imm = value.GetImmediateAsU64();
-    ASSERT(imm <= u64(std::numeric_limits<u32>::max()));
+    ASSERT(imm < 0x100000000);
     return u32(imm);
 }
@@ -352,20 +351,10 @@ void RegAlloc::HostCall(IR::Inst* result_def,
     if (result_def) {
         DefineValueImpl(result_def, ABI_RETURN);
     }
-    for (size_t i = 0; i < args.size(); i++) {
-        if (args[i]) {
-            UseScratch(*args[i], args_hostloc[i]);
-        } else {
-            ScratchGpr(args_hostloc[i]); // TODO: Force spill
-        }
-    }
-    // Must match with with ScratchImpl
-    for (auto const gpr : other_caller_save) {
-        MoveOutOfTheWay(gpr);
-        LocInfo(gpr).WriteLock();
-    }
+
+    for (size_t i = 0; i < args.size(); i++) {
+        if (args[i] && !args[i]->get().IsVoid()) {
+            UseScratch(*args[i], args_hostloc[i]);
             // LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee
             const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
             switch (args[i]->get().GetType()) {
@@ -385,6 +374,14 @@ void RegAlloc::HostCall(IR::Inst* result_def,
             }
         }
     }
+
+    for (size_t i = 0; i < args.size(); i++)
+        if (!args[i]) {
+            // TODO: Force spill
+            ScratchGpr(args_hostloc[i]);
+        }
+    for (auto const caller_saved : other_caller_save)
+        ScratchImpl({caller_saved});
 }
 
 void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept {
@@ -429,18 +426,18 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector
-            if (*it >= HostLoc::R8 && *it <= HostLoc::R15) {
-                it_rex_candidate = it;
-            } else {
-                it_candidate = it;
+            } else {
+                if (loc_info.lru_counter < min_lru_counter) {
+                    // Otherwise a "quasi"-LRU
+                    min_lru_counter = loc_info.lru_counter;
+                    if (*it >= HostLoc::R8 && *it <= HostLoc::R15) {
+                        it_rex_candidate = it;
+                    } else {
+                        it_candidate = it;
+                    }
+                    if (min_lru_counter == 0)
+                        break; //early exit
                 }
-            // There used to be a break here - DO NOT BREAK away you MUST
-            // evaluate ALL of the registers BEFORE making a decision on when to take
-            // otherwise reg pressure will get high and bugs will seep :)
-            // TODO(lizzie): Investigate these god awful annoying reg pressure issues
             }
         }
     }
     // Final resolution goes as follows:
@@ -547,12 +544,13 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept {
 }
 
 HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
+#if 0
     // TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for windows
     // but it's fine anyways. We can find other ways to cheat it later - but which?!?!
     // we should NOT save xmm each block entering... MAYBE xbyak has a bug on start/end?
     // TODO(lizzie): This needs to be investigated further later.
     // Do not spill XMM into other XMM silly
-    /*if (!is_xmm) {
+    if (!is_xmm) {
        // TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY
        // Intel recommends to spill GPR onto XMM registers IF POSSIBLE
        // TODO(lizzie): Issues on DBZ, theory: Scratch XMM not properly restored after a function call?
@@ -560,9 +558,8 @@ HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
         for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i)
             if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
                 return loc;
-    }*/
-    // TODO: Doing this would mean saving XMM on each call... need to benchmark the benefits
-    // of spilling on XMM versus the potential cost of using XMM registers.....
+    }
+#endif
     // Otherwise go to stack spilling
     for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i)
         if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h
index 0e9e465774..bfacdcca52 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h
+++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h
@@ -12,7 +12,6 @@
 #include
 #include
 
-#include "boost/container/small_vector.hpp"
 #include "dynarmic/common/common_types.h"
 #include
 #include
@@ -78,13 +77,13 @@ public:
         return std::find(values.begin(), values.end(), inst) != values.end();
     }
     inline size_t GetMaxBitWidth() const noexcept {
-        return 1 << max_bit_width;
+        return max_bit_width;
     }
     void AddValue(IR::Inst* inst) noexcept;
     void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept;
 private:
     //non trivial
-    boost::container::small_vector values; //24
+    std::vector<IR::Inst*> values; //24
     // Block state
     uint16_t total_uses = 0; //8 //sometimes zeroed
@@ -94,10 +93,10 @@ private:
     uint16_t is_being_used_count = 0; //8
     uint16_t current_references = 0; //8
     // Value state
-    uint8_t lru_counter : 2 = 0; //1
-    uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,128) = (0, 1, 2, 3, 4, 5, 6)
+    uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128
     bool is_scratch : 1 = false; //1
     bool is_set_last_use : 1 = false; //1
+    alignas(16) uint8_t lru_counter = 0; //1
 
     friend class RegAlloc;
 };
 static_assert(sizeof(HostLocInfo) == 64);
diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.cpp b/src/dynarmic/src/dynarmic/common/memory_pool.cpp
new file mode 100644
index 0000000000..f41dd92af5
--- /dev/null
+++ b/src/dynarmic/src/dynarmic/common/memory_pool.cpp
@@ -0,0 +1,13 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2016 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#include "dynarmic/common/memory_pool.h"
+
+#include
+
+namespace Dynarmic::Common {
+
+
+} // namespace Dynarmic::Common
diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.h b/src/dynarmic/src/dynarmic/common/memory_pool.h
new file mode 100644
index 0000000000..d0a5223db3
--- /dev/null
+++ b/src/dynarmic/src/dynarmic/common/memory_pool.h
@@ -0,0 +1,65 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2016 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdlib>
+#include <vector>
+
+namespace Dynarmic::Common {
+
+/// @tparam object_size Byte-size of objects to construct
+/// @tparam slab_size Number of objects to have per slab
+template<size_t object_size, size_t slab_size>
+class Pool {
+public:
+    inline Pool() noexcept {
+        AllocateNewSlab();
+    }
+    inline ~Pool() noexcept {
+        std::free(current_slab);
+        for (char* slab : slabs) {
+            std::free(slab);
+        }
+    }
+
+    Pool(const Pool&) = delete;
+    Pool(Pool&&) = delete;
+
+    Pool& operator=(const Pool&) = delete;
+    Pool& operator=(Pool&&) = delete;
+
+    /// @brief Returns a pointer to an `object_size`-bytes block of memory.
+    [[nodiscard]] void* Alloc() noexcept {
+        if (remaining == 0) {
+            slabs.push_back(current_slab);
+            AllocateNewSlab();
+        }
+        void* ret = static_cast<void*>(current_ptr);
+        current_ptr += object_size;
+        remaining--;
+        return ret;
+    }
+private:
+    /// @brief Allocates a completely new memory slab.
+    /// Used when an entirely new slab is needed
+    /// due to the current one running out of usable space.
+    void AllocateNewSlab() noexcept {
+        current_slab = static_cast<char*>(std::malloc(object_size * slab_size));
+        current_ptr = current_slab;
+        remaining = slab_size;
+    }
+
+    std::vector<char*> slabs;
+    char* current_slab = nullptr;
+    char* current_ptr = nullptr;
+    size_t remaining = 0;
+};
+
+} // namespace Dynarmic::Common
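The Pool above is a plain bump allocator: Alloc() never constructs or destroys objects, it only hands back raw object_size-byte slots, so callers placement-new into it and all memory is reclaimed at once when the pool is destroyed. A small usage sketch; the type and slab size here are illustrative, not the values the emulator uses:

    #include <new>

    #include "dynarmic/common/memory_pool.h"

    struct Node {
        int opcode;
    };

    Dynarmic::Common::Pool<sizeof(Node), 1024> pool;  // 1024 slots per slab

    Node* MakeNode(int opcode) {
        // Placement-new into the pool; no delete, and ~Node() never runs,
        // which is only sound for trivially destructible types.
        return new (pool.Alloc()) Node{opcode};
    }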
diff --git a/src/dynarmic/src/dynarmic/common/variant_util.h b/src/dynarmic/src/dynarmic/common/variant_util.h
new file mode 100644
index 0000000000..4dd7f67167
--- /dev/null
+++ b/src/dynarmic/src/dynarmic/common/variant_util.h
@@ -0,0 +1,29 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2016 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#pragma once
+
+#include <boost/variant.hpp>
+
+namespace Dynarmic::Common {
+namespace detail {
+
+template<typename ReturnT, typename Lambda>
+struct VariantVisitor : boost::static_visitor<ReturnT>
+        , Lambda {
+    VariantVisitor(Lambda&& lambda)
+            : Lambda(std::move(lambda)) {}
+
+    using Lambda::operator();
+};
+
+} // namespace detail
+
+template<typename ReturnT, typename Variant, typename Lambda>
+inline ReturnT VisitVariant(Variant&& variant, Lambda&& lambda) {
+    return boost::apply_visitor(detail::VariantVisitor<ReturnT, Lambda>(std::move(lambda)), variant);
+}
+
+} // namespace Dynarmic::Common
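VisitVariant simply stitches a generic lambda onto boost::static_visitor so it can be fed to boost::apply_visitor with an explicit return type, which is how EmitTerminal in emit_x64.cpp uses it. A toy example with an invented variant type:

    #include <type_traits>

    #include <boost/variant.hpp>

    #include "dynarmic/common/variant_util.h"

    // The generic lambda is instantiated once per alternative of the variant.
    int AlternativeIndex(const boost::variant<int, double>& v) {
        return Dynarmic::Common::VisitVariant<int>(v, [](auto x) {
            return std::is_same_v<decltype(x), int> ? 0 : 1;
        });
    }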
diff --git a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h
index 2a0cc25751..3f4501a528 100644
--- a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h
+++ b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h
@@ -9,9 +9,12 @@
 #pragma once
 
 #include
+#include
+
 #include
 
 #include "dynarmic/common/assert.h"
 #include "dynarmic/common/common_types.h"
+
 #include "dynarmic/interface/A32/coprocessor_util.h"
 #include "dynarmic/ir/cond.h"
 
@@ -86,17 +89,24 @@ constexpr bool IsQuadExtReg(ExtReg reg) {
 
 inline size_t RegNumber(Reg reg) {
     ASSERT(reg != Reg::INVALID_REG);
-    return size_t(reg);
+    return static_cast<size_t>(reg);
 }
 
 inline size_t RegNumber(ExtReg reg) {
     if (IsSingleExtReg(reg)) {
-        return size_t(reg) - size_t(ExtReg::S0);
-    } else if (IsDoubleExtReg(reg)) {
-        return size_t(reg) - size_t(ExtReg::D0);
+        return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::S0);
+    }
+
+    if (IsDoubleExtReg(reg)) {
+        return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::D0);
     }
-    ASSERT(IsQuadExtReg(reg));
 
-    return size_t(reg) - size_t(ExtReg::Q0);
+    if (IsQuadExtReg(reg)) {
+        return static_cast<size_t>(reg) - static_cast<size_t>(ExtReg::Q0);
+    }
+
+    ASSERT_MSG(false, "Invalid extended register");
+    return 0;
 }
 
 inline Reg operator+(Reg reg, size_t number) {
diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h
index c6f034ae21..0257c28ddb 100644
--- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h
+++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h
@@ -30,13 +30,13 @@ template<typename Visitor>
 using ArmDecodeTable = std::array<std::vector<ArmMatcher<Visitor>>, 0x1000>;
 
 namespace detail {
-inline size_t ToFastLookupIndexArm(u32 instruction) noexcept {
+inline size_t ToFastLookupIndexArm(u32 instruction) {
     return ((instruction >> 4) & 0x00F) | ((instruction >> 16) & 0xFF0);
 }
 } // namespace detail
 
 template<typename V>
-constexpr ArmDecodeTable<V> GetArmDecodeTable() noexcept {
+constexpr ArmDecodeTable<V> GetArmDecodeTable() {
     std::vector<ArmMatcher<V>> list = {
 #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
 #include "./arm.inc"
 #undef INST
     };
@@ -62,27 +62,15 @@ constexpr ArmDecodeTable<V> GetArmDecodeTable() noexcept {
 }
 
 template<typename V>
-std::optional<std::reference_wrapper<const ArmMatcher<V>>> DecodeArm(u32 instruction) noexcept {
+std::optional<std::reference_wrapper<const ArmMatcher<V>>> DecodeArm(u32 instruction) {
     alignas(64) static const auto table = GetArmDecodeTable<V>();
     const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
+
     const auto& subtable = table[detail::ToFastLookupIndexArm(instruction)];
     auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction);
     return iter != subtable.end() ? std::optional<std::reference_wrapper<const ArmMatcher<V>>>(*iter) : std::nullopt;
 }
 
-template<typename V>
-std::optional<std::string_view> GetNameARM(u32 inst) noexcept {
-    std::vector<std::pair<std::string_view, ArmMatcher<V>>> list = {
-#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
-#include "./arm.inc"
-#undef INST
-    };
-    auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
-        return m.second.Matches(inst);
-    });
-    return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
-}
-
 } // namespace Dynarmic::A32
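The 0x1000-entry table DecodeArm consults is keyed by ToFastLookupIndexArm, which packs ARM's two primary decode fields (bits 4-7 and bits 20-27) into a 12-bit bucket index. A worked example:

    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t ToFastLookupIndexArm(std::uint32_t instruction) {
        return ((instruction >> 4) & 0x00F) | ((instruction >> 16) & 0xFF0);
    }

    // 0xE0812003 is ADD r2, r1, r3:
    //   (0xE0812003 >> 4)  & 0x00F = 0x000   (bits 4-7)
    //   (0xE0812003 >> 16) & 0xFF0 = 0x080   (bits 20-27, pre-shifted)
    static_assert(ToFastLookupIndexArm(0xE0812003) == 0x080);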
diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h
index 6bf52b87e6..f2e206695b 100644
--- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h
+++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h
@@ -13,7 +13,6 @@
 #include
 #include
 #include
-#include
 #include
 
 #include "dynarmic/common/common_types.h"
@@ -27,12 +26,15 @@ template<typename Visitor>
 using ASIMDMatcher = Decoder::Matcher<Visitor, u32>;
 
 template<typename V>
-std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() noexcept {
-    std::vector<std::pair<std::string_view, ASIMDMatcher<V>>> table = {
-#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
+std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
+    std::vector<ASIMDMatcher<V>> table = {
+
+#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
 #include "./asimd.inc"
 #undef INST
+
     };
+
     // Exceptions to the rule of thumb.
     const std::set<std::string> comes_first{
         "VBIC, VMOV, VMVN, VORR (immediate)",
@@ -51,43 +53,29 @@ std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() noexcept {
         "VQDMULH (scalar)",
         "VQRDMULH (scalar)",
     };
-    const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
-        return comes_first.count(e.first) > 0;
+    const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) {
+        return comes_first.count(matcher.GetName()) > 0;
     });
-    const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) {
-        return comes_last.count(e.first) == 0;
+    const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) {
+        return comes_last.count(matcher.GetName()) == 0;
     });
+
     // If a matcher has more bits in its mask it is more specific, so it should come first.
-    std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) {
-        return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
+    std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) {
+        return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
     });
-    std::vector<ASIMDMatcher<V>> final_table;
-    std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table), [](auto const& e) {
-        return e.second;
-    });
-    return final_table;
-}
-template<typename V>
-std::optional<std::reference_wrapper<const ASIMDMatcher<V>>> DecodeASIMD(u32 instruction) noexcept {
-    alignas(64) static const auto table = GetASIMDDecodeTable<V>();
-    auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
-        return matcher.Matches(instruction);
-    });
-    return iter != table.end() ? std::optional<std::reference_wrapper<const ASIMDMatcher<V>>>(*iter) : std::nullopt;
+
+    return table;
 }
 
 template<typename V>
-std::optional<std::string_view> GetNameASIMD(u32 inst) noexcept {
-    std::vector<std::pair<std::string_view, ASIMDMatcher<V>>> list = {
-#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
-#include "./asimd.inc"
-#undef INST
-    };
-    auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
-        return m.second.Matches(inst);
-    });
-    return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
+std::optional<std::reference_wrapper<const ASIMDMatcher<V>>> DecodeASIMD(u32 instruction) {
+    static const auto table = GetASIMDDecodeTable<V>();
+
+    const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
+
+    auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
+    return iter != table.end() ? std::optional<std::reference_wrapper<const ASIMDMatcher<V>>>(*iter) : std::nullopt;
 }
 
 } // namespace Dynarmic::A32
diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h
index 16b99ba5aa..8073ee5d47 100644
--- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h
+++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h
@@ -25,28 +25,18 @@ using Thumb16Matcher = Decoder::Matcher<Visitor, u16>;
 
 template<typename V>
 std::optional<std::reference_wrapper<const Thumb16Matcher<V>>> DecodeThumb16(u16 instruction) {
-    alignas(64) static const std::vector<Thumb16Matcher<V>> table = {
+    static const std::vector<Thumb16Matcher<V>> table = {
+
 #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)),
 #include "./thumb16.inc"
 #undef INST
-    };
-    auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
-        return matcher.Matches(instruction);
-    });
-    return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb16Matcher<V>>>(*iter) : std::nullopt;
-}
 
-template<typename V>
-std::optional<std::string_view> GetNameThumb16(u32 inst) noexcept {
-    std::vector<std::pair<std::string_view, Thumb16Matcher<V>>> list = {
-#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)) },
-#include "./thumb16.inc"
-#undef INST
     };
-    auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
-        return m.second.Matches(inst);
-    });
-    return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
+
+    const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
+
+    auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
+    return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb16Matcher<V>>>(*iter) : std::nullopt;
 }
 
 } // namespace Dynarmic::A32
diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h
index 19418de67c..86a4d767a7 100644
--- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h
+++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h
@@ -24,28 +24,18 @@ using Thumb32Matcher = Decoder::Matcher<Visitor, u32>;
 
 template<typename V>
 std::optional<std::reference_wrapper<const Thumb32Matcher<V>>> DecodeThumb32(u32 instruction) {
-    alignas(64) static const std::vector<Thumb32Matcher<V>> table = {
+    static const std::vector<Thumb32Matcher<V>> table = {
+
 #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
 #include "./thumb32.inc"
 #undef INST
-    };
-    auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
-        return matcher.Matches(instruction);
-    });
-    return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb32Matcher<V>>>(*iter) : std::nullopt;
-}
 
-template<typename V>
-std::optional<std::string_view> GetNameThumb32(u32 inst) noexcept {
-    std::vector<std::pair<std::string_view, Thumb32Matcher<V>>> list = {
-#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
-#include "./thumb32.inc"
-#undef INST
     };
-    auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
-        return m.second.Matches(inst);
-    });
-    return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
+
+    const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
+
+    auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
+    return iter != table.end() ? std::optional<std::reference_wrapper<const Thumb32Matcher<V>>>(*iter) : std::nullopt;
 }
 
 } // namespace Dynarmic::A32
diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h
index a346304a9a..5fcacd2bda 100644
--- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h
+++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h
@@ -26,42 +26,36 @@ using VFPMatcher = Decoder::Matcher<Visitor, u32>;
 template<typename V>
 std::optional<std::reference_wrapper<const VFPMatcher<V>>> DecodeVFP(u32 instruction) {
     using Table = std::vector<VFPMatcher<V>>;
-    alignas(64) static const struct Tables {
+
+    static const struct Tables {
         Table unconditional;
         Table conditional;
-    } tables = []() {
+    } tables = [] {
         Table list = {
+
 #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
 #include "./vfp.inc"
 #undef INST
+
         };
-        auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
+
+        const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
             return (matcher.GetMask() & 0xF0000000) == 0xF0000000;
         });
+
         return Tables{
-            Table{list.begin(), it},
-            Table{it, list.end()},
+            Table{list.begin(), division},
+            Table{division, list.end()},
         };
     }();
+
     const bool is_unconditional = (instruction & 0xF0000000) == 0xF0000000;
     const Table& table = is_unconditional ? tables.unconditional : tables.conditional;
-    auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) {
-        return matcher.Matches(instruction);
-    });
-    return iter != table.end() ? std::optional<std::reference_wrapper<const VFPMatcher<V>>>(*iter) : std::nullopt;
-}
 
-template<typename V>
-std::optional<std::string_view> GetNameVFP(u32 inst) noexcept {
-    std::vector<std::pair<std::string_view, VFPMatcher<V>>> list = {
-#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
-#include "./vfp.inc"
-#undef INST
-    };
-    auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
-        return m.second.Matches(inst);
-    });
-    return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
+    const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
+
+    auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
+    return iter != table.end() ? std::optional<std::reference_wrapper<const VFPMatcher<V>>>(*iter) : std::nullopt;
 }
 
 } // namespace Dynarmic::A32
diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp
index 23935ba601..d0f963d8b3 100644
--- a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp
+++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp
@@ -97,7 +97,7 @@ u32 ConvertASIMDInstruction(u32 thumb_instruction) {
     return 0xF7F0A000; // UDF
 }
 
-inline bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) noexcept {
+bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) {
     return (thumb_instruction & 0xEC000000) == 0xEC000000
         || (thumb_instruction & 0xFF100000) == 0xF9000000;
 }
diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h
index 7e6cdc3935..c651dd7cde 100644
--- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h
+++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h
@@ -37,31 +37,34 @@ inline size_t ToFastLookupIndex(u32 instruction) {
 
 template<typename V>
 constexpr DecodeTable<V> GetDecodeTable() {
-    std::vector<std::pair<std::string_view, Matcher<V>>> list = {
-#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
+    std::vector<Matcher<V>> list = {
+#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
 #include "./a64.inc"
 #undef INST
     };
 
+    // If a matcher has more bits in its mask it is more specific, so it should come first.
-    std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) {
+    std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) {
         // If a matcher has more bits in its mask it is more specific, so it should come first.
-        return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask());
+        return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
     });
+
     // Exceptions to the above rule of thumb.
-    std::stable_partition(list.begin(), list.end(), [&](const auto& e) {
+    std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
         return std::set<std::string>{
                    "MOVI, MVNI, ORR, BIC (vector, immediate)",
                    "FMOV (vector, immediate)",
                    "Unallocated SIMD modified immediate",
-               }.count(e.first) > 0;
+               }.count(matcher.GetName()) > 0;
     });
+
     DecodeTable<V> table{};
     for (size_t i = 0; i < table.size(); ++i) {
-        for (auto const& e : list) {
-            const auto expect = detail::ToFastLookupIndex(e.second.GetExpected());
-            const auto mask = detail::ToFastLookupIndex(e.second.GetMask());
+        for (auto matcher : list) {
+            const auto expect = detail::ToFastLookupIndex(matcher.GetExpected());
+            const auto mask = detail::ToFastLookupIndex(matcher.GetMask());
             if ((i & mask) == expect) {
-                table[i].push_back(e.second);
+                table[i].push_back(matcher);
             }
         }
     }
@@ -71,24 +74,12 @@ constexpr DecodeTable<V> GetDecodeTable() {
 template<typename V>
 std::optional<std::reference_wrapper<const Matcher<V>>> Decode(u32 instruction) {
     alignas(64) static const auto table = GetDecodeTable<V>();
-    const auto& subtable = table[detail::ToFastLookupIndex(instruction)];
-    auto iter = std::find_if(subtable.begin(), subtable.end(), [instruction](const auto& matcher) {
+    const auto matches_instruction = [instruction](const auto& matcher) {
         return matcher.Matches(instruction);
-    });
-    return iter != subtable.end() ? std::optional<std::reference_wrapper<const Matcher<V>>>(*iter) : std::nullopt;
-}
-
-template<typename V>
-std::optional<std::string_view> GetName(u32 inst) noexcept {
-    std::vector<std::pair<std::string_view, Matcher<V>>> list = {
-#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) },
-#include "./a64.inc"
-#undef INST
     };
-    auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) {
-        return m.second.Matches(inst);
-    });
-    return iter != list.cend() ? std::optional{iter->first} : std::nullopt;
+    const auto& subtable = table[detail::ToFastLookupIndex(instruction)];
+    auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction);
+    return iter != subtable.end() ? std::optional<std::reference_wrapper<const Matcher<V>>>(*iter) : std::nullopt;
 }
 
 } // namespace Dynarmic::A64
diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp
index 41dd8cb4d4..faf0686231 100644
--- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp
+++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp
@@ -1,6 +1,3 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
-// SPDX-License-Identifier: GPL-3.0-or-later
-
 /* This file is part of the dynarmic project.
  * Copyright (c) 2018 MerryMage
  * SPDX-License-Identifier: 0BSD
@@ -23,12 +20,9 @@ bool TranslatorVisitor::B_cond(Imm<19> imm19, Cond cond) {
 bool TranslatorVisitor::B_uncond(Imm<26> imm26) {
     const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend<s64>();
     const u64 target = ir.PC() + offset;
-    // Pattern to halt execution (B .)
-    if (target == ir.PC()) {
-        ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)});
-        return false;
-    }
-    ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)});
+
+    //ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)});
+    ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)});
     return false;
 }
diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h
index 0d9da6169c..bd76efda2a 100644
--- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h
+++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h
@@ -70,9 +70,11 @@ struct detail {
         return std::make_tuple(mask, expect);
     }
 
-    /// @brief Generates the masks and shifts for each argument.
-    /// A '-' in a bitstring indicates that we don't care about that value.
-    /// An argument is specified by a continuous string of the same character.
+    /**
+     * Generates the masks and shifts for each argument.
+     * A '-' in a bitstring indicates that we don't care about that value.
+     * An argument is specified by a continuous string of the same character.
+     */
     template<size_t N>
    static consteval auto GetArgInfo(std::array<char, opcode_bitsize> bitstring) {
        std::array<opcode_type, N> masks = {};
@@ -96,6 +98,7 @@ struct detail {
 
                 if constexpr (N > 0) {
                     const size_t bit_position = opcode_bitsize - i - 1;
+
                     if (arg_index >= N)
                         throw std::out_of_range("Unexpected field");
 
@@ -106,16 +109,20 @@ struct detail {
             }
         }
     }
+
 #if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__)
         // Avoids a MSVC ICE, and avoids Android NDK issue.
         ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; }));
 #endif
+
         return std::make_tuple(masks, shifts);
     }
 
-    /// @brief This struct's Make member function generates a lambda which decodes an instruction
-    /// based on the provided arg_masks and arg_shifts. The Visitor member function to call is
-    /// provided as a template argument.
+    /**
+     * This struct's Make member function generates a lambda which decodes an instruction based on
+     * the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a
+     * template argument.
+     */
     template<typename FnT>
     struct VisitorCaller;
 
#ifdef _MSC_VER
#    pragma warning(push)
#    pragma warning(disable : 4800)  // forcing value to bool 'true' or 'false' (performance warning)
#endif
-    template<typename ReturnType, typename V, typename... Args>
-    struct VisitorCaller<ReturnType (V::*)(Args...)> {
+    template<typename Visitor, typename... Args, typename CallRetT>
+    struct VisitorCaller<CallRetT (Visitor::*)(Args...)> {
         template<size_t... iota>
-        static constexpr auto Make(std::integer_sequence<size_t, iota...>,
-                                   ReturnType (V::*const fn)(Args...),
+        static auto Make(std::integer_sequence<size_t, iota...>,
+                         CallRetT (Visitor::*const fn)(Args...),
                          const std::array<opcode_type, N> arg_masks,
                          const std::array<size_t, N> arg_shifts) {
-            static_assert(std::is_same_v<V, visitor_type>, "Member function is not from Matcher's Visitor");
-            return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) {
+            static_assert(std::is_same_v<Visitor, visitor_type>, "Member function is not from Matcher's Visitor");
+            return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) {
                 (void)instruction;
                 (void)arg_masks;
                 (void)arg_shifts;
-                return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
+                return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
             };
         }
     };
 
-    template<typename ReturnType, typename V, typename... Args>
-    struct VisitorCaller<ReturnType (V::*)(Args...) const> {
+    template<typename Visitor, typename... Args, typename CallRetT>
+    struct VisitorCaller<CallRetT (Visitor::*)(Args...) const> {
         template<size_t... iota>
-        static constexpr auto Make(std::integer_sequence<size_t, iota...>,
-                                   ReturnType (V::*const fn)(Args...) const,
+        static auto Make(std::integer_sequence<size_t, iota...>,
+                         CallRetT (Visitor::*const fn)(Args...) const,
                          const std::array<opcode_type, N> arg_masks,
                          const std::array<size_t, N> arg_shifts) {
-            static_assert(std::is_same_v<V, visitor_type>, "Member function is not from Matcher's Visitor");
-            return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) {
+            static_assert(std::is_same_v<Visitor, visitor_type>, "Member function is not from Matcher's Visitor");
+            return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) {
                 (void)instruction;
                 (void)arg_masks;
                 (void)arg_shifts;
-                return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
+                return (v.*fn)(static_cast<Args>((instruction & arg_masks[iota]) >> arg_shifts[iota])...);
             };
         }
     };
#ifdef _MSC_VER
#    pragma warning(pop)
#endif
 
-    /// @brief Creates a matcher that can match and parse instructions based on bitstring.
-    /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring.
-    template<typename F>
-    static constexpr auto GetMatcher(F fn) {
-        constexpr size_t args_count = mcl::parameter_count_v<F>;
+    /**
+     * Creates a matcher that can match and parse instructions based on bitstring.
+     * See also: GetMaskAndExpect and GetArgInfo for format of bitstring.
+     */
+    template<typename FnT>
+    static auto GetMatcher(FnT fn, const char* const name) {
+        constexpr size_t args_count = mcl::parameter_count_v<FnT>;
+
         constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring));
         constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring));
         constexpr auto arg_masks = std::get<0>(GetArgInfo<args_count>(bitstring));
         constexpr auto arg_shifts = std::get<1>(GetArgInfo<args_count>(bitstring));
-        const auto proxy_fn = VisitorCaller<F>::Make(std::make_index_sequence<args_count>(), fn, arg_masks, arg_shifts);
-        return MatcherT(mask, expect, proxy_fn);
+
+        using Iota = std::make_index_sequence<args_count>;
+
+        const auto proxy_fn = VisitorCaller<FnT>::Make(Iota(), fn, arg_masks, arg_shifts);
+        return MatcherT(name, mask, expect, proxy_fn);
     }
 };
 
-#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher(&V::fn)
+#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail<MatcherT<V>>::template GetMatcher(&V::fn, name)
 
 } // namespace detail
 } // namespace Dynarmic::Decoder
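To make the bitstring machinery above concrete: every fixed bit of a pattern goes into mask/expect, and every lettered field gets its own mask and shift, which the VisitorCaller-generated lambda applies before invoking the handler. A hand-worked sketch for the 16-bit pattern "0001110iiimmmddd" (the Thumb ADD-immediate layout, used here purely as an illustration):

    #include <cstdint>

    // Fixed bits 15-9 ("0001110") -> mask 0xFE00, expect 0x1C00.
    constexpr bool Matches(std::uint16_t inst) {
        return (inst & 0xFE00) == 0x1C00;
    }
    // Field i (bits 8-6) -> arg_mask 0x01C0, arg_shift 6; m and d follow suit.
    constexpr std::uint16_t FieldI(std::uint16_t inst) {
        return static_cast<std::uint16_t>((inst & 0x01C0) >> 6);
    }
    static_assert(Matches(0x1C52));      // 0b0001'1100'0101'0010
    static_assert(FieldI(0x1C52) == 1);  // the proxy would call visitor.fn(1, 2, 2)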
diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h
index f7e2884e0c..7e5c9c5a8f 100644
--- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h
+++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h
@@ -14,12 +14,16 @@
 
 namespace Dynarmic::Decoder {
 
-/// Generic instruction handling construct.
-/// @tparam Visitor An arbitrary visitor type that will be passed through
-///                 to the function being handled. This type must be the
-///                 type of the first parameter in a handler function.
-/// @tparam OpcodeType Type representing an opcode. This must be the
-///                    type of the second parameter in a handler function.
+/**
+ * Generic instruction handling construct.
+ *
+ * @tparam Visitor An arbitrary visitor type that will be passed through
+ *                 to the function being handled. This type must be the
+ *                 type of the first parameter in a handler function.
+ *
+ * @tparam OpcodeType Type representing an opcode. This must be the
+ *                    type of the second parameter in a handler function.
+ */
 template<typename Visitor, typename OpcodeType>
 class Matcher {
 public:
@@ -27,35 +31,46 @@ public:
     using visitor_type = Visitor;
     using handler_return_type = typename Visitor::instruction_return_type;
     using handler_function = std::function<handler_return_type(Visitor&, opcode_type)>;
-    Matcher(opcode_type mask, opcode_type expected, handler_function func)
-        : mask{mask}, expected{expected}, fn{std::move(func)} {}
+
+    Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func)
+        : name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {}
+
+    /// Gets the name of this type of instruction.
+    const char* GetName() const {
+        return name;
+    }
 
     /// Gets the mask for this instruction.
-    inline opcode_type GetMask() const noexcept {
+    opcode_type GetMask() const {
         return mask;
     }
 
     /// Gets the expected value after masking for this instruction.
-    inline opcode_type GetExpected() const noexcept {
+    opcode_type GetExpected() const {
         return expected;
     }
 
-    /// Tests to see if the given instruction is the instruction this matcher represents.
-    /// @param instruction The instruction to test
-    /// @returns true if the given instruction matches.
-    inline bool Matches(opcode_type instruction) const noexcept {
+    /**
+     * Tests to see if the given instruction is the instruction this matcher represents.
+     * @param instruction The instruction to test
+     * @returns true if the given instruction matches.
+     */
+    bool Matches(opcode_type instruction) const {
         return (instruction & mask) == expected;
     }
 
-    /// Calls the corresponding instruction handler on visitor for this type of instruction.
-    /// @param v The visitor to use
-    /// @param instruction The instruction to decode.
-    inline handler_return_type call(Visitor& v, opcode_type instruction) const noexcept {
+    /**
+     * Calls the corresponding instruction handler on visitor for this type of instruction.
+     * @param v The visitor to use
+     * @param instruction The instruction to decode.
+     */
+    handler_return_type call(Visitor& v, opcode_type instruction) const {
         ASSERT(Matches(instruction));
         return fn(v, instruction);
     }
 
 private:
+    const char* name;
     opcode_type mask;
     opcode_type expected;
     handler_function fn;
diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp
index ae9e0b103b..b00ab3cb20 100644
--- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp
+++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp
@@ -15,6 +15,8 @@
 #include
 
 #include "dynarmic/common/assert.h"
+
+#include "dynarmic/common/memory_pool.h"
 #include "dynarmic/frontend/A32/a32_types.h"
 #include "dynarmic/frontend/A64/a64_types.h"
 #include "dynarmic/ir/cond.h"
@@ -25,7 +27,8 @@ namespace Dynarmic::IR {
 Block::Block(const LocationDescriptor& location)
     : location{location},
       end_location{location},
-      cond{Cond::AL}
+      cond{Cond::AL},
+      instruction_alloc_pool{std::make_unique>()}
 {
 }
 
@@ -37,21 +40,7 @@ Block::Block(const LocationDescriptor& location)
 /// @param args A sequence of Value instances used as arguments for the instruction.
 /// @returns Iterator to the newly created instruction.
 Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept {
-    // First try using the "inline" buffer, otherwise fallback to a slower slab-like allocation scheme
-    // purpouse is to avoid many calls to new/delete which invoke malloc which invokes mmap
-    // just pool it!!! - reason why there is an inline buffer is because many small blocks are created
-    // with few instructions due to subpar optimisations on other passes... plus branch-heavy code will
-    // hugely benefit from the coherency of faster allocations...
-    IR::Inst* inst;
-    if (inlined_inst.size() < inlined_inst.max_size()) {
-        inlined_inst.emplace_back(opcode);
-        inst = &inlined_inst[inlined_inst.size() - 1];
-    } else {
-        if (pooled_inst.empty() || pooled_inst.back().size() == pooled_inst.back().max_size())
-            pooled_inst.emplace_back();
-        pooled_inst.back().emplace_back(opcode);
-        inst = &pooled_inst.back()[pooled_inst.back().size() - 1];
-    }
+    IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode);
     DEBUG_ASSERT(args.size() == inst->NumArgs());
     std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable {
         inst->SetArg(index, arg);
diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.h b/src/dynarmic/src/dynarmic/ir/basic_block.h
index dd978da68d..b91d557d44 100644
--- a/src/dynarmic/src/dynarmic/ir/basic_block.h
+++ b/src/dynarmic/src/dynarmic/ir/basic_block.h
@@ -13,9 +13,6 @@
 #include
 #include
 
-#include
-#include
-#include
 #include
 
 #include "dynarmic/common/common_types.h"
@@ -24,6 +21,7 @@
 #include "dynarmic/ir/terminal.h"
 #include "dynarmic/ir/value.h"
 #include "dynarmic/ir/dense_list.h"
+#include "dynarmic/common/memory_pool.h"
 
 namespace Dynarmic::IR {
 
@@ -166,12 +164,8 @@ public:
         return cycle_count;
     }
 private:
-    /// "Hot cache" for small blocks so we don't call global allocator
-    boost::container::static_vector inlined_inst;
     /// List of instructions in this block.
     instruction_list_type instructions;
-    /// "Long/far" memory pool
-    boost::container::stable_vector> pooled_inst;
     /// Block to execute next if `cond` did not pass.
     std::optional<LocationDescriptor> cond_failed = {};
     /// Description of the starting location of this block
@@ -180,6 +174,8 @@ private:
     LocationDescriptor end_location;
     /// Conditional to pass in order to execute this block
     Cond cond;
+    /// Memory pool for instruction list
+    std::unique_ptr> instruction_alloc_pool;
     /// Terminal instruction of this block.
     Terminal terminal = Term::Invalid{};
     /// Number of cycles this block takes to execute if the conditional fails.
@@ -187,7 +183,6 @@ private:
     /// Number of cycles this block takes to execute.
     size_t cycle_count = 0;
 };
-static_assert(sizeof(Block) == 2048);
 
 /// Returns a string representation of the contents of block. Intended for debugging.
 std::string DumpBlock(const IR::Block& block) noexcept;
diff --git a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp
new file mode 100644
index 0000000000..a8ef7e2989
--- /dev/null
+++ b/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp
@@ -0,0 +1,21 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2016 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#include "dynarmic/ir/ir_emitter.h"
+
+#include
+
+#include "dynarmic/common/assert.h"
+#include
+
+#include "dynarmic/ir/opcodes.h"
+
+namespace Dynarmic::IR {
+
+
+} // namespace Dynarmic::IR
diff --git a/src/dynarmic/tests/decoder_tests.cpp b/src/dynarmic/tests/decoder_tests.cpp
index 4ad9d90833..cdd2c70cd9 100644
--- a/src/dynarmic/tests/decoder_tests.cpp
+++ b/src/dynarmic/tests/decoder_tests.cpp
@@ -57,7 +57,7 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") {
     INFO("Instruction: " << std::hex << std::setfill('0') << std::setw(8) << instruction);
     INFO("Expect: " << std::hex << std::setfill('0') << std::setw(8) << expect);
     INFO("Fill: " << std::hex << std::setfill('0') << std::setw(8) << x);
-    INFO("Name: " << *A32::GetNameASIMD(instruction));
+    //INFO("Name: " << *A32::GetNameASIMD(instruction));
     INFO("iserr: " << iserr);
     //INFO("alternative: " << alternative->GetName());
     INFO("altiserr: " << altiserr);
diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp
index 8936f32bd3..3d1268f467 100644
--- a/src/dynarmic/tests/print_info.cpp
+++ b/src/dynarmic/tests/print_info.cpp
@@ -40,18 +40,18 @@ using namespace Dynarmic;
 
 std::string_view GetNameOfA32Instruction(u32 instruction) {
-    if (auto const vfp_decoder = A32::DecodeVFP(instruction))
-        return *A32::GetNameVFP(instruction);
-    else if (auto const asimd_decoder = A32::DecodeASIMD(instruction))
-        return *A32::GetNameASIMD(instruction);
-    else if (auto const decoder = A32::DecodeArm(instruction))
-        return *A32::GetNameARM(instruction);
+    //if (auto const vfp_decoder = A32::DecodeVFP(instruction))
+    //    return *A32::GetNameVFP(instruction);
+    //else if (auto const asimd_decoder = A32::DecodeASIMD(instruction))
+    //    return *A32::GetNameASIMD(instruction);
+    //else if (auto const decoder = A32::DecodeArm(instruction))
+    //    return *A32::GetNameARM(instruction);
     return "";
 }
 
 std::string_view GetNameOfA64Instruction(u32 instruction) {
-    if (auto const decoder = A64::Decode(instruction))
-        return *A64::GetName(instruction);
+    //if (auto const decoder = A64::Decode(instruction))
+    //    return *A64::GetName(instruction);
     return "";
 }