diff --git a/externals/powah/powah_emit.hpp b/externals/powah/powah_emit.hpp index 4b6f1d72e6..5af323ec5d 100644 --- a/externals/powah/powah_emit.hpp +++ b/externals/powah/powah_emit.hpp @@ -373,6 +373,7 @@ struct Context { void MFCTR(GPR const rt) { MFSPR(powah::GPR{9}, rt, powah::GPR{0}); } void MTCTR(GPR const rt) { MTSPR(powah::GPR{9}, rt, powah::GPR{0}); } void BCTRL() { base[offset++] = 0x4e800421; } //BCCTRL(R0, CR0, R0); + void BCTR() { base[offset++] = 0x4e800420; } //BCCTR(R0, CR0, R0); // TODO: PowerPC 11 stuff void ISEL(GPR const rd, GPR const ra, GPR const rb, uint32_t d) { diff --git a/src/dynarmic/src/dynarmic/backend/ppc64/a32_core.h b/src/dynarmic/src/dynarmic/backend/ppc64/a32_core.h index 19816d4c93..6545a1af4e 100644 --- a/src/dynarmic/src/dynarmic/backend/ppc64/a32_core.h +++ b/src/dynarmic/src/dynarmic/backend/ppc64/a32_core.h @@ -23,7 +23,6 @@ struct A32JitState { u32 cpsr_nzcv = 0; u32 fpscr = 0; u8 check_bit = 0; - void* run_fn = nullptr; IR::LocationDescriptor GetLocationDescriptor() const { return IR::LocationDescriptor{regs[15] | (u64(upper_location_descriptor) << 32)}; diff --git a/src/dynarmic/src/dynarmic/backend/ppc64/a32_interface.cpp b/src/dynarmic/src/dynarmic/backend/ppc64/a32_interface.cpp index 1f01592005..c99f92dc29 100644 --- a/src/dynarmic/src/dynarmic/backend/ppc64/a32_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/ppc64/a32_interface.cpp @@ -73,7 +73,6 @@ struct A32Core final { auto const loc = thread_ctx.GetLocationDescriptor(); auto const entry = process.GetOrEmit(loc); using CodeFn = HaltReason (*)(A32AddressSpace*, A32JitState*, volatile u32*, void*); - thread_ctx.run_fn = (void*)&A32Core::Run; return (CodeFn(entry))(&process, &thread_ctx, halt_reason, reinterpret_cast(&A32Core::Run)); } }; diff --git a/src/dynarmic/src/dynarmic/backend/ppc64/a64_core.h b/src/dynarmic/src/dynarmic/backend/ppc64/a64_core.h index 0d5fc6eace..a9226759dd 100644 --- a/src/dynarmic/src/dynarmic/backend/ppc64/a64_core.h +++ b/src/dynarmic/src/dynarmic/backend/ppc64/a64_core.h @@ -29,7 +29,6 @@ struct A64JitState { u32 fpsr = 0; volatile u32 halt_reason = 0; u8 check_bit = 0; - void* run_fn = nullptr; IR::LocationDescriptor GetLocationDescriptor() const { const u64 fpcr_u64 = u64(fpcr & A64::LocationDescriptor::fpcr_mask) << A64::LocationDescriptor::fpcr_shift; diff --git a/src/dynarmic/src/dynarmic/backend/ppc64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/ppc64/a64_interface.cpp index f07b8ef1be..7573531f8d 100644 --- a/src/dynarmic/src/dynarmic/backend/ppc64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/ppc64/a64_interface.cpp @@ -71,13 +71,18 @@ struct A64AddressSpace final { ankerl::unordered_dense::map block_infos; }; +__attribute__((noinline)) HaltReason test_thunk(A64AddressSpace* a, A64JitState* b, volatile u32* c, void* d) { + printf("%p,%p,%p,%p\n", a, b, c, d); + return HaltReason::UserDefined2; +} + struct A64Core final { static HaltReason Run(A64AddressSpace& process, A64JitState& thread_ctx, volatile u32* halt_reason) { const auto loc = thread_ctx.GetLocationDescriptor(); const auto entry = process.GetOrEmit(loc); - using CodeFn = HaltReason (*)(A64AddressSpace*, A64JitState*, volatile u32*, void*); - thread_ctx.run_fn = (void*)&A64Core::Run; - return (CodeFn(entry))(&process, &thread_ctx, halt_reason, reinterpret_cast(&A64Core::Run)); + using AbstractCodeFn = HaltReason (*)(A64AddressSpace*, A64JitState*, volatile u32*, void (*fn)()); + using CodeFn = HaltReason (*)(A64AddressSpace*, A64JitState*, volatile u32*, AbstractCodeFn fn); + return (CodeFn(entry))(&process, &thread_ctx, halt_reason, AbstractCodeFn(test_thunk)); } }; diff --git a/src/dynarmic/src/dynarmic/backend/ppc64/abi.h b/src/dynarmic/src/dynarmic/backend/ppc64/abi.h index 51af0d73b7..cc2fd63a08 100644 --- a/src/dynarmic/src/dynarmic/backend/ppc64/abi.h +++ b/src/dynarmic/src/dynarmic/backend/ppc64/abi.h @@ -46,7 +46,10 @@ vrsave Non-volatile 32-bit register constexpr powah::GPR RPROCESS = powah::R3; constexpr powah::GPR RJIT = powah::R4; constexpr powah::GPR RHALTREASON = powah::R5; -constexpr powah::GPR RNZCV = powah::R31; +constexpr powah::GPR RTOCPTR = powah::R6; +// temporals +constexpr powah::GPR RNZCV = powah::R7; +constexpr powah::GPR RCHECKBIT = powah::R8; constexpr powah::GPR ABI_PARAM1 = powah::R3; constexpr powah::GPR ABI_PARAM2 = powah::R4; @@ -55,7 +58,6 @@ constexpr powah::GPR ABI_PARAM4 = powah::R6; // See https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#REG constexpr std::initializer_list GPR_ORDER{ - //6, 7, 8, 9, 10, 11, 12, //volatile // r13 is thread-id 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 //non-volatile }; diff --git a/src/dynarmic/src/dynarmic/backend/ppc64/emit_ppc64.cpp b/src/dynarmic/src/dynarmic/backend/ppc64/emit_ppc64.cpp index 38e40af3f5..d58fe5003d 100644 --- a/src/dynarmic/src/dynarmic/backend/ppc64/emit_ppc64.cpp +++ b/src/dynarmic/src/dynarmic/backend/ppc64/emit_ppc64.cpp @@ -159,10 +159,11 @@ void EmitTerminal(powah::Context& code, EmitContext& ctx, IR::Term::LinkBlock te auto const tmp = ctx.reg_alloc.ScratchGpr(); code.LI(tmp, terminal.next.Value()); code.STD(tmp, PPC64::RJIT, offsetof(A64JitState, pc)); - code.LD(tmp, PPC64::RJIT, offsetof(A64JitState, run_fn)); + code.LD(tmp, PPC64::RTOCPTR, 0); code.MTCTR(tmp); - code.BCTRL(); - code.LD(powah::R2, powah::R1, offsetof(StackLayout, sp)); + code.LD(powah::R2, PPC64::RTOCPTR, 8); + code.LD(powah::R11, PPC64::RTOCPTR, 16); + code.BCTR(); } else { auto const tmp = ctx.reg_alloc.ScratchGpr(); code.LI(tmp, terminal.next.Value()); @@ -191,7 +192,7 @@ void EmitTerminal(powah::Context& code, EmitContext& ctx, IR::Term::CheckBit ter powah::Label const l_else = code.DefineLabel(); powah::Label const l_end = code.DefineLabel(); auto const tmp = ctx.reg_alloc.ScratchGpr(); - code.LD(tmp, powah::R1, offsetof(StackLayout, check_bit)); + code.MR(tmp, PPC64::RCHECKBIT); code.CMPLDI(tmp, 0); code.BEQ(powah::CR0, l_else); // CheckBit == 1 @@ -219,51 +220,53 @@ EmittedBlockInfo EmitPPC64(powah::Context& code, IR::Block block, const EmitConf RegAlloc reg_alloc{code}; EmitContext ctx{block, reg_alloc, emit_conf, ebi}; + size_t const stack_size = 112 + ABI_CALLEE_SAVED.size() * 8; auto const start_offset = code.offset; ebi.entry_point = &code.base[start_offset]; - - code.MFLR(powah::R0); - code.STD(powah::R0, powah::R1, offsetof(StackLayout, lr)); - // Non-volatile saves - std::vector abi_callee_saved{ABI_CALLEE_SAVED}; - for (size_t i = 0; i < abi_callee_saved.size(); ++i) - code.STD(abi_callee_saved[i], powah::R1, -(8 + i * 8)); - code.STDU(powah::R1, powah::R1, -sizeof(StackLayout)); - code.STD(powah::R2, powah::R1, offsetof(StackLayout, sp)); - - for (auto iter = block.begin(); iter != block.end(); ++iter) { - IR::Inst* inst = &*iter; - switch (inst->GetOpcode()) { + if (!block.empty()) { + code.MFLR(powah::R0); + code.STD(powah::R0, powah::R1, 16); + // Non-volatile saves + std::vector gp_regs{ABI_CALLEE_SAVED}; + for (size_t i = 0; i < gp_regs.size(); ++i) + code.STD(gp_regs[i], powah::R1, -int32_t(gp_regs.size() - i) * 8); + code.STDU(powah::R1, powah::R1, uint32_t(-stack_size)); + code.STD(powah::R2, powah::R1, 40); + + for (auto iter = block.begin(); iter != block.end(); ++iter) { + IR::Inst* inst = &*iter; + switch (inst->GetOpcode()) { #define OPCODE(name, type, ...) \ - case IR::Opcode::name: \ - EmitIR(code, ctx, inst); \ - break; + case IR::Opcode::name: \ + EmitIR(code, ctx, inst); \ + break; #define A32OPC(name, type, ...) \ - case IR::Opcode::A32##name: \ - EmitIR(code, ctx, inst); \ - break; + case IR::Opcode::A32##name: \ + EmitIR(code, ctx, inst); \ + break; #define A64OPC(name, type, ...) \ - case IR::Opcode::A64##name: \ - EmitIR(code, ctx, inst); \ - break; + case IR::Opcode::A64##name: \ + EmitIR(code, ctx, inst); \ + break; #include "dynarmic/ir/opcodes.inc" #undef OPCODE #undef A32OPC #undef A64OPC - default: - UNREACHABLE(); + default: + UNREACHABLE(); + } } - } - - // auto const cycles_to_add = block.CycleCount(); - EmitTerminal(code, ctx, ctx.block.GetTerminal(), ctx.block.Location(), false); - code.ADDI(powah::R1, powah::R1, sizeof(StackLayout)); - code.LD(powah::R0, powah::R1, offsetof(StackLayout, lr)); - code.MTLR(powah::R0); - for (size_t i = 0; i < abi_callee_saved.size(); ++i) - code.LD(abi_callee_saved[i], powah::R1, -(8 + i * 8)); - code.BLR(); + // auto const cycles_to_add = block.CycleCount(); + code.ADDI(powah::R1, powah::R1, stack_size); + for (size_t i = 0; i < gp_regs.size(); ++i) + code.LD(gp_regs[i], powah::R1, -int32_t(gp_regs.size() - i) * 8); + code.LD(powah::R0, powah::R1, 16); + code.MTLR(powah::R0); + EmitTerminal(code, ctx, ctx.block.GetTerminal(), ctx.block.Location(), false); + } else { + EmitTerminal(code, ctx, ctx.block.GetTerminal(), ctx.block.Location(), false); + } code.ApplyRelocs(); /* diff --git a/src/dynarmic/src/dynarmic/backend/ppc64/emit_ppc64_a64.cpp b/src/dynarmic/src/dynarmic/backend/ppc64/emit_ppc64_a64.cpp index b341373d61..efe7c734fe 100644 --- a/src/dynarmic/src/dynarmic/backend/ppc64/emit_ppc64_a64.cpp +++ b/src/dynarmic/src/dynarmic/backend/ppc64/emit_ppc64_a64.cpp @@ -19,7 +19,7 @@ namespace Dynarmic::Backend::PPC64 { template<> void EmitIR(powah::Context& code, EmitContext& ctx, IR::Inst* inst) { auto const value = ctx.reg_alloc.UseGpr(inst->GetArg(0)); - code.STD(value, powah::R1, offsetof(StackLayout, check_bit)); + code.MR(PPC64::RCHECKBIT, value); } template<> diff --git a/src/dynarmic/src/dynarmic/backend/ppc64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/ppc64/reg_alloc.h index a8122bfb92..ff95193ee7 100644 --- a/src/dynarmic/src/dynarmic/backend/ppc64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/ppc64/reg_alloc.h @@ -49,15 +49,15 @@ struct RegAlloc; /// this basically means that we can use temporals and not need to go thru /// any weird deallocation stuffs :) template struct RegLock { - inline RegLock(RegAlloc& reg_alloc, T const value) noexcept + constexpr RegLock(RegAlloc& reg_alloc, T const value) noexcept : reg_alloc{reg_alloc} , value{value} { SetLock(true); } inline ~RegLock() noexcept { SetLock(false); } - operator T const&() { return value; } - operator T() const { return value; } + constexpr operator T const&() noexcept { return value; } + constexpr operator T() const noexcept { return value; } inline void SetLock(bool v) noexcept; RegAlloc& reg_alloc; const T value; @@ -94,7 +94,6 @@ private: std::array fprs; std::array vprs; std::array spills; - uint32_t lru_counter = 0; }; template<> inline void RegLock::SetLock(bool v) noexcept { diff --git a/src/dynarmic/src/dynarmic/backend/ppc64/stack_layout.h b/src/dynarmic/src/dynarmic/backend/ppc64/stack_layout.h index d672ccea6b..ec5334154b 100644 --- a/src/dynarmic/src/dynarmic/backend/ppc64/stack_layout.h +++ b/src/dynarmic/src/dynarmic/backend/ppc64/stack_layout.h @@ -12,12 +12,7 @@ namespace Dynarmic::Backend::PPC64 { constexpr size_t SpillCount = 16; struct alignas(16) StackLayout { - u64 resv0; //0 - u64 resv1; //8 - u64 lr; //16 - u64 sp; //24 std::array spill; - u64 check_bit; }; static_assert(sizeof(StackLayout) % 16 == 0); diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp index ae9e0b103b..35f2f1ac9f 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp +++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp @@ -22,10 +22,10 @@ namespace Dynarmic::IR { -Block::Block(const LocationDescriptor& location) - : location{location}, - end_location{location}, - cond{Cond::AL} +Block::Block(const LocationDescriptor& location) noexcept : + location{location} + , end_location{location} + , cond{Cond::AL} { } diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.h b/src/dynarmic/src/dynarmic/ir/basic_block.h index dd978da68d..0bc72b6da7 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.h +++ b/src/dynarmic/src/dynarmic/ir/basic_block.h @@ -44,8 +44,8 @@ public: using reverse_iterator = instruction_list_type::reverse_iterator; using const_reverse_iterator = instruction_list_type::const_reverse_iterator; - explicit Block(const LocationDescriptor& location); - ~Block() = default; + explicit Block(const LocationDescriptor& location) noexcept; + ~Block() noexcept = default; Block(const Block&) = delete; Block& operator=(const Block&) = delete; Block(Block&&) = default;