Browse Source

[dynarmic] fix ODR violations

Signed-off-by: lizzie <lizzie@eden-emu.dev>
lizzie/improve-odr-dynarmic
lizzie 1 week ago
committed by crueter
parent
commit
d54346dd38
  1. 4
      src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp
  2. 22
      src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_memory.cpp
  3. 4
      src/dynarmic/src/dynarmic/backend/riscv64/emit_riscv64_data_processing.cpp
  4. 4
      src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp
  5. 28
      src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h
  6. 48
      src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp
  7. 2
      src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp
  8. 36
      src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp
  9. 6
      src/dynarmic/src/dynarmic/common/fp/fused.cpp
  10. 2
      src/dynarmic/src/dynarmic/common/u128.cpp
  11. 15
      src/dynarmic/src/dynarmic/common/u128.h
  12. 6
      src/dynarmic/src/dynarmic/ir/ir_emitter.h
  13. 4
      src/dynarmic/src/dynarmic/ir/opcodes.h
  14. 5
      src/dynarmic/src/dynarmic/ir/opcodes.inc
  15. 4
      src/dynarmic/src/dynarmic/ir/opt_passes.cpp

4
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp

@ -642,7 +642,7 @@ void EmitIR<IR::Opcode::ArithmeticShiftRight64>(oaknut::CodeGenerator& code, Emi
}
template<>
void EmitIR<IR::Opcode::RotateRight32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
void EmitIR<IR::Opcode::BitRotateRight32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
const auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -708,7 +708,7 @@ void EmitIR<IR::Opcode::RotateRight32>(oaknut::CodeGenerator& code, EmitContext&
}
template<>
void EmitIR<IR::Opcode::RotateRight64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
void EmitIR<IR::Opcode::BitRotateRight64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto& operand_arg = args[0];
auto& shift_arg = args[1];

22
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_memory.cpp

@ -209,9 +209,9 @@ void CallbackOnlyEmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitConte
ctx.reg_alloc.DefineAsRegister(inst, X0);
}
constexpr size_t page_bits = 12;
constexpr size_t page_size = 1 << page_bits;
constexpr size_t page_mask = (1 << page_bits) - 1;
constexpr size_t page_table_const_bits = 12;
constexpr size_t page_table_const_size = 1 << page_table_const_bits;
constexpr size_t page_table_const_mask = (1 << page_table_const_bits) - 1;
// This function may use Xscratch0 as a scratch register
// Trashes NZCV
@ -242,28 +242,28 @@ void EmitDetectMisalignedVAddr(oaknut::CodeGenerator& code, EmitContext& ctx, oa
code.TST(Xaddr, align_mask);
code.B(NE, *fallback);
} else {
// If (addr & page_mask) > page_size - byte_size, use fallback.
code.AND(Xscratch0, Xaddr, page_mask);
code.CMP(Xscratch0, page_size - bitsize / 8);
// If (addr & page_table_const_mask) > page_table_const_size - byte_size, use fallback.
code.AND(Xscratch0, Xaddr, page_table_const_mask);
code.CMP(Xscratch0, page_table_const_size - bitsize / 8);
code.B(HI, *fallback);
}
}
// Outputs Xscratch0 = page_table[addr >> page_bits]
// Outputs Xscratch0 = page_table[addr >> page_table_const_bits]
// May use Xscratch1 as scratch register
// Address to read/write = [ret0 + ret1], ret0 is always Xscratch0 and ret1 is either Xaddr or Xscratch1
// Trashes NZCV
template<size_t bitsize>
std::pair<oaknut::XReg, oaknut::XReg> InlinePageTableEmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits;
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_table_const_bits;
const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits;
EmitDetectMisalignedVAddr<bitsize>(code, ctx, Xaddr, fallback);
if (ctx.conf.silently_mirror_page_table || unused_top_bits == 0) {
code.UBFX(Xscratch0, Xaddr, page_bits, valid_page_index_bits);
code.UBFX(Xscratch0, Xaddr, page_table_const_bits, valid_page_index_bits);
} else {
code.LSR(Xscratch0, Xaddr, page_bits);
code.LSR(Xscratch0, Xaddr, page_table_const_bits);
code.TST(Xscratch0, u64(~u64(0)) << valid_page_index_bits);
code.B(NE, *fallback);
}
@ -283,7 +283,7 @@ std::pair<oaknut::XReg, oaknut::XReg> InlinePageTableEmitVAddrLookup(oaknut::Cod
if (ctx.conf.absolute_offset_page_table) {
return std::make_pair(Xscratch0, Xaddr);
}
code.AND(Xscratch1, Xaddr, page_mask);
code.AND(Xscratch1, Xaddr, page_table_const_mask);
return std::make_pair(Xscratch0, Xscratch1);
}

4
src/dynarmic/src/dynarmic/backend/riscv64/emit_riscv64_data_processing.cpp

@ -164,12 +164,12 @@ void EmitIR<IR::Opcode::ArithmeticShiftRight64>(biscuit::Assembler&, EmitContext
}
template<>
void EmitIR<IR::Opcode::RotateRight32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
void EmitIR<IR::Opcode::BitRotateRight32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}
template<>
void EmitIR<IR::Opcode::RotateRight64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
void EmitIR<IR::Opcode::BitRotateRight64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
UNIMPLEMENTED();
}

4
src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp

@ -663,7 +663,7 @@ void EmitX64::EmitArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) {
}
}
void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitBitRotateRight32(EmitContext& ctx, IR::Inst* inst) {
const auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -736,7 +736,7 @@ void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) {
}
}
void EmitX64::EmitRotateRight64(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitBitRotateRight64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto& operand_arg = args[0];
auto& shift_arg = args[1];

28
src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h

@ -6,6 +6,8 @@
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <bit>
#include "dynarmic/backend/x64/xbyak.h"
@ -22,9 +24,9 @@ namespace {
using namespace Xbyak::util;
constexpr size_t page_bits = 12;
constexpr size_t page_size = 1 << page_bits;
constexpr size_t page_mask = (1 << page_bits) - 1;
constexpr size_t page_table_const_bits = 12;
constexpr size_t page_table_const_size = 1 << page_table_const_bits;
constexpr size_t page_table_const_mask = (1 << page_table_const_bits) - 1;
template<typename EmitContext>
void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr, Xbyak::Reg64 tmp) {
@ -50,7 +52,7 @@ void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsi
code.test(vaddr, align_mask);
if (ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
const u32 page_align_mask = static_cast<u32>(page_size - 1) & ~align_mask;
const u32 page_align_mask = static_cast<u32>(page_table_const_size - 1) & ~align_mask;
SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel();
@ -83,7 +85,7 @@ template<>
// TODO: This code assumes vaddr has been zext from 32-bits to 64-bits.
code.mov(tmp, vaddr.cvt32());
code.shr(tmp, int(page_bits));
code.shr(tmp, int(page_table_const_bits));
code.shl(tmp, int(ctx.conf.page_table_log2_stride));
code.mov(page, qword[r14 + tmp.cvt64()]);
if (ctx.conf.page_table_pointer_mask_bits == 0) {
@ -96,13 +98,13 @@ template<>
return page + vaddr;
}
code.mov(tmp, vaddr.cvt32());
code.and_(tmp, static_cast<u32>(page_mask));
code.and_(tmp, static_cast<u32>(page_table_const_mask));
return page + tmp.cvt64();
}
template<>
[[maybe_unused]] Xbyak::RegExp EmitVAddrLookup<A64EmitContext>(BlockOfCode& code, A64EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr) {
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits;
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_table_const_bits;
const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits;
const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(code);
@ -112,29 +114,29 @@ template<>
if (unused_top_bits == 0) {
code.mov(tmp, vaddr);
code.shr(tmp, int(page_bits));
code.shr(tmp, int(page_table_const_bits));
} else if (ctx.conf.silently_mirror_page_table) {
if (valid_page_index_bits >= 32) {
if (code.HasHostFeature(HostFeature::BMI2)) {
const Xbyak::Reg64 bit_count = ctx.reg_alloc.ScratchGpr(code);
code.mov(bit_count, unused_top_bits);
code.bzhi(tmp, vaddr, bit_count);
code.shr(tmp, int(page_bits));
code.shr(tmp, int(page_table_const_bits));
ctx.reg_alloc.Release(bit_count);
} else {
code.mov(tmp, vaddr);
code.shl(tmp, int(unused_top_bits));
code.shr(tmp, int(unused_top_bits + page_bits));
code.shr(tmp, int(unused_top_bits + page_table_const_bits));
}
} else {
code.mov(tmp, vaddr);
code.shr(tmp, int(page_bits));
code.shr(tmp, int(page_table_const_bits));
code.and_(tmp, u32((1 << valid_page_index_bits) - 1));
}
} else {
ASSERT(valid_page_index_bits < 32);
code.mov(tmp, vaddr);
code.shr(tmp, int(page_bits));
code.shr(tmp, int(page_table_const_bits));
code.test(tmp, u32(-(1 << valid_page_index_bits)));
code.jnz(abort, code.T_NEAR);
}
@ -151,7 +153,7 @@ template<>
return page + vaddr;
}
code.mov(tmp, vaddr);
code.and_(tmp, static_cast<u32>(page_mask));
code.and_(tmp, static_cast<u32>(page_table_const_mask));
return page + tmp;
}

48
src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp

@ -25,12 +25,12 @@ using namespace Xbyak::util;
namespace {
enum class Op {
enum class SaturationOp {
Add,
Sub,
};
template<Op op, size_t size, bool has_overflow_inst = false>
template<SaturationOp op, size_t size, bool has_overflow_inst = false>
void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -51,7 +51,7 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
// overflow now contains 0x7F... if a was positive, or 0x80... if a was negative
if constexpr (op == Op::Add) {
if constexpr (op == SaturationOp::Add) {
code.add(result, addend);
} else {
code.sub(result, addend);
@ -75,16 +75,16 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
ctx.reg_alloc.DefineValue(code, inst, result);
}
template<Op op, size_t size>
template<SaturationOp op, size_t size>
void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(code, args[0]).changeBit(size);
Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(code, args[1]).changeBit(size);
constexpr u64 boundary = op == Op::Add ? (std::numeric_limits<mcl::unsigned_integer_of_size<size>>::max)() : 0;
constexpr u64 boundary = op == SaturationOp::Add ? (std::numeric_limits<mcl::unsigned_integer_of_size<size>>::max)() : 0;
if constexpr (op == Op::Add) {
if constexpr (op == SaturationOp::Add) {
code.add(op_result, addend);
} else {
code.sub(op_result, addend);
@ -106,11 +106,11 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
} // anonymous namespace
void EmitX64::EmitSignedSaturatedAddWithFlag32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 32, true>(code, ctx, inst);
EmitSignedSaturatedOp<SaturationOp::Add, 32, true>(code, ctx, inst);
}
void EmitX64::EmitSignedSaturatedSubWithFlag32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 32, true>(code, ctx, inst);
EmitSignedSaturatedOp<SaturationOp::Sub, 32, true>(code, ctx, inst);
}
void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
@ -192,19 +192,19 @@ void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
}
void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
EmitSignedSaturatedOp<SaturationOp::Add, 8>(code, ctx, inst);
}
void EmitX64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
EmitSignedSaturatedOp<SaturationOp::Add, 16>(code, ctx, inst);
}
void EmitX64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
EmitSignedSaturatedOp<SaturationOp::Add, 32>(code, ctx, inst);
}
void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
EmitSignedSaturatedOp<SaturationOp::Add, 64>(code, ctx, inst);
}
void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) {
@ -256,51 +256,51 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx,
}
void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
EmitSignedSaturatedOp<SaturationOp::Sub, 8>(code, ctx, inst);
}
void EmitX64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
EmitSignedSaturatedOp<SaturationOp::Sub, 16>(code, ctx, inst);
}
void EmitX64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
EmitSignedSaturatedOp<SaturationOp::Sub, 32>(code, ctx, inst);
}
void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
EmitSignedSaturatedOp<SaturationOp::Sub, 64>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
EmitUnsignedSaturatedOp<SaturationOp::Add, 8>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
EmitUnsignedSaturatedOp<SaturationOp::Add, 16>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
EmitUnsignedSaturatedOp<SaturationOp::Add, 32>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
EmitUnsignedSaturatedOp<SaturationOp::Add, 64>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
EmitUnsignedSaturatedOp<SaturationOp::Sub, 8>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
EmitUnsignedSaturatedOp<SaturationOp::Sub, 16>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
EmitUnsignedSaturatedOp<SaturationOp::Sub, 32>(code, ctx, inst);
}
void EmitX64::EmitUnsignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
EmitUnsignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
EmitUnsignedSaturatedOp<SaturationOp::Sub, 64>(code, ctx, inst);
}
} // namespace Dynarmic::Backend::X64

2
src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp

@ -5856,3 +5856,5 @@ void EmitX64::EmitZeroVector(EmitContext& ctx, IR::Inst* inst) {
}
} // namespace Dynarmic::Backend::X64
#undef ICODE

36
src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp

@ -52,12 +52,12 @@ void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
ctx.reg_alloc.DefineValue(code, inst, result);
}
enum class Op {
enum class VectorSaturationOp {
Add,
Sub,
};
template<Op op, size_t esize>
template<VectorSaturationOp op, size_t esize>
void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
static_assert(esize == 32 || esize == 64);
constexpr u64 msb_mask = esize == 32 ? 0x8000000080000000 : 0x8000000000000000;
@ -72,7 +72,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.movaps(xmm0, operand1);
if constexpr (op == Op::Add) {
if constexpr (op == VectorSaturationOp::Add) {
ICODE(vpadd)(result, operand1, operand2);
code.vpternlogd(xmm0, result, operand2, 0b00100100);
} else {
@ -102,7 +102,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
if (code.HasHostFeature(HostFeature::AVX)) {
if constexpr (op == Op::Add) {
if constexpr (op == VectorSaturationOp::Add) {
ICODE(vpadd)(result, operand1, operand2);
} else {
ICODE(vpsub)(result, operand1, operand2);
@ -112,7 +112,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
} else {
code.movaps(xmm0, operand1);
code.movaps(tmp, operand1);
if constexpr (op == Op::Add) {
if constexpr (op == VectorSaturationOp::Add) {
ICODE(padd)(result, operand2);
} else {
ICODE(psub)(result, operand2);
@ -121,7 +121,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.pxor(tmp, result);
}
if constexpr (op == Op::Add) {
if constexpr (op == VectorSaturationOp::Add) {
code.pandn(xmm0, tmp);
} else {
code.pand(xmm0, tmp);
@ -165,7 +165,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
}
}
template<Op op, size_t esize>
template<VectorSaturationOp op, size_t esize>
void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
static_assert(esize == 32 || esize == 64);
@ -177,7 +177,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();
if constexpr (op == Op::Add) {
if constexpr (op == VectorSaturationOp::Add) {
ICODE(vpadd)(result, operand1, operand2);
ICODE(vpcmpu)(k1, result, operand1, CmpInt::LessThan);
ICODE(vpternlog)(result | k1, result, result, u8(0xFF));
@ -201,7 +201,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
if constexpr (op == Op::Add) {
if constexpr (op == VectorSaturationOp::Add) {
if (code.HasHostFeature(HostFeature::AVX)) {
code.vpxor(xmm0, operand1, operand2);
code.vpand(tmp, operand1, operand2);
@ -250,7 +250,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
code.setnz(overflow);
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
if constexpr (op == Op::Add) {
if constexpr (op == VectorSaturationOp::Add) {
code.por(result, tmp);
ctx.reg_alloc.DefineValue(code, inst, result);
} else {
@ -270,11 +270,11 @@ void EmitX64::EmitVectorSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
}
void EmitX64::EmitVectorSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
EmitVectorSignedSaturated<Op::Add, 32>(code, ctx, inst);
EmitVectorSignedSaturated<VectorSaturationOp::Add, 32>(code, ctx, inst);
}
void EmitX64::EmitVectorSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
EmitVectorSignedSaturated<Op::Add, 64>(code, ctx, inst);
EmitVectorSignedSaturated<VectorSaturationOp::Add, 64>(code, ctx, inst);
}
void EmitX64::EmitVectorSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
@ -286,11 +286,11 @@ void EmitX64::EmitVectorSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
}
void EmitX64::EmitVectorSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
EmitVectorSignedSaturated<Op::Sub, 32>(code, ctx, inst);
EmitVectorSignedSaturated<VectorSaturationOp::Sub, 32>(code, ctx, inst);
}
void EmitX64::EmitVectorSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
EmitVectorSignedSaturated<Op::Sub, 64>(code, ctx, inst);
EmitVectorSignedSaturated<VectorSaturationOp::Sub, 64>(code, ctx, inst);
}
void EmitX64::EmitVectorUnsignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
@ -302,11 +302,11 @@ void EmitX64::EmitVectorUnsignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst)
}
void EmitX64::EmitVectorUnsignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
EmitVectorUnsignedSaturated<Op::Add, 32>(code, ctx, inst);
EmitVectorUnsignedSaturated<VectorSaturationOp::Add, 32>(code, ctx, inst);
}
void EmitX64::EmitVectorUnsignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
EmitVectorUnsignedSaturated<Op::Add, 64>(code, ctx, inst);
EmitVectorUnsignedSaturated<VectorSaturationOp::Add, 64>(code, ctx, inst);
}
void EmitX64::EmitVectorUnsignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
@ -318,11 +318,11 @@ void EmitX64::EmitVectorUnsignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst)
}
void EmitX64::EmitVectorUnsignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
EmitVectorUnsignedSaturated<Op::Sub, 32>(code, ctx, inst);
EmitVectorUnsignedSaturated<VectorSaturationOp::Sub, 32>(code, ctx, inst);
}
void EmitX64::EmitVectorUnsignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
EmitVectorUnsignedSaturated<Op::Sub, 64>(code, ctx, inst);
EmitVectorUnsignedSaturated<VectorSaturationOp::Sub, 64>(code, ctx, inst);
}
} // namespace Dynarmic::Backend::X64

6
src/dynarmic/src/dynarmic/common/fp/fused.cpp

@ -32,7 +32,7 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
return std::make_tuple(exponent, value);
}();
if (product_value == 0) {
if (product_value == u128(0, 0)) {
return addend;
}
@ -52,13 +52,13 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
}
// addend < product
const u128 result = product_value + StickyLogicalShiftRight(addend.mantissa, exp_diff - normalized_point_position);
const u128 result = product_value + StickyLogicalShiftRight(u128(addend.mantissa, 0), exp_diff - normalized_point_position);
return ReduceMantissa(product_sign, product_exponent, result);
}
// Subtraction
const u128 addend_long = u128(addend.mantissa) << normalized_point_position;
const u128 addend_long = u128(addend.mantissa, 0) << normalized_point_position;
bool result_sign;
u128 result;

2
src/dynarmic/src/dynarmic/common/u128.cpp

@ -137,7 +137,7 @@ u128 StickyLogicalShiftRight(u128 operand, int amount) {
}
if (operand.lower != 0 || operand.upper != 0) {
return u128(1);
return u128(1, 0);
}
return {};
}

15
src/dynarmic/src/dynarmic/common/u128.h

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
@ -23,22 +23,13 @@ struct u128 {
u128(u128&&) = default;
u128& operator=(const u128&) = default;
u128& operator=(u128&&) = default;
u128(u64 lower_, u64 upper_)
: lower(lower_), upper(upper_) {}
template<typename T>
/* implicit */ u128(T value)
: lower(value), upper(0) {
static_assert(std::is_integral_v<T>);
static_assert(mcl::bitsizeof<T> <= mcl::bitsizeof<u64>);
}
explicit u128(u64 lower_, u64 upper_) : lower(lower_), upper(upper_) {}
u64 lower = 0;
u64 upper = 0;
template<size_t bit_position>
bool Bit() const {
[[nodiscard]] inline bool Bit() const {
static_assert(bit_position < 128);
if constexpr (bit_position < 64) {
return mcl::bit::get_bit<bit_position>(lower);

6
src/dynarmic/src/dynarmic/ir/ir_emitter.h

@ -228,7 +228,7 @@ public:
}
ResultAndCarry<U32> RotateRight(const U32& value_in, const U8& shift_amount, const U1& carry_in) {
const auto result = Inst<U32>(Opcode::RotateRight32, value_in, shift_amount, carry_in);
const auto result = Inst<U32>(Opcode::BitRotateRight32, value_in, shift_amount, carry_in);
const auto carry_out = Inst<U1>(Opcode::GetCarryFromOp, result);
return {result, carry_out};
}
@ -265,9 +265,9 @@ public:
U32U64 RotateRight(const U32U64& value_in, const U8& shift_amount) {
if (value_in.GetType() == Type::U32) {
return Inst<U32>(Opcode::RotateRight32, value_in, shift_amount, Imm1(0));
return Inst<U32>(Opcode::BitRotateRight32, value_in, shift_amount, Imm1(0));
} else {
return Inst<U64>(Opcode::RotateRight64, value_in, shift_amount);
return Inst<U64>(Opcode::BitRotateRight64, value_in, shift_amount);
}
}

4
src/dynarmic/src/dynarmic/ir/opcodes.h

@ -45,8 +45,8 @@ constexpr bool IsArithmeticShift(const Opcode op) noexcept {
/// @brief Determines whether or not this instruction performs a circular shift (rotate).
constexpr bool IsCircularShift(const Opcode op) noexcept {
return op == Opcode::RotateRight32
|| op == Opcode::RotateRight64
return op == Opcode::BitRotateRight32
|| op == Opcode::BitRotateRight64
|| op == Opcode::RotateRightExtended;
}

5
src/dynarmic/src/dynarmic/ir/opcodes.inc

@ -46,8 +46,9 @@ OPCODE(LogicalShiftRight32, U32, U32,
OPCODE(LogicalShiftRight64, U64, U64, U8 )
OPCODE(ArithmeticShiftRight32, U32, U32, U8, U1 )
OPCODE(ArithmeticShiftRight64, U64, U64, U8 )
OPCODE(RotateRight32, U32, U32, U8, U1 )
OPCODE(RotateRight64, U64, U64, U8 )
// Named BitRotateRight* because windows.h already defines RotateRight32 and RotateRight64
OPCODE(BitRotateRight32, U32, U32, U8, U1 )
OPCODE(BitRotateRight64, U64, U64, U8 )
OPCODE(RotateRightExtended, U32, U32, U1 )
OPCODE(LogicalShiftLeftMasked32, U32, U32, U32 )
OPCODE(LogicalShiftLeftMasked64, U64, U64, U64 )

4
src/dynarmic/src/dynarmic/ir/opt_passes.cpp

@ -1072,12 +1072,12 @@ static void ConstantPropagation(IR::Block& block) {
ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
}
break;
case Op::RotateRight32:
case Op::BitRotateRight32:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, true, mcl::bit::rotate_right<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
}
break;
case Op::RotateRight64:
case Op::BitRotateRight64:
if (FoldShifts(inst)) {
ReplaceUsesWith(inst, false, mcl::bit::rotate_right<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
}

Loading…
Cancel
Save