From c95282b2da9307d47fa155a88be59f69ee19acad Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 18 Oct 2025 20:08:13 +0000 Subject: [PATCH] remove mcl integer_of_size Signed-off-by: lizzie --- .../emit_arm64_vector_floating_point.cpp | 1 - .../backend/x64/a32_emit_x64_memory.cpp | 1 - .../src/dynarmic/backend/x64/a64_emit_x64.cpp | 1 - .../backend/x64/a64_emit_x64_memory.cpp | 1 - .../backend/x64/emit_x64_floating_point.cpp | 150 ++++++++---------- .../backend/x64/emit_x64_memory.cpp.inc | 4 +- .../backend/x64/emit_x64_saturation.cpp | 5 +- .../x64/emit_x64_vector_floating_point.cpp | 37 +++-- src/dynarmic/src/dynarmic/common/fp/util.h | 11 ++ 9 files changed, 101 insertions(+), 110 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp index 13ed856156..09127df1c7 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp @@ -14,7 +14,6 @@ #include #include #include -#include #include #include "dynarmic/backend/arm64/a32_jitstate.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp index a1fca21f47..23f585af21 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp @@ -10,7 +10,6 @@ #include #include -#include #include #include "dynarmic/backend/x64/a32_emit_x64.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index b5da18701c..e897cef7cb 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -13,7 +13,6 @@ #include "dynarmic/common/assert.h" #include #include "dynarmic/common/common_types.h" -#include #include #include "dynarmic/backend/x64/a64_jitstate.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp index 8fd6777542..4cd6279ec1 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp @@ -10,7 +10,6 @@ #include #include -#include #include #include "dynarmic/backend/x64/a64_emit_x64.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index a2fe4001a9..5331942bf2 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -18,7 +18,6 @@ #include #include #include "dynarmic/common/common_types.h" -#include #include #include "dynarmic/backend/x64/abi.h" @@ -38,7 +37,7 @@ #define FCODE(NAME) \ [&code](auto... args) { \ - if constexpr (fsize == 32) { \ + if (fsize == 32) { \ code.NAME##s(args...); \ } else { \ code.NAME##d(args...); \ @@ -46,7 +45,7 @@ } #define ICODE(NAME) \ [&code](auto... args) { \ - if constexpr (fsize == 32) { \ + if (fsize == 32) { \ code.NAME##d(args...); \ } else { \ code.NAME##q(args...); \ @@ -105,7 +104,7 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list t for (const Xbyak::Xmm& xmm : to_daz) { code.movaps(xmm0, code.Const(xword, fsize == 32 ? 
f32_non_sign_mask : f64_non_sign_mask)); code.andps(xmm0, xmm); - if constexpr (fsize == 32) { + if (fsize == 32) { code.pcmpgtd(xmm0, code.Const(xword, f32_smallest_normal - 1)); } else if (code.HasHostFeature(HostFeature::SSE42)) { code.pcmpgtq(xmm0, code.Const(xword, f64_smallest_normal - 1)); @@ -120,13 +119,11 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list t template void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list to_daz) { - if (ctx.FPCR().FZ()) { + if (ctx.FPCR().FZ()) ForceDenormalsToZero(code, to_daz); - } } -template -void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) { +void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch, size_t fsize) { if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero, FpFixup::PosZero); @@ -141,8 +138,7 @@ void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) } } -template -void ForceToDefaultNaN(BlockOfCode& code, Xbyak::Xmm result) { +void ForceToDefaultNaN(BlockOfCode& code, Xbyak::Xmm result, size_t fsize) { if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { const Xbyak::Opmask nan_mask = k1; FCODE(vfpclasss)(nan_mask, result, u8(FpClass::QNaN | FpClass::SNaN)); @@ -208,7 +204,7 @@ void PostProcessNaN(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm tmp) { // We allow for the case where op1 and result are the same register. We do not read from op1 once result is written to. template void EmitPostProcessNaNs(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm op1, Xbyak::Xmm op2, Xbyak::Reg64 tmp, Xbyak::Label end) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; constexpr FPT exponent_mask = FP::FPInfo::exponent_mask; constexpr FPT mantissa_msb = FP::FPInfo::mantissa_msb; constexpr u8 mantissa_msb_bit = static_cast(FP::FPInfo::explicit_mantissa_width - 1); @@ -236,7 +232,7 @@ void EmitPostProcessNaNs(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm op1, X } constexpr size_t shift = fsize == 32 ? 
0 : 48; - if constexpr (fsize == 32) { + if (fsize == 32) { code.movd(tmp.cvt32(), xmm0); } else { // We do this to avoid requiring 64-bit immediates @@ -252,7 +248,7 @@ void EmitPostProcessNaNs(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm op1, X // op1 == QNaN && op2 == SNaN <<< The problematic case // op1 == QNaN && op2 == Inf - if constexpr (fsize == 32) { + if (fsize == 32) { code.movd(tmp.cvt32(), op2); code.shl(tmp.cvt32(), 32 - mantissa_msb_bit); } else { @@ -283,7 +279,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { if (!ctx.FPCR().DN() && !ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { end = ProcessNaN(code, ctx, result); } - if constexpr (std::is_member_function_pointer_v) { + if (std::is_member_function_pointer_v) { (code.*fn)(result, result); } else { fn(result); @@ -291,7 +287,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { if (ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { // Do nothing } else if (ctx.FPCR().DN()) { - ForceToDefaultNaN(code, result); + ForceToDefaultNaN(code, result, fsize); } else { PostProcessNaN(code, result, xmm0); } @@ -302,7 +298,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { template void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -310,14 +306,14 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]); - if constexpr (std::is_member_function_pointer_v) { + if (std::is_member_function_pointer_v) { (code.*fn)(result, operand); } else { fn(result, operand); } if (!ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { - ForceToDefaultNaN(code, result); + ForceToDefaultNaN(code, result, fsize); } ctx.reg_alloc.DefineValue(inst, result); @@ -332,7 +328,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) SharedLabel end = GenSharedLabel(), nan = GenSharedLabel(); code.movaps(result, op1); - if constexpr (std::is_member_function_pointer_v) { + if (std::is_member_function_pointer_v) { (code.*fn)(result, op2); } else { fn(result, op2); @@ -361,7 +357,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) template void FPAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; constexpr FPT non_sign_mask = FP::FPInfo::sign_mask - FPT(1u); auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -387,7 +383,7 @@ void EmitX64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) { template void FPNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; constexpr FPT sign_mask = FP::FPInfo::sign_mask; auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -442,7 +438,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { FCODE(ucomis)(result, operand); code.jz(*equal, code.T_NEAR); - if constexpr (is_max) { + if (is_max) { FCODE(maxs)(result, operand); } else { FCODE(mins)(result, operand); @@ -454,7 +450,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.L(*equal); code.jp(nan); - if constexpr (is_max) { + if (is_max) { 
code.andps(result, operand); } else { code.orps(result, operand); @@ -477,7 +473,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { template static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) noexcept { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; constexpr FPT default_nan = FP::FPInfo::DefaultNaN(); auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -502,7 +498,7 @@ static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR:: tmp.setBit(fsize); const auto move_to_tmp = [=, &code](const Xbyak::Xmm& xmm) { - if constexpr (fsize == 32) { + if (fsize == 32) { code.movd(tmp.cvt32(), xmm); } else { code.movq(tmp.cvt64(), xmm); @@ -513,7 +509,7 @@ static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR:: FCODE(ucomis)(op1, op2); code.jz(*z, code.T_NEAR); - if constexpr (is_max) { + if (is_max) { FCODE(maxs)(op2, op1); } else { FCODE(mins)(op2, op1); @@ -527,7 +523,7 @@ static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR:: code.L(*z); code.jp(nan); - if constexpr (is_max) { + if (is_max) { code.andps(op2, op1); } else { code.orps(op2, op1); @@ -629,12 +625,12 @@ void EmitX64::EmitFPMul64(EmitContext& ctx, IR::Inst* inst) { template static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; const auto fallback_fn = negate_product ? &FP::FPMulSub : &FP::FPMulAdd; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - if constexpr (fsize != 16) { + if (fsize != 16) { const bool needs_rounding_correction = ctx.FPCR().FZ(); const bool needs_nan_correction = !ctx.FPCR().DN(); @@ -643,13 +639,13 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]); - if constexpr (negate_product) { + if (negate_product) { FCODE(vfnmadd231s)(result, operand2, operand3); } else { FCODE(vfmadd231s)(result, operand2, operand3); } if (ctx.FPCR().DN()) { - ForceToDefaultNaN(code, result); + ForceToDefaultNaN(code, result, fsize); } ctx.reg_alloc.DefineValue(inst, result); @@ -665,7 +661,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); code.movaps(result, operand1); - if constexpr (negate_product) { + if (negate_product) { FCODE(vfnmadd231s)(result, operand2, operand3); } else { FCODE(vfmadd231s)(result, operand2, operand3); @@ -686,9 +682,8 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { } else { UNREACHABLE(); } - if (ctx.FPCR().DN()) { - ForceToDefaultNaN(code, result); - } + if (ctx.FPCR().DN()) + ForceToDefaultNaN(code, result, fsize); code.L(*end); ctx.deferred_emits.emplace_back([=, &code, &ctx] { @@ -769,7 +764,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.ptest(operand2, xmm0); code.jnz(op2_done); code.vorps(result, operand2, xmm0); - if constexpr (negate_product) { + if (negate_product) { code.xorps(result, code.Const(xword, FP::FPInfo::sign_mask)); } code.jmp(*end); @@ -785,7 +780,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { // at this point, all SNaNs have been handled // if op1 was not a QNaN and op2 is, negate the result - if constexpr (negate_product) { + if (negate_product) { 
FCODE(ucomis)(operand1, operand1); code.jp(*end); FCODE(ucomis)(operand2, operand2); @@ -806,7 +801,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]); - if constexpr (negate_product) { + if (negate_product) { code.xorps(operand2, code.Const(xword, FP::FPInfo::sign_mask)); } FCODE(muls)(operand2, operand3); @@ -857,7 +852,7 @@ void EmitX64::EmitFPMulSub64(EmitContext& ctx, IR::Inst* inst) { template static void EmitFPMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -917,9 +912,9 @@ void EmitX64::EmitFPMulX64(EmitContext& ctx, IR::Inst* inst) { template static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; - if constexpr (fsize != 16) { + if (fsize != 16) { if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]); @@ -928,7 +923,7 @@ static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { FCODE(vrcp14s)(result, operand, operand); } else { - if constexpr (fsize == 32) { + if (fsize == 32) { code.rcpss(result, operand); } else { code.cvtsd2ss(result, operand); @@ -963,7 +958,7 @@ void EmitX64::EmitFPRecipEstimate64(EmitContext& ctx, IR::Inst* inst) { template static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(inst, args[0]); @@ -986,11 +981,11 @@ void EmitX64::EmitFPRecipExponent64(EmitContext& ctx, IR::Inst* inst) { template static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - if constexpr (fsize != 16) { + if (fsize != 16) { if (code.HasHostFeature(HostFeature::FMA) && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { Xbyak::Label end, fallback; @@ -1123,9 +1118,9 @@ void EmitX64::EmitFPRoundInt64(EmitContext& ctx, IR::Inst* inst) { template static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; - if constexpr (fsize != 16) { + if (fsize != 16) { if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]); @@ -1134,7 +1129,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { FCODE(vrsqrt14s)(result, operand, operand); } else { - if constexpr (fsize == 32) { + if (fsize == 32) { code.rsqrtss(result, operand); } else { code.cvtsd2ss(result, operand); @@ -1180,7 +1175,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i bool needs_fallback = false; code.L(*bad_values); - if constexpr (fsize == 32) { + if (fsize == 32) { code.movd(tmp, operand); if (!ctx.FPCR().FZ()) { @@ -1302,11 +1297,11 @@ void 
EmitX64::EmitFPRSqrtEstimate64(EmitContext& ctx, IR::Inst* inst) { template static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - if constexpr (fsize != 16) { + if (fsize != 16) { if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX) && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); @@ -1485,9 +1480,8 @@ void EmitX64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) { // Double-conversion here is acceptable as this is expanding precision. code.vcvtph2ps(result, value); code.vcvtps2pd(result, result); - if (ctx.FPCR().DN()) { - ForceToDefaultNaN<64>(code, result); - } + if (ctx.FPCR().DN()) + ForceToDefaultNaN(code, result, 64); ctx.reg_alloc.DefineValue(inst, result); return; @@ -1509,9 +1503,8 @@ void EmitX64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[0]); code.vcvtph2ps(result, value); - if (ctx.FPCR().DN()) { - ForceToDefaultNaN<32>(code, result); - } + if (ctx.FPCR().DN()) + ForceToDefaultNaN(code, result, 32); ctx.reg_alloc.DefineValue(inst, result); return; @@ -1519,23 +1512,22 @@ void EmitX64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); - code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); + code.mov(code.ABI_PARAM3.cvt32(), u32(rounding_mode)); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const auto rounding_mode = static_cast(args[1].GetImmediateU8()); + const auto rounding_mode = FP::RoundingMode(args[1].GetImmediateU8()); // We special-case the non-IEEE-defined ToOdd rounding mode. if (rounding_mode == ctx.FPCR().RMode() && rounding_mode != FP::RoundingMode::ToOdd) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); code.cvtss2sd(result, result); - if (ctx.FPCR().DN()) { - ForceToDefaultNaN<64>(code, result); - } + if (ctx.FPCR().DN()) + ForceToDefaultNaN(code, result, 64); ctx.reg_alloc.DefineValue(inst, result); } else { ctx.reg_alloc.HostCall(inst, args[0]); @@ -1553,12 +1545,9 @@ void EmitX64::EmitFPSingleToHalf(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::F16C) && !ctx.FPCR().AHP() && !ctx.FPCR().FZ16()) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - - if (ctx.FPCR().DN()) { - ForceToDefaultNaN<32>(code, result); - } - code.vcvtps2ph(result, result, static_cast(*round_imm)); - + if (ctx.FPCR().DN()) + ForceToDefaultNaN(code, result, 32); + code.vcvtps2ph(result, result, u8(*round_imm)); ctx.reg_alloc.DefineValue(inst, result); return; } @@ -1586,21 +1575,18 @@ void EmitX64::EmitFPDoubleToHalf(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const auto rounding_mode = static_cast(args[1].GetImmediateU8()); - + const auto rounding_mode = FP::RoundingMode(args[1].GetImmediateU8()); // We special-case the non-IEEE-defined ToOdd rounding mode. 
if (rounding_mode == ctx.FPCR().RMode() && rounding_mode != FP::RoundingMode::ToOdd) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - code.cvtsd2ss(result, result); - if (ctx.FPCR().DN()) { - ForceToDefaultNaN<32>(code, result); - } + if (ctx.FPCR().DN()) + ForceToDefaultNaN(code, result, 32); ctx.reg_alloc.DefineValue(inst, result); } else { ctx.reg_alloc.HostCall(inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); - code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); + code.mov(code.ABI_PARAM3.cvt32(), u32(rounding_mode)); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPConvert); } @@ -1630,7 +1616,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { const size_t fbits = args[1].GetImmediateU8(); const auto rounding_mode = FP::RoundingMode(args[2].GetImmediateU8()); - if constexpr (fsize != 16) { + if (fsize != 16) { const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode); // cvttsd2si truncates during operation so rounding (and thus SSE4.1) not required @@ -1640,7 +1626,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); - if constexpr (fsize == 64) { + if (fsize == 64) { if (fbits != 0) { const u64 scale_factor = static_cast((fbits + 1023) << 52); code.mulsd(src, code.Const(xword, scale_factor)); @@ -1662,13 +1648,13 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.cvtss2sd(src, src); } - if constexpr (isize == 64) { + if (isize == 64) { const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(); if (!unsigned_) { SharedLabel saturate_max = GenSharedLabel(), end = GenSharedLabel(); - ZeroIfNaN<64>(code, src, scratch); + ZeroIfNaN(code, src, scratch, 64); code.movsd(scratch, code.Const(xword, f64_max_s64_lim)); code.comisd(scratch, src); @@ -1706,11 +1692,11 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.sar(result2, 63); code.or_(result, result2); } - } else if constexpr (isize == 32) { + } else if (isize == 32) { if (!unsigned_) { const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(); - ZeroIfNaN<64>(code, src, scratch); + ZeroIfNaN(code, src, scratch, 64); code.minsd(src, code.Const(xword, f64_max_s32)); // maxsd not required as cvttsd2si results in 0x8000'0000 when out of range code.cvttsd2si(result.cvt32(), src); // 32 bit gpr @@ -1723,7 +1709,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { } else { const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(); - ZeroIfNaN<64>(code, src, scratch); + ZeroIfNaN(code, src, scratch, 64); code.maxsd(src, code.Const(xword, unsigned_ ? f64_min_u16 : f64_min_s16)); code.minsd(src, code.Const(xword, unsigned_ ? 
f64_max_u16 : f64_max_s16)); code.cvttsd2si(result, src); // 64 bit gpr diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc index 34f77b0446..af9f392662 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc @@ -223,7 +223,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) { const bool ordered = IsOrdered(args[2].GetImmediateAccType()); if constexpr (bitsize != 128) { - using T = mcl::unsigned_integer_of_size; + using T = UnsignedIntegerN; ctx.reg_alloc.HostCall(inst, {}, args[1]); @@ -290,7 +290,7 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) { code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0)); code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); if constexpr (bitsize != 128) { - using T = mcl::unsigned_integer_of_size; + using T = UnsignedIntegerN; code.CallLambda( [](AxxUserConfig& conf, Axx::VAddr vaddr, T value) -> u32 { diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp index 31231c02aa..6a693a2d61 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp @@ -11,7 +11,6 @@ #include "dynarmic/common/assert.h" #include #include "dynarmic/common/common_types.h" -#include #include "dynarmic/backend/x64/block_of_code.h" #include "dynarmic/backend/x64/emit_x64.h" @@ -38,7 +37,7 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) Xbyak::Reg addend = ctx.reg_alloc.UseGpr(args[1]).changeBit(size); Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr().changeBit(size); - constexpr u64 int_max = static_cast((std::numeric_limits>::max)()); + constexpr u64 int_max = static_cast((std::numeric_limits>::max)()); if constexpr (size < 64) { code.xor_(overflow.cvt32(), overflow.cvt32()); code.bt(result.cvt32(), size - 1); @@ -82,7 +81,7 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size); Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(args[1]).changeBit(size); - constexpr u64 boundary = op == Op::Add ? (std::numeric_limits>::max)() : 0; + constexpr u64 boundary = op == Op::Add ? 
(std::numeric_limits>::max)() : 0; if constexpr (op == Op::Add) { code.add(op_result, addend); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp index 66a179a481..ec73f74c98 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include "dynarmic/backend/x64/abi.h" @@ -76,7 +75,7 @@ void MaybeStandardFPSCRValue(BlockOfCode& code, EmitContext& ctx, bool fpcr_cont template class Indexer, size_t narg> struct NaNHandler { public: - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; using function_type = void (*)(std::array, narg>&, FP::FPCR); @@ -158,33 +157,33 @@ Xbyak::Address GetVectorOf(BlockOfCode& code) { template Xbyak::Address GetNaNVector(BlockOfCode& code) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; return GetVectorOf::DefaultNaN()>(code); } template Xbyak::Address GetNegativeZeroVector(BlockOfCode& code) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; return GetVectorOf::Zero(true)>(code); } template Xbyak::Address GetNonSignMaskVector(BlockOfCode& code) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; constexpr FPT non_sign_mask = FP::FPInfo::exponent_mask | FP::FPInfo::mantissa_mask; return GetVectorOf(code); } template Xbyak::Address GetSmallestNormalVector(BlockOfCode& code) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; constexpr FPT smallest_normal_number = FP::FPValue::exponent_min, 1>(); return GetVectorOf(code); } -template value> +template value> Xbyak::Address GetVectorOf(BlockOfCode& code) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; return GetVectorOf()>(code); } @@ -1085,7 +1084,7 @@ static void EmitFPVectorMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::I if (code.HasHostFeature(HostFeature::AVX)) { MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; // result = xmm_a == SNaN || xmm_b == QNaN { @@ -1158,7 +1157,7 @@ static void EmitFPVectorMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::I } MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; // result = xmm_a == SNaN || xmm_b == QNaN { @@ -1314,7 +1313,7 @@ static void EmitFPVectorMulAddFallback(VectorArray& result, const VectorArr template void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; const auto fallback_fn = [](VectorArray& result, const VectorArray& addend, const VectorArray& op1, const VectorArray& op2, FP::FPCR fpcr, FP::FPSR& fpsr) { for (size_t i = 0; i < result.size(); i++) { @@ -1425,7 +1424,7 @@ void EmitX64::EmitFPVectorMulAdd64(EmitContext& ctx, IR::Inst* inst) { template static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[2].GetImmediateU1(); @@ -1491,7 +1490,7 @@ void EmitX64::EmitFPVectorMulX64(EmitContext& ctx, IR::Inst* inst) { template void FPVectorNeg(BlockOfCode& 
code, EmitContext& ctx, IR::Inst* inst) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; constexpr FPT sign_mask = FP::FPInfo::sign_mask; auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -1544,7 +1543,7 @@ void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) { template static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; if constexpr (fsize != 16) { if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) { @@ -1590,7 +1589,7 @@ void EmitX64::EmitFPVectorRecipEstimate64(EmitContext& ctx, IR::Inst* inst) { template static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; const auto fallback_fn = [](VectorArray& result, const VectorArray& op1, const VectorArray& op2, FP::FPCR fpcr, FP::FPSR& fpsr) { for (size_t i = 0; i < result.size(); i++) { @@ -1760,7 +1759,7 @@ void EmitX64::EmitFPVectorRoundInt64(EmitContext& ctx, IR::Inst* inst) { template static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; const auto fallback_fn = [](VectorArray& result, const VectorArray& operand, FP::FPCR fpcr, FP::FPSR& fpsr) { for (size_t i = 0; i < result.size(); i++) { @@ -1852,7 +1851,7 @@ void EmitX64::EmitFPVectorRSqrtEstimate64(EmitContext& ctx, IR::Inst* inst) { template static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { - using FPT = mcl::unsigned_integer_of_size; + using FPT = UnsignedIntegerN; const auto fallback_fn = [](VectorArray& result, const VectorArray& op1, const VectorArray& op2, FP::FPCR fpcr, FP::FPSR& fpsr) { for (size_t i = 0; i < result.size(); i++) { @@ -2126,7 +2125,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { FCODE(orp)(src, exceed_unsigned); } } else { - using FPT = mcl::unsigned_integer_of_size; // WORKAROUND: For issue 678 on MSVC + using FPT = UnsignedIntegerN; // WORKAROUND: For issue 678 on MSVC constexpr u64 integer_max = FPT((std::numeric_limits>>::max)()); code.movaps(xmm0, GetVectorOf(code)); @@ -2150,7 +2149,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { mp::lift_value>; static const auto lut = Common::GenerateLookupTableFromList([](I) { - using FPT = mcl::unsigned_integer_of_size; // WORKAROUND: For issue 678 on MSVC + using FPT = UnsignedIntegerN; // WORKAROUND: For issue 678 on MSVC return std::pair{ mp::lower_to_tuple_v, Common::FptrCast([](VectorArray& output, const VectorArray& input, FP::FPCR fpcr, FP::FPSR& fpsr) { diff --git a/src/dynarmic/src/dynarmic/common/fp/util.h b/src/dynarmic/src/dynarmic/common/fp/util.h index fda34e3ee5..ac4a0efb1b 100644 --- a/src/dynarmic/src/dynarmic/common/fp/util.h +++ b/src/dynarmic/src/dynarmic/common/fp/util.h @@ -6,6 +6,7 @@ #pragma once #include +#include #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/info.h" @@ -96,4 +97,14 @@ constexpr std::optional ProcessNaNs(FPT a, FPT b, FPT c) { return std::nullopt; } +namespace Detail { +template<size_t N> struct IntegerOfSize {}; +template<> struct IntegerOfSize<8> { using U = std::uint8_t; using S = std::int8_t; }; +template<> struct IntegerOfSize<16> { using U = std::uint16_t; using S = std::int16_t; }; +template<> struct IntegerOfSize<32> { using U = std::uint32_t; using S = std::int32_t; }; 
+template<> struct IntegerOfSize<64> { using U = std::uint64_t; using S = std::int64_t; }; +} +template<size_t N> using UnsignedIntegerN = typename Detail::IntegerOfSize<N>::U; +template<size_t N> using SignedIntegerN = typename Detail::IntegerOfSize<N>::S; + } // namespace Dynarmic::FP
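
For reference, a minimal self-contained sketch of the width-to-integer-type mapping this patch introduces in common/fp/util.h, assuming the aliases are parameterised on the bit width in the same way as the mcl helpers they replace. This is an illustrative translation unit, not the exact header contents:

#include <cstddef>
#include <cstdint>
#include <type_traits>

namespace Dynarmic::FP {
namespace Detail {
// Map a bit width N to its fixed-width integer types.
// Leaving the primary template empty makes unsupported widths a compile-time error.
template<std::size_t N> struct IntegerOfSize {};
template<> struct IntegerOfSize<8>  { using U = std::uint8_t;  using S = std::int8_t;  };
template<> struct IntegerOfSize<16> { using U = std::uint16_t; using S = std::int16_t; };
template<> struct IntegerOfSize<32> { using U = std::uint32_t; using S = std::int32_t; };
template<> struct IntegerOfSize<64> { using U = std::uint64_t; using S = std::int64_t; };
}  // namespace Detail

template<std::size_t N> using UnsignedIntegerN = typename Detail::IntegerOfSize<N>::U;
template<std::size_t N> using SignedIntegerN = typename Detail::IntegerOfSize<N>::S;
}  // namespace Dynarmic::FP

// Emitter call sites would then read along the lines of (possibly namespace-qualified):
//   using FPT = UnsignedIntegerN<fsize>;
static_assert(std::is_same_v<Dynarmic::FP::UnsignedIntegerN<32>, std::uint32_t>);
static_assert(std::is_same_v<Dynarmic::FP::SignedIntegerN<64>, std::int64_t>);

int main() { return 0; }

Expressing the mapping as explicit specialisations rather than a std::conditional chain keeps the lookup trivial for the compiler and rejects widths other than 8/16/32/64 at compile time.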