
remove mcl integer_of_size

Signed-off-by: lizzie <lizzie@eden-emu.dev>
pull/2775/head
lizzie authored 4 months ago, committed by crueter
commit c95282b2da
  1. src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp (1 change)
  2. src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp (1 change)
  3. src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp (1 change)
  4. src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp (1 change)
  5. src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp (150 changes)
  6. src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc (4 changes)
  7. src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp (5 changes)
  8. src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp (37 changes)
  9. src/dynarmic/src/dynarmic/common/fp/util.h (11 changes)

src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_vector_floating_point.cpp

@@ -14,7 +14,6 @@
 #include <mcl/mp/typelist/list.hpp>
 #include <mcl/mp/typelist/lower_to_tuple.hpp>
 #include <mcl/type_traits/function_info.hpp>
-#include <mcl/type_traits/integer_of_size.hpp>
 #include <oaknut/oaknut.hpp>
 #include "dynarmic/backend/arm64/a32_jitstate.h"

src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp

@@ -10,7 +10,6 @@
 #include <fmt/format.h>
 #include <fmt/ostream.h>
-#include <mcl/type_traits/integer_of_size.hpp>
 #include <xbyak/xbyak.h>
 #include "dynarmic/backend/x64/a32_emit_x64.h"

src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp

@@ -13,7 +13,6 @@
 #include "dynarmic/common/assert.h"
 #include <mcl/scope_exit.hpp>
 #include "dynarmic/common/common_types.h"
-#include <mcl/type_traits/integer_of_size.hpp>
 #include <boost/container/static_vector.hpp>
 #include "dynarmic/backend/x64/a64_jitstate.h"

1
src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp

@ -10,7 +10,6 @@
#include <fmt/format.h> #include <fmt/format.h>
#include <fmt/ostream.h> #include <fmt/ostream.h>
#include <mcl/type_traits/integer_of_size.hpp>
#include <xbyak/xbyak.h> #include <xbyak/xbyak.h>
#include "dynarmic/backend/x64/a64_emit_x64.h" #include "dynarmic/backend/x64/a64_emit_x64.h"

src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp

@@ -18,7 +18,6 @@
 #include <mcl/mp/typelist/list.hpp>
 #include <mcl/mp/typelist/lower_to_tuple.hpp>
 #include "dynarmic/common/common_types.h"
-#include <mcl/type_traits/integer_of_size.hpp>
 #include <xbyak/xbyak.h>
 #include "dynarmic/backend/x64/abi.h"
@@ -38,7 +37,7 @@
 #define FCODE(NAME) \
 [&code](auto... args) { \
-if constexpr (fsize == 32) { \
+if (fsize == 32) { \
 code.NAME##s(args...); \
 } else { \
 code.NAME##d(args...); \
@@ -46,7 +45,7 @@
 }
 #define ICODE(NAME) \
 [&code](auto... args) { \
-if constexpr (fsize == 32) { \
+if (fsize == 32) { \
 code.NAME##d(args...); \
 } else { \
 code.NAME##q(args...); \
@@ -105,7 +104,7 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list<Xbyak::Xmm> t
 for (const Xbyak::Xmm& xmm : to_daz) {
 code.movaps(xmm0, code.Const(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask));
 code.andps(xmm0, xmm);
-if constexpr (fsize == 32) {
+if (fsize == 32) {
 code.pcmpgtd(xmm0, code.Const(xword, f32_smallest_normal - 1));
 } else if (code.HasHostFeature(HostFeature::SSE42)) {
 code.pcmpgtq(xmm0, code.Const(xword, f64_smallest_normal - 1));
@@ -120,13 +119,11 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list<Xbyak::Xmm> t
 template<size_t fsize>
 void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list<Xbyak::Xmm> to_daz) {
-if (ctx.FPCR().FZ()) {
+if (ctx.FPCR().FZ())
 ForceDenormalsToZero<fsize>(code, to_daz);
-}
 }
-template<size_t fsize>
-void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
+void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch, size_t fsize) {
 if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
 constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
 FpFixup::PosZero);
@@ -141,8 +138,7 @@ void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch)
 }
 }
-template<size_t fsize>
-void ForceToDefaultNaN(BlockOfCode& code, Xbyak::Xmm result) {
+void ForceToDefaultNaN(BlockOfCode& code, Xbyak::Xmm result, size_t fsize) {
 if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
 const Xbyak::Opmask nan_mask = k1;
 FCODE(vfpclasss)(nan_mask, result, u8(FpClass::QNaN | FpClass::SNaN));
@@ -208,7 +204,7 @@ void PostProcessNaN(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm tmp) {
 // We allow for the case where op1 and result are the same register. We do not read from op1 once result is written to.
 template<size_t fsize>
 void EmitPostProcessNaNs(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm op1, Xbyak::Xmm op2, Xbyak::Reg64 tmp, Xbyak::Label end) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 constexpr FPT exponent_mask = FP::FPInfo<FPT>::exponent_mask;
 constexpr FPT mantissa_msb = FP::FPInfo<FPT>::mantissa_msb;
 constexpr u8 mantissa_msb_bit = static_cast<u8>(FP::FPInfo<FPT>::explicit_mantissa_width - 1);
@@ -236,7 +232,7 @@ void EmitPostProcessNaNs(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm op1, X
 }
 constexpr size_t shift = fsize == 32 ? 0 : 48;
-if constexpr (fsize == 32) {
+if (fsize == 32) {
 code.movd(tmp.cvt32(), xmm0);
 } else {
 // We do this to avoid requiring 64-bit immediates
@@ -252,7 +248,7 @@ void EmitPostProcessNaNs(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm op1, X
 // op1 == QNaN && op2 == SNaN <<< The problematic case
 // op1 == QNaN && op2 == Inf
-if constexpr (fsize == 32) {
+if (fsize == 32) {
 code.movd(tmp.cvt32(), op2);
 code.shl(tmp.cvt32(), 32 - mantissa_msb_bit);
 } else {
@@ -283,7 +279,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
 if (!ctx.FPCR().DN() && !ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
 end = ProcessNaN<fsize>(code, ctx, result);
 }
-if constexpr (std::is_member_function_pointer_v<Function>) {
+if (std::is_member_function_pointer_v<Function>) {
 (code.*fn)(result, result);
 } else {
 fn(result);
@@ -291,7 +287,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
 if (ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
 // Do nothing
 } else if (ctx.FPCR().DN()) {
-ForceToDefaultNaN<fsize>(code, result);
+ForceToDefaultNaN(code, result, fsize);
 } else {
 PostProcessNaN<fsize>(code, result, xmm0);
 }
@@ -302,7 +298,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
 template<size_t fsize, typename Function>
 void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -310,14 +306,14 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
 const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
 const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
-if constexpr (std::is_member_function_pointer_v<Function>) {
+if (std::is_member_function_pointer_v<Function>) {
 (code.*fn)(result, operand);
 } else {
 fn(result, operand);
 }
 if (!ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
-ForceToDefaultNaN<fsize>(code, result);
+ForceToDefaultNaN(code, result, fsize);
 }
 ctx.reg_alloc.DefineValue(inst, result);
@@ -332,7 +328,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
 SharedLabel end = GenSharedLabel(), nan = GenSharedLabel();
 code.movaps(result, op1);
-if constexpr (std::is_member_function_pointer_v<Function>) {
+if (std::is_member_function_pointer_v<Function>) {
 (code.*fn)(result, op2);
 } else {
 fn(result, op2);
@@ -361,7 +357,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
 template<size_t fsize>
 void FPAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 constexpr FPT non_sign_mask = FP::FPInfo<FPT>::sign_mask - FPT(1u);
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -387,7 +383,7 @@ void EmitX64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) {
 template<size_t fsize>
 void FPNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 constexpr FPT sign_mask = FP::FPInfo<FPT>::sign_mask;
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -442,7 +438,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 FCODE(ucomis)(result, operand);
 code.jz(*equal, code.T_NEAR);
-if constexpr (is_max) {
+if (is_max) {
 FCODE(maxs)(result, operand);
 } else {
 FCODE(mins)(result, operand);
@@ -454,7 +450,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 code.L(*equal);
 code.jp(nan);
-if constexpr (is_max) {
+if (is_max) {
 code.andps(result, operand);
 } else {
 code.orps(result, operand);
@@ -477,7 +473,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 template<size_t fsize, bool is_max>
 static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) noexcept {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 constexpr FPT default_nan = FP::FPInfo<FPT>::DefaultNaN();
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -502,7 +498,7 @@ static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::
 tmp.setBit(fsize);
 const auto move_to_tmp = [=, &code](const Xbyak::Xmm& xmm) {
-if constexpr (fsize == 32) {
+if (fsize == 32) {
 code.movd(tmp.cvt32(), xmm);
 } else {
 code.movq(tmp.cvt64(), xmm);
@@ -513,7 +509,7 @@ static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::
 FCODE(ucomis)(op1, op2);
 code.jz(*z, code.T_NEAR);
-if constexpr (is_max) {
+if (is_max) {
 FCODE(maxs)(op2, op1);
 } else {
 FCODE(mins)(op2, op1);
@@ -527,7 +523,7 @@ static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::
 code.L(*z);
 code.jp(nan);
-if constexpr (is_max) {
+if (is_max) {
 code.andps(op2, op1);
 } else {
 code.orps(op2, op1);
@@ -629,12 +625,12 @@ void EmitX64::EmitFPMul64(EmitContext& ctx, IR::Inst* inst) {
 template<size_t fsize, bool negate_product>
 static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 const auto fallback_fn = negate_product ? &FP::FPMulSub<FPT> : &FP::FPMulAdd<FPT>;
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-if constexpr (fsize != 16) {
+if (fsize != 16) {
 const bool needs_rounding_correction = ctx.FPCR().FZ();
 const bool needs_nan_correction = !ctx.FPCR().DN();
@@ -643,13 +639,13 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
 const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
-if constexpr (negate_product) {
+if (negate_product) {
 FCODE(vfnmadd231s)(result, operand2, operand3);
 } else {
 FCODE(vfmadd231s)(result, operand2, operand3);
 }
 if (ctx.FPCR().DN()) {
-ForceToDefaultNaN<fsize>(code, result);
+ForceToDefaultNaN(code, result, fsize);
 }
 ctx.reg_alloc.DefineValue(inst, result);
@@ -665,7 +661,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
 code.movaps(result, operand1);
-if constexpr (negate_product) {
+if (negate_product) {
 FCODE(vfnmadd231s)(result, operand2, operand3);
 } else {
 FCODE(vfmadd231s)(result, operand2, operand3);
@@ -686,9 +682,8 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 } else {
 UNREACHABLE();
 }
-if (ctx.FPCR().DN()) {
-ForceToDefaultNaN<fsize>(code, result);
-}
+if (ctx.FPCR().DN())
+ForceToDefaultNaN(code, result, fsize);
 code.L(*end);
 ctx.deferred_emits.emplace_back([=, &code, &ctx] {
@@ -769,7 +764,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 code.ptest(operand2, xmm0);
 code.jnz(op2_done);
 code.vorps(result, operand2, xmm0);
-if constexpr (negate_product) {
+if (negate_product) {
 code.xorps(result, code.Const(xword, FP::FPInfo<FPT>::sign_mask));
 }
 code.jmp(*end);
@@ -785,7 +780,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 // at this point, all SNaNs have been handled
 // if op1 was not a QNaN and op2 is, negate the result
-if constexpr (negate_product) {
+if (negate_product) {
 FCODE(ucomis)(operand1, operand1);
 code.jp(*end);
 FCODE(ucomis)(operand2, operand2);
@@ -806,7 +801,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
 const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
-if constexpr (negate_product) {
+if (negate_product) {
 code.xorps(operand2, code.Const(xword, FP::FPInfo<FPT>::sign_mask));
 }
 FCODE(muls)(operand2, operand3);
@@ -857,7 +852,7 @@ void EmitX64::EmitFPMulSub64(EmitContext& ctx, IR::Inst* inst) {
 template<size_t fsize>
 static void EmitFPMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -917,9 +912,9 @@ void EmitX64::EmitFPMulX64(EmitContext& ctx, IR::Inst* inst) {
 template<size_t fsize>
 static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
-if constexpr (fsize != 16) {
+if (fsize != 16) {
 if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
@@ -928,7 +923,7 @@ static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
 if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
 FCODE(vrcp14s)(result, operand, operand);
 } else {
-if constexpr (fsize == 32) {
+if (fsize == 32) {
 code.rcpss(result, operand);
 } else {
 code.cvtsd2ss(result, operand);
@@ -963,7 +958,7 @@ void EmitX64::EmitFPRecipEstimate64(EmitContext& ctx, IR::Inst* inst) {
 template<size_t fsize>
 static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 ctx.reg_alloc.HostCall(inst, args[0]);
@@ -986,11 +981,11 @@ void EmitX64::EmitFPRecipExponent64(EmitContext& ctx, IR::Inst* inst) {
 template<size_t fsize>
 static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-if constexpr (fsize != 16) {
+if (fsize != 16) {
 if (code.HasHostFeature(HostFeature::FMA) && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
 Xbyak::Label end, fallback;
@@ -1123,9 +1118,9 @@ void EmitX64::EmitFPRoundInt64(EmitContext& ctx, IR::Inst* inst) {
 template<size_t fsize>
 static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
-if constexpr (fsize != 16) {
+if (fsize != 16) {
 if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
@@ -1134,7 +1129,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
 if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
 FCODE(vrsqrt14s)(result, operand, operand);
 } else {
-if constexpr (fsize == 32) {
+if (fsize == 32) {
 code.rsqrtss(result, operand);
 } else {
 code.cvtsd2ss(result, operand);
@@ -1180,7 +1175,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
 bool needs_fallback = false;
 code.L(*bad_values);
-if constexpr (fsize == 32) {
+if (fsize == 32) {
 code.movd(tmp, operand);
 if (!ctx.FPCR().FZ()) {
@@ -1302,11 +1297,11 @@ void EmitX64::EmitFPRSqrtEstimate64(EmitContext& ctx, IR::Inst* inst) {
 template<size_t fsize>
 static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-if constexpr (fsize != 16) {
+if (fsize != 16) {
 if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX) && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
 const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
 const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
@@ -1485,9 +1480,8 @@ void EmitX64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) {
 // Double-conversion here is acceptable as this is expanding precision.
 code.vcvtph2ps(result, value);
 code.vcvtps2pd(result, result);
-if (ctx.FPCR().DN()) {
-ForceToDefaultNaN<64>(code, result);
-}
+if (ctx.FPCR().DN())
+ForceToDefaultNaN(code, result, 64);
 ctx.reg_alloc.DefineValue(inst, result);
 return;
@@ -1509,9 +1503,8 @@ void EmitX64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) {
 const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[0]);
 code.vcvtph2ps(result, value);
-if (ctx.FPCR().DN()) {
-ForceToDefaultNaN<32>(code, result);
-}
+if (ctx.FPCR().DN())
+ForceToDefaultNaN(code, result, 32);
 ctx.reg_alloc.DefineValue(inst, result);
 return;
@@ -1519,23 +1512,22 @@ void EmitX64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) {
 ctx.reg_alloc.HostCall(inst, args[0]);
 code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
-code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
+code.mov(code.ABI_PARAM3.cvt32(), u32(rounding_mode));
 code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
 code.CallFunction(&FP::FPConvert<u32, u16>);
 }
 void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-const auto rounding_mode = static_cast<FP::RoundingMode>(args[1].GetImmediateU8());
+const auto rounding_mode = FP::RoundingMode(args[1].GetImmediateU8());
 // We special-case the non-IEEE-defined ToOdd rounding mode.
 if (rounding_mode == ctx.FPCR().RMode() && rounding_mode != FP::RoundingMode::ToOdd) {
 const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
 code.cvtss2sd(result, result);
-if (ctx.FPCR().DN()) {
-ForceToDefaultNaN<64>(code, result);
-}
+if (ctx.FPCR().DN())
+ForceToDefaultNaN(code, result, 64);
 ctx.reg_alloc.DefineValue(inst, result);
 } else {
 ctx.reg_alloc.HostCall(inst, args[0]);
@@ -1553,12 +1545,9 @@ void EmitX64::EmitFPSingleToHalf(EmitContext& ctx, IR::Inst* inst) {
 if (code.HasHostFeature(HostFeature::F16C) && !ctx.FPCR().AHP() && !ctx.FPCR().FZ16()) {
 const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
-if (ctx.FPCR().DN()) {
-ForceToDefaultNaN<32>(code, result);
-}
-code.vcvtps2ph(result, result, static_cast<u8>(*round_imm));
+if (ctx.FPCR().DN())
+ForceToDefaultNaN(code, result, 32);
+code.vcvtps2ph(result, result, u8(*round_imm));
 ctx.reg_alloc.DefineValue(inst, result);
 return;
 }
@@ -1586,21 +1575,18 @@ void EmitX64::EmitFPDoubleToHalf(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-const auto rounding_mode = static_cast<FP::RoundingMode>(args[1].GetImmediateU8());
+const auto rounding_mode = FP::RoundingMode(args[1].GetImmediateU8());
 // We special-case the non-IEEE-defined ToOdd rounding mode.
 if (rounding_mode == ctx.FPCR().RMode() && rounding_mode != FP::RoundingMode::ToOdd) {
 const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
 code.cvtsd2ss(result, result);
-if (ctx.FPCR().DN()) {
-ForceToDefaultNaN<32>(code, result);
-}
+if (ctx.FPCR().DN())
+ForceToDefaultNaN(code, result, 32);
 ctx.reg_alloc.DefineValue(inst, result);
 } else {
 ctx.reg_alloc.HostCall(inst, args[0]);
 code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
-code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
+code.mov(code.ABI_PARAM3.cvt32(), u32(rounding_mode));
 code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
 code.CallFunction(&FP::FPConvert<u32, u64>);
 }
@@ -1630,7 +1616,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 const size_t fbits = args[1].GetImmediateU8();
 const auto rounding_mode = FP::RoundingMode(args[2].GetImmediateU8());
-if constexpr (fsize != 16) {
+if (fsize != 16) {
 const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode);
 // cvttsd2si truncates during operation so rounding (and thus SSE4.1) not required
@@ -1640,7 +1626,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
 const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
-if constexpr (fsize == 64) {
+if (fsize == 64) {
 if (fbits != 0) {
 const u64 scale_factor = static_cast<u64>((fbits + 1023) << 52);
 code.mulsd(src, code.Const(xword, scale_factor));
@@ -1662,13 +1648,13 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 code.cvtss2sd(src, src);
 }
-if constexpr (isize == 64) {
+if (isize == 64) {
 const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
 if (!unsigned_) {
 SharedLabel saturate_max = GenSharedLabel(), end = GenSharedLabel();
-ZeroIfNaN<64>(code, src, scratch);
+ZeroIfNaN(code, src, scratch, 64);
 code.movsd(scratch, code.Const(xword, f64_max_s64_lim));
 code.comisd(scratch, src);
@@ -1706,11 +1692,11 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 code.sar(result2, 63);
 code.or_(result, result2);
 }
-} else if constexpr (isize == 32) {
+} else if (isize == 32) {
 if (!unsigned_) {
 const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
-ZeroIfNaN<64>(code, src, scratch);
+ZeroIfNaN(code, src, scratch, 64);
 code.minsd(src, code.Const(xword, f64_max_s32));
 // maxsd not required as cvttsd2si results in 0x8000'0000 when out of range
 code.cvttsd2si(result.cvt32(), src); // 32 bit gpr
@@ -1723,7 +1709,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 } else {
 const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
-ZeroIfNaN<64>(code, src, scratch);
+ZeroIfNaN(code, src, scratch, 64);
 code.maxsd(src, code.Const(xword, unsigned_ ? f64_min_u16 : f64_min_s16));
 code.minsd(src, code.Const(xword, unsigned_ ? f64_max_u16 : f64_max_s16));
 code.cvttsd2si(result, src); // 64 bit gpr

src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc

@@ -223,7 +223,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
 const bool ordered = IsOrdered(args[2].GetImmediateAccType());
 if constexpr (bitsize != 128) {
-using T = mcl::unsigned_integer_of_size<bitsize>;
+using T = UnsignedIntegerN<bitsize>;
 ctx.reg_alloc.HostCall(inst, {}, args[1]);
@@ -290,7 +290,7 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
 code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
 code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
 if constexpr (bitsize != 128) {
-using T = mcl::unsigned_integer_of_size<bitsize>;
+using T = UnsignedIntegerN<bitsize>;
 code.CallLambda(
 [](AxxUserConfig& conf, Axx::VAddr vaddr, T value) -> u32 {

src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp

@@ -11,7 +11,6 @@
 #include "dynarmic/common/assert.h"
 #include <mcl/bit/bit_field.hpp>
 #include "dynarmic/common/common_types.h"
-#include <mcl/type_traits/integer_of_size.hpp>
 #include "dynarmic/backend/x64/block_of_code.h"
 #include "dynarmic/backend/x64/emit_x64.h"
@@ -38,7 +37,7 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
 Xbyak::Reg addend = ctx.reg_alloc.UseGpr(args[1]).changeBit(size);
 Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr().changeBit(size);
-constexpr u64 int_max = static_cast<u64>((std::numeric_limits<mcl::signed_integer_of_size<size>>::max)());
+constexpr u64 int_max = static_cast<u64>((std::numeric_limits<SignedIntegerN<size>>::max)());
 if constexpr (size < 64) {
 code.xor_(overflow.cvt32(), overflow.cvt32());
 code.bt(result.cvt32(), size - 1);
@@ -82,7 +81,7 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
 Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size);
 Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(args[1]).changeBit(size);
-constexpr u64 boundary = op == Op::Add ? (std::numeric_limits<mcl::unsigned_integer_of_size<size>>::max)() : 0;
+constexpr u64 boundary = op == Op::Add ? (std::numeric_limits<UnsignedIntegerN<size>>::max)() : 0;
 if constexpr (op == Op::Add) {
 code.add(op_result, addend);

src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp

@@ -20,7 +20,6 @@
 #include <mcl/mp/typelist/list.hpp>
 #include <mcl/mp/typelist/lower_to_tuple.hpp>
 #include <mcl/type_traits/function_info.hpp>
-#include <mcl/type_traits/integer_of_size.hpp>
 #include <xbyak/xbyak.h>
 #include "dynarmic/backend/x64/abi.h"
@@ -76,7 +75,7 @@ void MaybeStandardFPSCRValue(BlockOfCode& code, EmitContext& ctx, bool fpcr_cont
 template<size_t fsize, template<typename> class Indexer, size_t narg>
 struct NaNHandler {
 public:
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 using function_type = void (*)(std::array<VectorArray<FPT>, narg>&, FP::FPCR);
@@ -158,33 +157,33 @@ Xbyak::Address GetVectorOf(BlockOfCode& code) {
 template<size_t fsize>
 Xbyak::Address GetNaNVector(BlockOfCode& code) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 return GetVectorOf<fsize, FP::FPInfo<FPT>::DefaultNaN()>(code);
 }
 template<size_t fsize>
 Xbyak::Address GetNegativeZeroVector(BlockOfCode& code) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 return GetVectorOf<fsize, FP::FPInfo<FPT>::Zero(true)>(code);
 }
 template<size_t fsize>
 Xbyak::Address GetNonSignMaskVector(BlockOfCode& code) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 constexpr FPT non_sign_mask = FP::FPInfo<FPT>::exponent_mask | FP::FPInfo<FPT>::mantissa_mask;
 return GetVectorOf<fsize, non_sign_mask>(code);
 }
 template<size_t fsize>
 Xbyak::Address GetSmallestNormalVector(BlockOfCode& code) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 constexpr FPT smallest_normal_number = FP::FPValue<FPT, false, FP::FPInfo<FPT>::exponent_min, 1>();
 return GetVectorOf<fsize, smallest_normal_number>(code);
 }
-template<size_t fsize, bool sign, int exponent, mcl::unsigned_integer_of_size<fsize> value>
+template<size_t fsize, bool sign, int exponent, UnsignedIntegerN<fsize> value>
 Xbyak::Address GetVectorOf(BlockOfCode& code) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 return GetVectorOf<fsize, FP::FPValue<FPT, sign, exponent, value>()>(code);
 }
@@ -1085,7 +1084,7 @@ static void EmitFPVectorMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::I
 if (code.HasHostFeature(HostFeature::AVX)) {
 MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 // result = xmm_a == SNaN || xmm_b == QNaN
 {
@@ -1158,7 +1157,7 @@ static void EmitFPVectorMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::I
 }
 MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 // result = xmm_a == SNaN || xmm_b == QNaN
 {
@@ -1314,7 +1313,7 @@ static void EmitFPVectorMulAddFallback(VectorArray<FPT>& result, const VectorArr
 template<size_t fsize>
 void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 const auto fallback_fn = [](VectorArray<FPT>& result, const VectorArray<FPT>& addend, const VectorArray<FPT>& op1, const VectorArray<FPT>& op2, FP::FPCR fpcr, FP::FPSR& fpsr) {
 for (size_t i = 0; i < result.size(); i++) {
@@ -1425,7 +1424,7 @@ void EmitX64::EmitFPVectorMulAdd64(EmitContext& ctx, IR::Inst* inst) {
 template<size_t fsize>
 static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 const bool fpcr_controlled = args[2].GetImmediateU1();
@@ -1491,7 +1490,7 @@ void EmitX64::EmitFPVectorMulX64(EmitContext& ctx, IR::Inst* inst) {
 template<size_t fsize>
 void FPVectorNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 constexpr FPT sign_mask = FP::FPInfo<FPT>::sign_mask;
 auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -1544,7 +1543,7 @@ void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) {
 template<size_t fsize>
 static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 if constexpr (fsize != 16) {
 if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
@@ -1590,7 +1589,7 @@ void EmitX64::EmitFPVectorRecipEstimate64(EmitContext& ctx, IR::Inst* inst) {
 template<size_t fsize>
 static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 const auto fallback_fn = [](VectorArray<FPT>& result, const VectorArray<FPT>& op1, const VectorArray<FPT>& op2, FP::FPCR fpcr, FP::FPSR& fpsr) {
 for (size_t i = 0; i < result.size(); i++) {
@@ -1760,7 +1759,7 @@ void EmitX64::EmitFPVectorRoundInt64(EmitContext& ctx, IR::Inst* inst) {
 template<size_t fsize>
 static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 const auto fallback_fn = [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {
 for (size_t i = 0; i < result.size(); i++) {
@@ -1852,7 +1851,7 @@ void EmitX64::EmitFPVectorRSqrtEstimate64(EmitContext& ctx, IR::Inst* inst) {
 template<size_t fsize>
 static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-using FPT = mcl::unsigned_integer_of_size<fsize>;
+using FPT = UnsignedIntegerN<fsize>;
 const auto fallback_fn = [](VectorArray<FPT>& result, const VectorArray<FPT>& op1, const VectorArray<FPT>& op2, FP::FPCR fpcr, FP::FPSR& fpsr) {
 for (size_t i = 0; i < result.size(); i++) {
@@ -2126,7 +2125,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 FCODE(orp)(src, exceed_unsigned);
 }
 } else {
-using FPT = mcl::unsigned_integer_of_size<fsize>; // WORKAROUND: For issue 678 on MSVC
+using FPT = UnsignedIntegerN<fsize>; // WORKAROUND: For issue 678 on MSVC
 constexpr u64 integer_max = FPT((std::numeric_limits<std::conditional_t<unsigned_, FPT, std::make_signed_t<FPT>>>::max)());
 code.movaps(xmm0, GetVectorOf<fsize, float_upper_limit_signed>(code));
@@ -2150,7 +2149,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
 static const auto lut = Common::GenerateLookupTableFromList([]<typename I>(I) {
-using FPT = mcl::unsigned_integer_of_size<fsize>; // WORKAROUND: For issue 678 on MSVC
+using FPT = UnsignedIntegerN<fsize>; // WORKAROUND: For issue 678 on MSVC
 return std::pair{
 mp::lower_to_tuple_v<I>,
 Common::FptrCast([](VectorArray<FPT>& output, const VectorArray<FPT>& input, FP::FPCR fpcr, FP::FPSR& fpsr) {

src/dynarmic/src/dynarmic/common/fp/util.h

@@ -6,6 +6,7 @@
 #pragma once
 #include <optional>
+#include <cstdint>
 #include "dynarmic/common/fp/fpcr.h"
 #include "dynarmic/common/fp/info.h"
@@ -96,4 +97,14 @@ constexpr std::optional<FPT> ProcessNaNs(FPT a, FPT b, FPT c) {
 return std::nullopt;
 }
+namespace Detail {
+template<std::size_t size> struct IntegerOfSize {};
+template<> struct IntegerOfSize<8> { using U = std::uint8_t; using S = std::int8_t; };
+template<> struct IntegerOfSize<16> { using U = std::uint16_t; using S = std::int16_t; };
+template<> struct IntegerOfSize<32> { using U = std::uint32_t; using S = std::int32_t; };
+template<> struct IntegerOfSize<64> { using U = std::uint64_t; using S = std::int64_t; };
+} // namespace Detail
+
+template<std::size_t size>
+using UnsignedIntegerN = typename Detail::IntegerOfSize<size>::U;
+template<std::size_t size>
+using SignedIntegerN = typename Detail::IntegerOfSize<size>::S;
 } // namespace Dynarmic::FP
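
For reference, a minimal usage sketch of the new aliases (not part of the commit; it assumes the aliases live in Dynarmic::FP as added above, that callers reach them via the util.h include path shown in this diff, and that ExampleEmit is a hypothetical stand-in for the emitter templates):

// Sketch only: the aliases map a bit width to a fixed-width integer type,
// the same role mcl::unsigned_integer_of_size / mcl::signed_integer_of_size filled.
#include <cstddef>
#include <cstdint>
#include <type_traits>
#include "dynarmic/common/fp/util.h"

static_assert(std::is_same_v<Dynarmic::FP::UnsignedIntegerN<16>, std::uint16_t>);
static_assert(std::is_same_v<Dynarmic::FP::UnsignedIntegerN<64>, std::uint64_t>);
static_assert(std::is_same_v<Dynarmic::FP::SignedIntegerN<32>, std::int32_t>);

// Typical call-site pattern after this change, as seen throughout the diff:
template<std::size_t fsize>
void ExampleEmit() {  // hypothetical helper, not from the codebase
    using FPT = Dynarmic::FP::UnsignedIntegerN<fsize>;  // was mcl::unsigned_integer_of_size<fsize>
    static_assert(sizeof(FPT) * 8 == fsize, "FPT must be exactly fsize bits wide");
}

Keeping the trait local to the project removes the last uses of mcl's integer_of_size header from these backends while preserving the same compile-time width-to-type mapping.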