Browse Source

[dynarmic] AVX512CD impl for lzcnt16 (#3499)

I don't have AVX512CD hardware to verify this change, so it is untested, but it should work.

Signed-off-by: lizzie <lizzie@eden-emu.dev>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3499
Reviewed-by: DraVee <dravee@eden-emu.dev>
Co-authored-by: lizzie <lizzie@eden-emu.dev>
Co-committed-by: lizzie <lizzie@eden-emu.dev>
pull/3526/head
lizzie 2 days ago
committed by crueter
parent
commit
850fc372b7
No known key found for this signature in database GPG Key ID: 425ACD2D4830EBC6
  1. 16
      src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp

16
src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp

@ -927,7 +927,21 @@ void EmitX64::EmitVectorCountLeadingZeros8(EmitContext& ctx, IR::Inst* inst) {
} }
void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::AVX)) {
if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512CD)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]);
auto const tmp1 = ctx.reg_alloc.ScratchXmm(code);
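// 11-latency: zero-extend the eight 16-bit lanes of the 128-bit XMM into
// eight 32-bit lanes of a 256-bit YMM, run the 32-bit lzcnt on those,
// pack the results back (unsigned saturation) to 16-bit lanes, then
// subtract 16 from each lane (add 65520 == -16 mod 2^16) because the
// zero-extension to 32 bits adds 16 extra leading zeros to every count.
// NOTE(review): the pack below uses 128-bit operands, so it appears to
// consume only the low four dwords of the YMM result - confirm that the
// upper four lanes are handled (e.g. via an extract of the high half).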
/*4*/ code.vpmovzxwd(tmp0.cvt256(), tmp0);
/*4*/ code.vplzcntd(tmp0.cvt256(), tmp0.cvt256());
/*1*/ code.vpxor(tmp1, tmp1, tmp1);
/*1*/ code.vpackusdw(tmp0, tmp0, tmp1);
/*1*/ code.vpaddw(tmp0, tmp0, code.BConst<16>(xword, 65520));
/*4*/ code.vzeroupper();
ctx.reg_alloc.DefineValue(code, inst, tmp0);
} else if (code.HasHostFeature(HostFeature::AVX)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
auto const result = ctx.reg_alloc.ScratchXmm(code); auto const result = ctx.reg_alloc.ScratchXmm(code);

Loading…
Cancel
Save