From 850fc372b787662c1fe18e3800a500983a754bb5 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 11 Feb 2026 19:36:23 +0100 Subject: [PATCH] [dynarmic] AVX512CD impl for lzcnt16 (#3499) I don't have AVX512CD to verify the change, but it should[tm] work Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3499 Reviewed-by: DraVee Co-authored-by: lizzie Co-committed-by: lizzie --- .../src/dynarmic/backend/x64/emit_x64_vector.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index 55bd0abb5d..5c5b376a8b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -927,7 +927,21 @@ void EmitX64::EmitVectorCountLeadingZeros8(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) { - if (code.HasHostFeature(HostFeature::AVX)) { + if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512CD)) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + auto const tmp1 = ctx.reg_alloc.ScratchXmm(code); + // 11-latency: unpack 16-bit(128-bit) XMM into 32-bit(256-bit) YMM vector + // then just lzcnt that, pack it back (unsigned) to 16-bit + // then subtract 16-bit cuz of the zext32 + /*4*/ code.vpmovzxwd(tmp0.cvt256(), tmp0); + /*4*/ code.vplzcntd(tmp0.cvt256(), tmp0.cvt256()); + /*1*/ code.vpxor(tmp1, tmp1, tmp1); + /*1*/ code.vpackusdw(tmp0, tmp0, tmp1); + /*1*/ code.vpaddw(tmp0, tmp0, code.BConst<16>(xword, 65520)); + /*4*/ code.vzeroupper(); + ctx.reg_alloc.DefineValue(code, inst, tmp0); + } else if (code.HasHostFeature(HostFeature::AVX)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]); auto const result = 
ctx.reg_alloc.ScratchXmm(code);