diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index c6b0e3b864..d2d2dd5bd7 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -3521,48 +3521,47 @@ void EmitX64::EmitVectorReverseElementsInHalfGroups8(EmitContext& ctx, IR::Inst* void EmitX64::EmitVectorReverseElementsInWordGroups8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - - // TODO: PSHUFB - - code.movdqa(tmp, data); - code.psllw(tmp, 8); - code.psrlw(data, 8); - code.por(data, tmp); - code.pshuflw(data, data, 0b10110001); - code.pshufhw(data, data, 0b10110001); - + if (code.HasHostFeature(HostFeature::AVX)) { + code.vpshufb(data, data, code.Const(xword, 0x0405060700010203, 0x0c0d0e0f08090a0b)); + } else if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pshufb(data, code.Const(xword, 0x0405060700010203, 0x0c0d0e0f08090a0b)); + } else { + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + code.movdqa(tmp, data); + code.psllw(tmp, 8); + code.psrlw(data, 8); + code.por(data, tmp); + code.pshuflw(data, data, 0b10110001); + code.pshufhw(data, data, 0b10110001); + } ctx.reg_alloc.DefineValue(inst, data); } void EmitX64::EmitVectorReverseElementsInWordGroups16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); - code.pshuflw(data, data, 0b10110001); code.pshufhw(data, data, 0b10110001); - ctx.reg_alloc.DefineValue(inst, data); } void EmitX64::EmitVectorReverseElementsInLongGroups8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - - // TODO: PSHUFB - - code.movdqa(tmp, data); - code.psllw(tmp, 8); - code.psrlw(data, 8); - code.por(data, tmp); - code.pshuflw(data, data, 0b00011011); - code.pshufhw(data, data, 0b00011011); - + if (code.HasHostFeature(HostFeature::AVX)) { + code.vpshufb(data, data, code.Const(xword, 0x0001020304050607, 0x08090a0b0c0d0e0f)); + } else if (code.HasHostFeature(HostFeature::SSSE3)) { + code.pshufb(data, code.Const(xword, 0x0001020304050607, 0x08090a0b0c0d0e0f)); + } else { + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + code.movdqa(tmp, data); + code.psllw(tmp, 8); + code.psrlw(data, 8); + code.por(data, tmp); + code.pshuflw(data, data, 0b00011011); + code.pshufhw(data, data, 0b00011011); + } ctx.reg_alloc.DefineValue(inst, data); } diff --git a/src/dynarmic/tests/A64/a64.cpp b/src/dynarmic/tests/A64/a64.cpp index bc51eca164..5eddf8f7b2 100644 --- a/src/dynarmic/tests/A64/a64.cpp +++ b/src/dynarmic/tests/A64/a64.cpp @@ -91,6 +91,28 @@ TEST_CASE("A64: CLZ", "[a64]") { REQUIRE(jit.GetVector(5) == Vector{0x0, 0x0000001e0000001f}); } +TEST_CASE("A64: VREV", "[a64]") { + A64TestEnv env; + A64::UserConfig jit_user_config{}; + jit_user_config.callbacks = &env; + A64::Jit jit{jit_user_config}; + oaknut::VectorCodeGenerator code{env.code_mem, nullptr}; + code.REV32(V0.B16(), V5.B16()); + code.REV32(V1.H8(), V5.H8()); + code.REV64(V2.B16(), V5.B16()); + code.REV64(V3.H8(), V5.H8()); + code.REV64(V4.S4(), V5.S4()); + jit.SetPC(0); + jit.SetVector(5, {0x1020304050607080, 0x90A0B0C0D0E0F000}); + env.ticks_left = env.code_mem.size(); + CheckedRun([&]() { jit.Run(); }); + REQUIRE(jit.GetVector(0) == Vector{0x4030201080706050, 0xc0b0a09000f0e0d0}); + REQUIRE(jit.GetVector(1) == Vector{0x3040102070805060, 0xb0c090a0f000d0e0}); + REQUIRE(jit.GetVector(2) == Vector{0x8070605040302010, 0x00f0e0d0c0b0a090}); + REQUIRE(jit.GetVector(3) == Vector{0x7080506030401020, 0xf000d0e0b0c090a0}); + REQUIRE(jit.GetVector(4) == Vector{0x5060708010203040, 0xd0e0f00090a0b0c0}); +} + TEST_CASE("A64: UADDL{V,P}", "[a64]") { A64TestEnv env; A64::UserConfig jit_user_config{};