Browse Source

[dynarmic] use (v)pshufb (SSSE3+) for VREV32/64 emits (#2851)

Signed-off-by: lizzie <lizzie@eden-emu.dev>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2851
Reviewed-by: crueter <crueter@eden-emu.dev>
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
Reviewed-by: Caio Oliveira <caiooliveirafarias0@gmail.com>
Co-authored-by: lizzie <lizzie@eden-emu.dev>
Co-committed-by: lizzie <lizzie@eden-emu.dev>
pull/2892/head
lizzie 2 months ago
committed by crueter
parent
commit
e4b0c03a22
No known key found for this signature in database GPG Key ID: 425ACD2D4830EBC6
  1. 25
      src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp
  2. 22
      src/dynarmic/tests/A64/a64.cpp

25
src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp

@ -3521,48 +3521,47 @@ void EmitX64::EmitVectorReverseElementsInHalfGroups8(EmitContext& ctx, IR::Inst*
// Emits host code that reverses the order of the bytes inside every 32-bit
// group of the 128-bit operand and defines the result as the value of `inst`.
void EmitX64::EmitVectorReverseElementsInWordGroups8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm value = ctx.reg_alloc.UseScratchXmm(args[0]);

    const bool has_avx = code.HasHostFeature(HostFeature::AVX);
    if (has_avx || code.HasHostFeature(HostFeature::SSSE3)) {
        // Byte-shuffle control: within each 4-byte group the source indices
        // run 3,2,1,0 (then 7,6,5,4, and so on for the remaining groups).
        const auto shuffle_mask = code.Const(xword, 0x0405060700010203, 0x0c0d0e0f08090a0b);
        if (has_avx) {
            code.vpshufb(value, value, shuffle_mask);
        } else {
            code.pshufb(value, shuffle_mask);
        }
    } else {
        // Fallback without a byte shuffle: first swap the two bytes of every
        // 16-bit lane via shifts + OR ...
        const Xbyak::Xmm shifted = ctx.reg_alloc.ScratchXmm();
        code.movdqa(shifted, value);
        code.psllw(shifted, 8);
        code.psrlw(value, 8);
        code.por(value, shifted);
        // ... then swap adjacent 16-bit lanes (selector order 1,0,3,2) in the
        // low and high halves of the register.
        code.pshuflw(value, value, 0b10110001);
        code.pshufhw(value, value, 0b10110001);
    }

    ctx.reg_alloc.DefineValue(inst, value);
}
// Emits host code that swaps the two 16-bit elements inside every 32-bit
// group of the 128-bit operand and defines the result as the value of `inst`.
void EmitX64::EmitVectorReverseElementsInWordGroups16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm value = ctx.reg_alloc.UseScratchXmm(args[0]);

    // Selector 0b10110001 picks 16-bit lanes in the order 1,0,3,2, i.e. it
    // exchanges each adjacent pair; one shuffle per 64-bit half.
    code.pshuflw(value, value, 0b10110001);
    code.pshufhw(value, value, 0b10110001);

    ctx.reg_alloc.DefineValue(inst, value);
}
// Emits host code that reverses the order of the bytes inside every 64-bit
// group of the 128-bit operand and defines the result as the value of `inst`.
void EmitX64::EmitVectorReverseElementsInLongGroups8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm value = ctx.reg_alloc.UseScratchXmm(args[0]);

    const bool has_avx = code.HasHostFeature(HostFeature::AVX);
    if (has_avx || code.HasHostFeature(HostFeature::SSSE3)) {
        // Byte-shuffle control: source indices run 7..0 for the low 8 bytes
        // and 15..8 for the high 8 bytes.
        const auto shuffle_mask = code.Const(xword, 0x0001020304050607, 0x08090a0b0c0d0e0f);
        if (has_avx) {
            code.vpshufb(value, value, shuffle_mask);
        } else {
            code.pshufb(value, shuffle_mask);
        }
    } else {
        // Fallback without a byte shuffle: first swap the two bytes of every
        // 16-bit lane via shifts + OR ...
        const Xbyak::Xmm shifted = ctx.reg_alloc.ScratchXmm();
        code.movdqa(shifted, value);
        code.psllw(shifted, 8);
        code.psrlw(value, 8);
        code.por(value, shifted);
        // ... then reverse the four 16-bit lanes (selector order 3,2,1,0) in
        // the low and high halves of the register.
        code.pshuflw(value, value, 0b00011011);
        code.pshufhw(value, value, 0b00011011);
    }

    ctx.reg_alloc.DefineValue(inst, value);
}

22
src/dynarmic/tests/A64/a64.cpp

@ -91,6 +91,28 @@ TEST_CASE("A64: CLZ", "[a64]") {
REQUIRE(jit.GetVector(5) == Vector{0x0, 0x0000001e0000001f}); REQUIRE(jit.GetVector(5) == Vector{0x0, 0x0000001e0000001f});
} }
// Runs every REV32/REV64 vector arrangement used below on the same source
// register (V5) and checks each destination against a precomputed result.
TEST_CASE("A64: VREV", "[a64]") {
    A64TestEnv env;
    A64::UserConfig user_config{};
    user_config.callbacks = &env;
    A64::Jit jit{user_config};

    // Assemble the instructions under test straight into the test environment's code memory.
    oaknut::VectorCodeGenerator code{env.code_mem, nullptr};
    code.REV32(V0.B16(), V5.B16());
    code.REV32(V1.H8(), V5.H8());
    code.REV64(V2.B16(), V5.B16());
    code.REV64(V3.H8(), V5.H8());
    code.REV64(V4.S4(), V5.S4());

    // Every byte of the input is distinct so any misplaced element is visible.
    const Vector input{0x1020304050607080, 0x90A0B0C0D0E0F000};
    jit.SetPC(0);
    jit.SetVector(5, input);

    // Tick budget equals the number of entries emitted into code memory.
    env.ticks_left = env.code_mem.size();
    CheckedRun([&jit] { jit.Run(); });

    // REV32, 8-bit elements.
    REQUIRE(jit.GetVector(0) == Vector{0x4030201080706050, 0xc0b0a09000f0e0d0});
    // REV32, 16-bit elements.
    REQUIRE(jit.GetVector(1) == Vector{0x3040102070805060, 0xb0c090a0f000d0e0});
    // REV64, 8-bit elements.
    REQUIRE(jit.GetVector(2) == Vector{0x8070605040302010, 0x00f0e0d0c0b0a090});
    // REV64, 16-bit elements.
    REQUIRE(jit.GetVector(3) == Vector{0x7080506030401020, 0xf000d0e0b0c090a0});
    // REV64, 32-bit elements.
    REQUIRE(jit.GetVector(4) == Vector{0x5060708010203040, 0xd0e0f00090a0b0c0});
}
TEST_CASE("A64: UADDL{V,P}", "[a64]") { TEST_CASE("A64: UADDL{V,P}", "[a64]") {
A64TestEnv env; A64TestEnv env;
A64::UserConfig jit_user_config{}; A64::UserConfig jit_user_config{};

Loading…
Cancel
Save