|
|
|
@ -202,7 +202,7 @@ template<std::size_t bitsize> |
|
|
|
const void* EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::RegExp& addr, bool ordered) { |
|
|
|
if (ordered) { |
|
|
|
if constexpr (bitsize != 128) { |
|
|
|
code.xor_(Xbyak::Reg32{value_idx}, Xbyak::Reg32{value_idx}); |
|
|
|
code.xor_(Xbyak::Reg32(value_idx), Xbyak::Reg32(value_idx)); |
|
|
|
} else { |
|
|
|
code.xor_(eax, eax); |
|
|
|
code.xor_(ebx, ebx); |
|
|
|
@ -214,30 +214,30 @@ const void* EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::Reg |
|
|
|
switch (bitsize) { |
|
|
|
case 8: |
|
|
|
code.lock(); |
|
|
|
code.xadd(code.byte[addr], Xbyak::Reg32{value_idx}.cvt8()); |
|
|
|
code.xadd(code.byte[addr], Xbyak::Reg32(value_idx).cvt8()); |
|
|
|
break; |
|
|
|
case 16: |
|
|
|
code.lock(); |
|
|
|
code.xadd(word[addr], Xbyak::Reg16{value_idx}); |
|
|
|
code.xadd(word[addr], Xbyak::Reg64(value_idx).cvt16()); |
|
|
|
break; |
|
|
|
case 32: |
|
|
|
code.lock(); |
|
|
|
code.xadd(dword[addr], Xbyak::Reg32{value_idx}); |
|
|
|
code.xadd(dword[addr], Xbyak::Reg64(value_idx).cvt32()); |
|
|
|
break; |
|
|
|
case 64: |
|
|
|
code.lock(); |
|
|
|
code.xadd(qword[addr], Xbyak::Reg64{value_idx}); |
|
|
|
code.xadd(qword[addr], Xbyak::Reg64(value_idx)); |
|
|
|
break; |
|
|
|
case 128: |
|
|
|
code.lock(); |
|
|
|
code.cmpxchg16b(xword[addr]); |
|
|
|
if (code.HasHostFeature(HostFeature::SSE41)) { |
|
|
|
code.movq(Xbyak::Xmm{value_idx}, rax); |
|
|
|
code.pinsrq(Xbyak::Xmm{value_idx}, rdx, 1); |
|
|
|
code.movq(Xbyak::Xmm(value_idx), rax); |
|
|
|
code.pinsrq(Xbyak::Xmm(value_idx), rdx, 1); |
|
|
|
} else { |
|
|
|
code.movq(Xbyak::Xmm{value_idx}, rax); |
|
|
|
code.movq(Xbyak::Xmm(value_idx), rax); |
|
|
|
code.movq(xmm0, rdx); |
|
|
|
code.punpcklqdq(Xbyak::Xmm{value_idx}, xmm0); |
|
|
|
code.punpcklqdq(Xbyak::Xmm(value_idx), xmm0); |
|
|
|
} |
|
|
|
break; |
|
|
|
default: |
|
|
|
@ -249,19 +249,19 @@ const void* EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::Reg |
|
|
|
const void* fastmem_location = code.getCurr(); |
|
|
|
switch (bitsize) { |
|
|
|
case 8: |
|
|
|
code.movzx(Xbyak::Reg32{value_idx}, code.byte[addr]); |
|
|
|
code.movzx(Xbyak::Reg64(value_idx).cvt32(), code.byte[addr]); |
|
|
|
break; |
|
|
|
case 16: |
|
|
|
code.movzx(Xbyak::Reg32{value_idx}, word[addr]); |
|
|
|
code.movzx(Xbyak::Reg64(value_idx).cvt32(), word[addr]); |
|
|
|
break; |
|
|
|
case 32: |
|
|
|
code.mov(Xbyak::Reg32{value_idx}, dword[addr]); |
|
|
|
code.mov(Xbyak::Reg64(value_idx).cvt32(), dword[addr]); |
|
|
|
break; |
|
|
|
case 64: |
|
|
|
code.mov(Xbyak::Reg64{value_idx}, qword[addr]); |
|
|
|
code.mov(Xbyak::Reg64(value_idx), qword[addr]); |
|
|
|
break; |
|
|
|
case 128: |
|
|
|
code.movups(Xbyak::Xmm{value_idx}, xword[addr]); |
|
|
|
code.movups(Xbyak::Xmm(value_idx), xword[addr]); |
|
|
|
break; |
|
|
|
default: |
|
|
|
ASSERT_FALSE("Invalid bitsize"); |
|
|
|
@ -276,10 +276,10 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int |
|
|
|
code.xor_(eax, eax); |
|
|
|
code.xor_(edx, edx); |
|
|
|
if (code.HasHostFeature(HostFeature::SSE41)) { |
|
|
|
code.movq(rbx, Xbyak::Xmm{value_idx}); |
|
|
|
code.pextrq(rcx, Xbyak::Xmm{value_idx}, 1); |
|
|
|
code.movq(rbx, Xbyak::Xmm(value_idx)); |
|
|
|
code.pextrq(rcx, Xbyak::Xmm(value_idx), 1); |
|
|
|
} else { |
|
|
|
code.movaps(xmm0, Xbyak::Xmm{value_idx}); |
|
|
|
code.movaps(xmm0, Xbyak::Xmm(value_idx)); |
|
|
|
code.movq(rbx, xmm0); |
|
|
|
code.punpckhqdq(xmm0, xmm0); |
|
|
|
code.movq(rcx, xmm0); |
|
|
|
@ -289,16 +289,16 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int |
|
|
|
const void* fastmem_location = code.getCurr(); |
|
|
|
switch (bitsize) { |
|
|
|
case 8: |
|
|
|
code.xchg(code.byte[addr], Xbyak::Reg64{value_idx}.cvt8()); |
|
|
|
code.xchg(code.byte[addr], Xbyak::Reg64(value_idx).cvt8()); |
|
|
|
break; |
|
|
|
case 16: |
|
|
|
code.xchg(word[addr], Xbyak::Reg16{value_idx}); |
|
|
|
code.xchg(word[addr], Xbyak::Reg64(value_idx).cvt16()); |
|
|
|
break; |
|
|
|
case 32: |
|
|
|
code.xchg(dword[addr], Xbyak::Reg32{value_idx}); |
|
|
|
code.xchg(dword[addr], Xbyak::Reg64(value_idx).cvt32()); |
|
|
|
break; |
|
|
|
case 64: |
|
|
|
code.xchg(qword[addr], Xbyak::Reg64{value_idx}); |
|
|
|
code.xchg(qword[addr], Xbyak::Reg64(value_idx)); |
|
|
|
break; |
|
|
|
case 128: { |
|
|
|
Xbyak::Label loop; |
|
|
|
@ -317,19 +317,19 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int |
|
|
|
const void* fastmem_location = code.getCurr(); |
|
|
|
switch (bitsize) { |
|
|
|
case 8: |
|
|
|
code.mov(code.byte[addr], Xbyak::Reg64{value_idx}.cvt8()); |
|
|
|
code.mov(code.byte[addr], Xbyak::Reg64(value_idx).cvt8()); |
|
|
|
break; |
|
|
|
case 16: |
|
|
|
code.mov(word[addr], Xbyak::Reg16{value_idx}); |
|
|
|
code.mov(word[addr], Xbyak::Reg64(value_idx).cvt16()); |
|
|
|
break; |
|
|
|
case 32: |
|
|
|
code.mov(dword[addr], Xbyak::Reg32{value_idx}); |
|
|
|
code.mov(dword[addr], Xbyak::Reg64(value_idx).cvt32()); |
|
|
|
break; |
|
|
|
case 64: |
|
|
|
code.mov(qword[addr], Xbyak::Reg64{value_idx}); |
|
|
|
code.mov(qword[addr], Xbyak::Reg64(value_idx)); |
|
|
|
break; |
|
|
|
case 128: |
|
|
|
code.movups(xword[addr], Xbyak::Xmm{value_idx}); |
|
|
|
code.movups(xword[addr], Xbyak::Xmm(value_idx)); |
|
|
|
break; |
|
|
|
default: |
|
|
|
ASSERT_FALSE("Invalid bitsize"); |
|
|
|
|