|
|
|
@ -115,6 +115,8 @@ static const X64Reg SRC1 = XMM1; |
|
|
|
static const X64Reg SRC2 = XMM2; |
|
|
|
/// Alias for XMM3. Loaded with the third swizzled source register; otherwise it can be used as
/// a scratch register.
|
|
|
|
static const X64Reg SRC3 = XMM3; |
|
|
|
/// Additional scratch register (alias for XMM4). Compile_DestEnable uses it as the temporary
/// that holds the unpacked source/destination pair fed to SHUFPS, instead of naming XMM4 directly.
|
|
|
|
static const X64Reg SCRATCH2 = XMM4; |
|
|
|
/// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f] held in XMM14, used to efficiently set a vector
/// to one.
/// NOTE(review): presumably loaded once before the compiled code runs and never overwritten by
/// generated instructions - confirm against the prologue, which is not visible here.
|
|
|
|
static const X64Reg ONE = XMM14; |
|
|
|
/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
|
|
|
|
@ -227,8 +229,8 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { |
|
|
|
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); |
|
|
|
BLENDPS(SCRATCH, R(src), mask); |
|
|
|
} else { |
|
|
|
MOVAPS(XMM4, R(src)); |
|
|
|
UNPCKHPS(XMM4, R(SCRATCH)); // Unpack Z/W (high) components of source and destination
|
|
|
|
MOVAPS(SCRATCH2, R(src)); |
|
|
|
UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack Z/W (high) components of source and destination
|
|
|
|
UNPCKLPS(SCRATCH, R(src)); // Unpack X/Y (low) components of destination and source
|
|
|
|
|
|
|
|
// Compute selector to selectively copy source components to destination for SHUFPS instruction
|
|
|
|
@ -236,7 +238,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { |
|
|
|
((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) | |
|
|
|
((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) | |
|
|
|
((swiz.DestComponentEnabled(3) ? 2 : 3) << 6); |
|
|
|
SHUFPS(SCRATCH, R(XMM4), sel); |
|
|
|
SHUFPS(SCRATCH, R(SCRATCH2), sel); |
|
|
|
} |
|
|
|
|
|
|
|
// Store dest back to memory
|
|
|
|
|