|
|
@ -14,22 +14,16 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 |
|
|
// Declares the MacroJitExecute profiling entry in the "GPU" group with the label
// "Execute macro JIT" (presumably a MicroProfile timer token; the macro itself is defined
// outside this view).
// NOTE(review): the statement appears twice because this text interleaves two revisions of
// the same line; a real translation unit would contain it once.
MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); |
|
|
MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); |
|
|
|
|
|
|
|
|
namespace Tegra { |
|
|
namespace Tegra { |
|
|
// Fixed host-register assignments used by the code the macro JIT emits.
// NOTE(review): two revisions of this list are interleaved here (the text is a flattened
// diff), which is why STATE, RESULT and PARAMETERS are each declared twice with different
// registers. Only one set can exist in a real translation unit -- confirm which side is
// current before relying on a specific register.
//
// First set: r9..r13.
// PARAMETERS: base pointer that Compile_FetchParameter reads macro parameters through.
static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9; |
|
|
|
|
|
// REGISTERS: base address of the macro register array (JITState::registers).
static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10; |
|
|
|
|
|
// STATE: pointer to the JITState passed as the first ABI argument.
static const Xbyak::Reg64 STATE = Xbyak::util::r11; |
|
|
|
|
|
// NEXT_PARAMETER: index of the next parameter to fetch, scaled by sizeof(u32) when used.
static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; |
|
|
|
|
|
// RESULT / RESULT_64: 32-bit and 64-bit views of the same host register (r13d / r13).
static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; |
|
|
|
|
|
static const Xbyak::Reg64 RESULT_64 = Xbyak::util::r13; |
|
|
|
|
|
|
|
|
// Second set: STATE, RESULT and PARAMETERS move to rbx, ebp and r12; there is no
// REGISTERS, NEXT_PARAMETER or RESULT_64 counterpart on this side.
static const Xbyak::Reg64 STATE = Xbyak::util::rbx; |
|
|
|
|
|
static const Xbyak::Reg32 RESULT = Xbyak::util::ebp; |
|
|
|
|
|
static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; |
|
|
// METHOD_ADDRESS: 32-bit method address register (identical in both revisions).
static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; |
|
|
static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; |
|
|
// METHOD_ADDRESS_64: 64-bit view of the METHOD_ADDRESS register, present in one revision only.
static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14; |
|
|
|
|
|
// BRANCH_HOLDER: zeroed at the start of the compiled program; its use is outside this view.
static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; |
|
|
static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; |
|
|
|
|
|
|
|
|
// Bit set built by Common::X64::BuildRegSet from the JIT's fixed register assignments.
// Presumably this is the set of registers that must keep their values across calls made
// from generated code -- the consumer of this set is not visible here; TODO confirm.
// NOTE(review): two revisions of the initializer list are interleaved below (flattened
// diff). One lists PARAMETERS, REGISTERS, STATE, NEXT_PARAMETER, RESULT, METHOD_ADDRESS,
// BRANCH_HOLDER; the other lists STATE, RESULT, PARAMETERS, METHOD_ADDRESS, BRANCH_HOLDER.
static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ |
|
|
static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ |
|
|
PARAMETERS, |
|
|
|
|
|
REGISTERS, |
|
|
|
|
|
STATE, |
|
|
STATE, |
|
|
NEXT_PARAMETER, |
|
|
|
|
|
RESULT, |
|
|
RESULT, |
|
|
|
|
|
PARAMETERS, |
|
|
METHOD_ADDRESS, |
|
|
METHOD_ADDRESS, |
|
|
BRANCH_HOLDER, |
|
|
BRANCH_HOLDER, |
|
|
}); |
|
|
}); |
|
|
@ -53,8 +47,7 @@ void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { |
|
|
JITState state{}; |
|
|
JITState state{}; |
|
|
state.maxwell3d = &maxwell3d; |
|
|
state.maxwell3d = &maxwell3d; |
|
|
state.registers = {}; |
|
|
state.registers = {}; |
|
|
state.parameters = parameters.data(); |
|
|
|
|
|
program(&state); |
|
|
|
|
|
|
|
|
program(&state, parameters.data()); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { |
|
|
void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { |
|
|
@ -64,18 +57,18 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { |
|
|
const bool is_move_operation = !is_a_zero && is_b_zero; |
|
|
const bool is_move_operation = !is_a_zero && is_b_zero; |
|
|
const bool has_zero_register = is_a_zero || is_b_zero; |
|
|
const bool has_zero_register = is_a_zero || is_b_zero; |
|
|
|
|
|
|
|
|
Xbyak::Reg64 src_a; |
|
|
|
|
|
|
|
|
Xbyak::Reg32 src_a; |
|
|
Xbyak::Reg32 src_b; |
|
|
Xbyak::Reg32 src_b; |
|
|
|
|
|
|
|
|
if (!optimizer.zero_reg_skip) { |
|
|
if (!optimizer.zero_reg_skip) { |
|
|
src_a = Compile_GetRegister(opcode.src_a, RESULT_64); |
|
|
|
|
|
src_b = Compile_GetRegister(opcode.src_b, ebx); |
|
|
|
|
|
|
|
|
src_a = Compile_GetRegister(opcode.src_a, RESULT); |
|
|
|
|
|
src_b = Compile_GetRegister(opcode.src_b, eax); |
|
|
} else { |
|
|
} else { |
|
|
if (!is_a_zero) { |
|
|
if (!is_a_zero) { |
|
|
src_a = Compile_GetRegister(opcode.src_a, RESULT_64); |
|
|
|
|
|
|
|
|
src_a = Compile_GetRegister(opcode.src_a, RESULT); |
|
|
} |
|
|
} |
|
|
if (!is_b_zero) { |
|
|
if (!is_b_zero) { |
|
|
src_b = Compile_GetRegister(opcode.src_b, ebx); |
|
|
|
|
|
|
|
|
src_b = Compile_GetRegister(opcode.src_b, eax); |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
Xbyak::Label skip_carry{}; |
|
|
Xbyak::Label skip_carry{}; |
|
|
@ -329,7 +322,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { |
|
|
and_(METHOD_ADDRESS, 0xfff); |
|
|
and_(METHOD_ADDRESS, 0xfff); |
|
|
shr(ecx, 12); |
|
|
shr(ecx, 12); |
|
|
and_(ecx, 0x3f); |
|
|
and_(ecx, 0x3f); |
|
|
lea(eax, ptr[rcx + METHOD_ADDRESS_64]); |
|
|
|
|
|
|
|
|
lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]); |
|
|
sal(ecx, 12); |
|
|
sal(ecx, 12); |
|
|
or_(eax, ecx); |
|
|
or_(eax, ecx); |
|
|
|
|
|
|
|
|
@ -424,16 +417,12 @@ void MacroJITx64Impl::Compile() { |
|
|
Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); |
|
|
Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); |
|
|
// JIT state
|
|
|
// JIT state
|
|
|
mov(STATE, Common::X64::ABI_PARAM1); |
|
|
mov(STATE, Common::X64::ABI_PARAM1); |
|
|
mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 + |
|
|
|
|
|
static_cast<Xbyak::uint32>(offsetof(JITState, parameters))]); |
|
|
|
|
|
mov(REGISTERS, Common::X64::ABI_PARAM1); |
|
|
|
|
|
add(REGISTERS, static_cast<Xbyak::uint32>(offsetof(JITState, registers))); |
|
|
|
|
|
|
|
|
mov(PARAMETERS, Common::X64::ABI_PARAM2); |
|
|
xor_(RESULT, RESULT); |
|
|
xor_(RESULT, RESULT); |
|
|
xor_(METHOD_ADDRESS, METHOD_ADDRESS); |
|
|
xor_(METHOD_ADDRESS, METHOD_ADDRESS); |
|
|
xor_(NEXT_PARAMETER, NEXT_PARAMETER); |
|
|
|
|
|
xor_(BRANCH_HOLDER, BRANCH_HOLDER); |
|
|
xor_(BRANCH_HOLDER, BRANCH_HOLDER); |
|
|
|
|
|
|
|
|
mov(dword[REGISTERS + 4], Compile_FetchParameter()); |
|
|
|
|
|
|
|
|
mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); |
|
|
|
|
|
|
|
|
// Track get register for zero registers and mark it as no-op
|
|
|
// Track get register for zero registers and mark it as no-op
|
|
|
optimizer.zero_reg_skip = true; |
|
|
optimizer.zero_reg_skip = true; |
|
|
@ -537,8 +526,8 @@ bool MacroJITx64Impl::Compile_NextInstruction() { |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// Emits code that loads the next 32-bit macro parameter into eax and advances the
// parameter cursor. Returns eax, the register that will hold the fetched value at run time.
// NOTE(review): two revisions of the body are interleaved below (flattened diff):
//   - indexed form: reads PARAMETERS[NEXT_PARAMETER] and increments NEXT_PARAMETER;
//   - pointer-bump form: reads *PARAMETERS and adds sizeof(u32) to PARAMETERS itself.
// Neither form emits a bounds check on the parameter buffer.
Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { |
|
|
Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { |
|
|
// Indexed form: PARAMETERS stays fixed, NEXT_PARAMETER is the element index.
mov(eax, dword[PARAMETERS + NEXT_PARAMETER * sizeof(u32)]); |
|
|
|
|
|
inc(NEXT_PARAMETER); |
|
|
|
|
|
|
|
|
// Pointer-bump form: PARAMETERS itself walks the buffer one u32 at a time.
mov(eax, dword[PARAMETERS]); |
|
|
|
|
|
add(PARAMETERS, sizeof(u32)); |
|
|
return eax; |
|
|
return eax; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
@ -547,31 +536,12 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { |
|
|
// Register 0 is always zero
|
|
|
// Register 0 is always zero
|
|
|
xor_(dst, dst); |
|
|
xor_(dst, dst); |
|
|
} else { |
|
|
} else { |
|
|
mov(dst, dword[REGISTERS + index * sizeof(u32)]); |
|
|
|
|
|
|
|
|
mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
return dst; |
|
|
return dst; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// 64-bit-destination overload: emits code that places macro register `index` in `dst`
// and returns `dst`.
//   index - macro register number; 0 is the hard-wired zero register.
//   dst   - host register that receives the value.
// NOTE(review): this overload has no second copy in this flattened diff, so it presumably
// exists on one side of the change only -- confirm before depending on it.
Xbyak::Reg64 Tegra::MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg64 dst) { |
|
|
|
|
|
if (index == 0) { |
|
|
|
|
|
// Register 0 is always zero
|
|

|
|
|
xor_(dst, dst); |
|
|
|
|
|
} else { |
|
|
|
|
|
// Reads the register from the array at REGISTERS, indexed in u32 units.
// NOTE(review): the memory operand is dword-sized while dst is a 64-bit register --
// confirm the intended load width against the Xbyak operand-size rules.
mov(dst, dword[REGISTERS + index * sizeof(u32)]); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
return dst; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Emits code that derives a carry flag from the upper 32 bits of `dst` and stores it
// (as 0 or 1) into JITState::carry_flag through STATE.
//   dst - 64-bit register holding a widened arithmetic result. It is shifted in place, so
//         after the emitted code runs it contains only the former upper half.
// The emitted code also overwrites ecx.
void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) { |
|
|
|
|
|
// NOTE(review): `zero` and `end` are declared but never referenced in this function.
Xbyak::Label zero{}, end{}; |
|
|
|
|
|
// Clear ecx first so that setne below leaves a clean 0/1 in the full 32-bit register.
xor_(ecx, ecx); |
|
|
|
|
|
// Shift the upper half down; the zero flag then tells whether any upper bit was set.
shr(dst, 32); |
|
|
|
|
|
setne(cl); |
|
|
|
|
|
mov(dword[STATE + offsetof(JITState, carry_flag)], ecx); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { |
|
|
void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { |
|
|
auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { |
|
|
auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { |
|
|
// Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
|
|
|
// Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
|
|
|
@ -579,7 +549,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3 |
|
|
if (reg == 0) { |
|
|
if (reg == 0) { |
|
|
return; |
|
|
return; |
|
|
} |
|
|
} |
|
|
mov(dword[REGISTERS + reg * sizeof(u32)], result); |
|
|
|
|
|
|
|
|
mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result); |
|
|
}; |
|
|
}; |
|
|
auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; |
|
|
auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; |
|
|
|
|
|
|
|
|
|