|
|
|
@ -59,6 +59,8 @@ const std::array<u32, 1024>& GetSwizzlePatterns() |
|
|
|
return swizzle_data; |
|
|
|
} |
|
|
|
|
|
|
|
// Capacity of VertexShaderState::if_stack, i.e. the number of IF stack entries available.
// TODO: Is there actually a limit on hardware?
|
|
|
|
const int if_stack_size = 8; |
|
|
|
|
|
|
|
struct VertexShaderState { |
|
|
|
u32* program_counter; |
|
|
|
@ -67,7 +69,11 @@ struct VertexShaderState { |
|
|
|
float24* output_register_table[7*4]; |
|
|
|
|
|
|
|
Math::Vec4<float24> temporary_registers[16]; |
|
|
|
bool status_registers[2]; |
|
|
|
bool conditional_code[2]; |
|
|
|
|
|
|
|
// Two address registers and one loop counter
|
|
|
|
// TODO: How many bits do these actually have?
|
|
|
|
s32 address_registers[3]; |
|
|
|
|
|
|
|
enum { |
|
|
|
INVALID_ADDRESS = 0xFFFFFFFF |
|
|
|
@ -75,6 +81,12 @@ struct VertexShaderState { |
|
|
|
u32 call_stack[8]; // TODO: What is the maximal call stack depth?
|
|
|
|
u32* call_stack_pointer; |
|
|
|
|
|
|
|
struct IfStackElement { |
|
|
|
u32 else_addr; |
|
|
|
u32 else_instructions; |
|
|
|
} if_stack[if_stack_size]; |
|
|
|
IfStackElement* if_stack_pointer; |
|
|
|
|
|
|
|
struct { |
|
|
|
u32 max_offset; // maximum program counter ever reached
|
|
|
|
u32 max_opdesc_id; // maximum swizzle pattern index ever used
|
|
|
|
@ -107,11 +119,20 @@ static void ProcessShaderCode(VertexShaderState& state) { |
|
|
|
case Instruction::OpCodeType::Arithmetic: |
|
|
|
{ |
|
|
|
bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); |
|
|
|
const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted)); |
|
|
|
if (is_inverted) { |
|
|
|
// We don't really support this properly and/or reliably
|
|
|
|
LOG_ERROR(HW_GPU, "Bad condition..."); |
|
|
|
exit(0); |
|
|
|
} |
|
|
|
|
|
|
|
const int address_offset = (instr.common.address_register_index == 0) |
|
|
|
? 0 : state.address_registers[instr.common.address_register_index - 1]; |
|
|
|
|
|
|
|
const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset); |
|
|
|
const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); |
|
|
|
|
|
|
|
const bool negate_src1 = (swizzle.negate_src1 != 0); |
|
|
|
const bool negate_src2 = (swizzle.negate_src2 != 0); |
|
|
|
const bool negate_src1 = (swizzle.negate_src1 != false); |
|
|
|
const bool negate_src2 = (swizzle.negate_src2 != false); |
|
|
|
|
|
|
|
float24 src1[4] = { |
|
|
|
src1_[(int)swizzle.GetSelectorSrc1(0)], |
|
|
|
@ -217,6 +238,19 @@ static void ProcessShaderCode(VertexShaderState& state) { |
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
case Instruction::OpCode::MOVA: |
|
|
|
{ |
|
|
|
for (int i = 0; i < 2; ++i) { |
|
|
|
if (!swizzle.DestComponentEnabled(i)) |
|
|
|
continue; |
|
|
|
|
|
|
|
// TODO: Figure out how the rounding is done on hardware
|
|
|
|
state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); |
|
|
|
} |
|
|
|
|
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
case Instruction::OpCode::MOV: |
|
|
|
{ |
|
|
|
for (int i = 0; i < 4; ++i) { |
|
|
|
@ -228,16 +262,56 @@ static void ProcessShaderCode(VertexShaderState& state) { |
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
case Instruction::OpCode::CMP: |
|
|
|
for (int i = 0; i < 2; ++i) { |
|
|
|
// TODO: Can you restrict to one compare via dest masking?
|
|
|
|
|
|
|
|
auto compare_op = instr.common.compare_op; |
|
|
|
auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); |
|
|
|
|
|
|
|
switch (op) { |
|
|
|
case compare_op.Equal: |
|
|
|
state.conditional_code[i] = (src1[i] == src2[i]); |
|
|
|
break; |
|
|
|
|
|
|
|
case compare_op.NotEqual: |
|
|
|
state.conditional_code[i] = (src1[i] != src2[i]); |
|
|
|
break; |
|
|
|
|
|
|
|
case compare_op.LessThan: |
|
|
|
state.conditional_code[i] = (src1[i] < src2[i]); |
|
|
|
break; |
|
|
|
|
|
|
|
case compare_op.LessEqual: |
|
|
|
state.conditional_code[i] = (src1[i] <= src2[i]); |
|
|
|
break; |
|
|
|
|
|
|
|
case compare_op.GreaterThan: |
|
|
|
state.conditional_code[i] = (src1[i] > src2[i]); |
|
|
|
break; |
|
|
|
|
|
|
|
case compare_op.GreaterEqual: |
|
|
|
state.conditional_code[i] = (src1[i] >= src2[i]); |
|
|
|
break; |
|
|
|
|
|
|
|
default: |
|
|
|
LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op)); |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
|
default: |
|
|
|
LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", |
|
|
|
(int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); |
|
|
|
_dbg_assert_(HW_GPU, 0); |
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
break; |
|
|
|
} |
|
|
|
default: |
|
|
|
// Process instruction explicitly
|
|
|
|
// Handle each instruction on its own
|
|
|
|
switch (instr.opcode) { |
|
|
|
// NOP is currently used as a heuristic for leaving from a function.
|
|
|
|
// TODO: This is completely incorrect.
|
|
|
|
@ -265,6 +339,44 @@ static void ProcessShaderCode(VertexShaderState& state) { |
|
|
|
// TODO
|
|
|
|
break; |
|
|
|
|
|
|
|
case Instruction::OpCode::IFC: |
|
|
|
{ |
|
|
|
// TODO: Do we need to consider swizzlers here?
|
|
|
|
|
|
|
|
auto flow_control = instr.flow_control; |
|
|
|
bool results[3] = { flow_control.refx == state.conditional_code[0], |
|
|
|
flow_control.refy == state.conditional_code[1] }; |
|
|
|
|
|
|
|
switch (flow_control.op) { |
|
|
|
case flow_control.Or: |
|
|
|
results[2] = results[0] || results[1]; |
|
|
|
break; |
|
|
|
|
|
|
|
case flow_control.And: |
|
|
|
results[2] = results[0] && results[1]; |
|
|
|
break; |
|
|
|
|
|
|
|
case flow_control.JustX: |
|
|
|
results[2] = results[0]; |
|
|
|
break; |
|
|
|
|
|
|
|
case flow_control.JustY: |
|
|
|
results[2] = results[1]; |
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
if (results[2]) { |
|
|
|
++state.if_stack_pointer; |
|
|
|
|
|
|
|
state.if_stack_pointer->else_addr = instr.flow_control.dest_offset; |
|
|
|
state.if_stack_pointer->else_instructions = instr.flow_control.num_instructions; |
|
|
|
} else { |
|
|
|
state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; |
|
|
|
} |
|
|
|
|
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
default: |
|
|
|
LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", |
|
|
|
(int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); |
|
|
|
@ -277,6 +389,13 @@ static void ProcessShaderCode(VertexShaderState& state) { |
|
|
|
if (increment_pc) |
|
|
|
++state.program_counter; |
|
|
|
|
|
|
|
if (state.if_stack_pointer >= &state.if_stack[0]) { |
|
|
|
if (state.program_counter - shader_memory.data() == state.if_stack_pointer->else_addr) { |
|
|
|
state.program_counter += state.if_stack_pointer->else_instructions; |
|
|
|
state.if_stack_pointer--; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (exit_loop) |
|
|
|
break; |
|
|
|
} |
|
|
|
@ -326,11 +445,15 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) |
|
|
|
state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; |
|
|
|
} |
|
|
|
|
|
|
|
state.status_registers[0] = false; |
|
|
|
state.status_registers[1] = false; |
|
|
|
state.conditional_code[0] = false; |
|
|
|
state.conditional_code[1] = false; |
|
|
|
boost::fill(state.call_stack, VertexShaderState::INVALID_ADDRESS); |
|
|
|
state.call_stack_pointer = &state.call_stack[0]; |
|
|
|
|
|
|
|
std::fill(state.if_stack, state.if_stack + sizeof(state.if_stack) / sizeof(state.if_stack[0]), |
|
|
|
VertexShaderState::IfStackElement{VertexShaderState::INVALID_ADDRESS, VertexShaderState::INVALID_ADDRESS}); |
|
|
|
state.if_stack_pointer = state.if_stack - 1; // Meh. TODO: Make this less ugly
|
|
|
|
|
|
|
|
ProcessShaderCode(state); |
|
|
|
DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), |
|
|
|
state.debug.max_opdesc_id, registers.vs_main_offset, |
|
|
|
|