7 changed files with 722 additions and 10 deletions
-
2src/video_core/CMakeLists.txt
-
108src/video_core/command_processor.cpp
-
137src/video_core/pica.h
-
270src/video_core/vertex_shader.cpp
-
211src/video_core/vertex_shader.h
-
2src/video_core/video_core.vcxproj
-
2src/video_core/video_core.vcxproj.filters
@ -0,0 +1,270 @@ |
|||
// Copyright 2014 Citra Emulator Project
|
|||
// Licensed under GPLv2
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "pica.h"
|
|||
#include "vertex_shader.h"
|
|||
#include <core/mem_map.h>
|
|||
#include <common/file_util.h>
|
|||
|
|||
namespace Pica { |
|||
|
|||
namespace VertexShader { |
|||
|
|||
static struct { |
|||
Math::Vec4<float24> f[96]; |
|||
} shader_uniforms; |
|||
|
|||
|
|||
// TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
|
|||
// For now, we just keep these local arrays around.
|
|||
static u32 shader_memory[1024]; |
|||
static u32 swizzle_data[1024]; |
|||
|
|||
// Stores one word of shader program code.
//
// addr:  word index into shader_memory
// value: word to store at that index
//
// Nothing visible here validates the index before it arrives, so an index past the end of the
// buffer is dropped instead of being written to whatever memory follows the array.
void SubmitShaderMemoryChange(u32 addr, u32 value)
{
    const u32 num_words = static_cast<u32>(sizeof(shader_memory) / sizeof(shader_memory[0]));
    if (addr >= num_words)
        return;

    shader_memory[addr] = value;
}
|||
|
|||
// Stores one operand descriptor (swizzle pattern) word.
//
// addr:  word index into swizzle_data
// value: word to store at that index
//
// Nothing visible here validates the index before it arrives, so an index past the end of the
// buffer is dropped instead of being written to whatever memory follows the array.
void SubmitSwizzleDataChange(u32 addr, u32 value)
{
    const u32 num_words = static_cast<u32>(sizeof(swizzle_data) / sizeof(swizzle_data[0]));
    if (addr >= num_words)
        return;

    swizzle_data[addr] = value;
}
|||
|
|||
// Returns a writable reference to the floating point uniform with the given index.
//
// index: uniform number; valid values are 0 up to (but not including) the size of
//        shader_uniforms.f
//
// For an index outside of that range a reference to a scratch register is returned, so that
// callers can still read and write through the result without touching memory outside of the
// uniform array. The scratch register is never read by the shader interpreter.
Math::Vec4<float24>& GetFloatUniform(u32 index)
{
    const u32 num_uniforms = static_cast<u32>(sizeof(shader_uniforms.f) / sizeof(shader_uniforms.f[0]));
    if (index >= num_uniforms) {
        static Math::Vec4<float24> out_of_range_uniform;
        return out_of_range_uniform;
    }

    return shader_uniforms.f[index];
}
|||
|
|||
struct VertexShaderState { |
|||
u32* program_counter; |
|||
|
|||
const float24* input_register_table[16]; |
|||
float24* output_register_table[7*4]; |
|||
|
|||
Math::Vec4<float24> temporary_registers[16]; |
|||
bool status_registers[2]; |
|||
|
|||
enum { |
|||
INVALID_ADDRESS = 0xFFFFFFFF |
|||
}; |
|||
u32 call_stack[8]; // TODO: What is the maximal call stack depth?
|
|||
u32* call_stack_pointer; |
|||
}; |
|||
|
|||
// Interprets the shader program, starting at state.program_counter.
//
// state: register tables, call stack and program counter of the current shader run; all of them
//        are updated in place.
//
// Execution ends when a RET instruction is reached while no subroutine call is pending. It is
// aborted with an error message when the program counter leaves shader_memory or when a CALL
// would overflow the call stack. Instructions whose destination cannot be resolved to a register
// are reported and skipped.
static void ProcessShaderCode(VertexShaderState& state) {
    u32* const code_begin = shader_memory;
    u32* const code_end = shader_memory + sizeof(shader_memory) / sizeof(shader_memory[0]);

    // CALL pre-increments the stack pointer before storing, so the last slot it may use is the
    // final element of the array.
    u32* const call_stack_last =
        state.call_stack + sizeof(state.call_stack) / sizeof(state.call_stack[0]) - 1;

    while (true) {
        // CALL offsets and the entry point can address more words than shader_memory holds, and a
        // program without a final RET would simply run off the end of the buffer.
        if (state.program_counter < code_begin || state.program_counter >= code_end) {
            ERROR_LOG(GPU, "Shader program counter is outside of shader memory, aborting shader");
            break;
        }

        bool increment_pc = true;
        bool exit_loop = false;
        const Instruction& instr = *(const Instruction*)state.program_counter;

        const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1]
                             : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x
                             : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x
                             : nullptr;
        const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2]
                             : &state.temporary_registers[instr.common.src2-0x10].x;

        // TODO: Unsure about the limit values
        // "dest" addresses individual floats:
        //   0x00..0x1B  output registers (output_register_table has 7*4 = 0x1C entries)
        //   0x1C..0x3F  not mapped to anything
        //   0x40..0x7C  temporary registers
        const u32 dest_index = instr.common.dest;
        float24* dest = (dest_index < 0x1C) ? state.output_register_table[dest_index]
                      : (dest_index < 0x40) ? nullptr
                      : (dest_index <= 0x7C) ? &state.temporary_registers[(dest_index-0x40)/4][dest_index%4]
                      : nullptr;

        const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id];

        // Source operands with the swizzle already applied
        const float24 src1[4] = {
            src1_[(int)swizzle.GetSelectorSrc1(0)],
            src1_[(int)swizzle.GetSelectorSrc1(1)],
            src1_[(int)swizzle.GetSelectorSrc1(2)],
            src1_[(int)swizzle.GetSelectorSrc1(3)],
        };
        const float24 src2[4] = {
            src2_[(int)swizzle.GetSelectorSrc2(0)],
            src2_[(int)swizzle.GetSelectorSrc2(1)],
            src2_[(int)swizzle.GetSelectorSrc2(2)],
            src2_[(int)swizzle.GetSelectorSrc2(3)],
        };

        // Checked by every instruction that writes a register. Flow control instructions reuse
        // the same bits for other purposes, so this must not be evaluated for them.
        auto has_valid_dest = [&]() -> bool {
            if (dest != nullptr)
                return true;

            ERROR_LOG(GPU, "Instruction 0x%08x writes to unmapped destination 0x%02x, skipping",
                      instr.hex, (int)dest_index);
            return false;
        };

        switch (instr.opcode) {
            case Instruction::OpCode::ADD:
            {
                if (!has_valid_dest())
                    break;

                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    dest[i] = src1[i] + src2[i];
                }

                break;
            }

            case Instruction::OpCode::MUL:
            {
                if (!has_valid_dest())
                    break;

                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    dest[i] = src1[i] * src2[i];
                }

                break;
            }

            case Instruction::OpCode::DP3:
            case Instruction::OpCode::DP4:
            {
                if (!has_valid_dest())
                    break;

                float24 dot = float24::FromFloat32(0.f);
                int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
                for (int i = 0; i < num_components; ++i)
                    dot = dot + src1[i] * src2[i];

                // The scalar result goes to every enabled destination component. This includes w
                // for DP3: only the number of multiplied components differs between DP3 and DP4.
                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    dest[i] = dot;
                }
                break;
            }

            // Reciprocal
            case Instruction::OpCode::RCP:
            {
                if (!has_valid_dest())
                    break;

                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    // TODO: Be stable against division by zero!
                    // TODO: I think this might be wrong... we should only use one component here
                    dest[i] = float24::FromFloat32(1.0 / src1[i].ToFloat32());
                }

                break;
            }

            // Reciprocal Square Root
            case Instruction::OpCode::RSQ:
            {
                if (!has_valid_dest())
                    break;

                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    // TODO: Be stable against division by zero!
                    // TODO: I think this might be wrong... we should only use one component here
                    dest[i] = float24::FromFloat32(1.0 / sqrt(src1[i].ToFloat32()));
                }

                break;
            }

            case Instruction::OpCode::MOV:
            {
                if (!has_valid_dest())
                    break;

                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    dest[i] = src1[i];
                }
                break;
            }

            case Instruction::OpCode::RET:
                if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) {
                    // No pending call: the program is finished
                    exit_loop = true;
                } else {
                    // Jump back to the CALL that was executed last; the increment below then
                    // moves on to the instruction following it. The slot that is cleared is the
                    // one that was just consumed, so return addresses of outer calls survive.
                    state.program_counter = &shader_memory[*state.call_stack_pointer];
                    *state.call_stack_pointer = VertexShaderState::INVALID_ADDRESS;
                    --state.call_stack_pointer;
                }

                break;

            case Instruction::OpCode::CALL:
                increment_pc = false;

                if (state.call_stack_pointer >= call_stack_last) {
                    ERROR_LOG(GPU, "Shader call stack overflow, aborting shader");
                    exit_loop = true;
                    break;
                }

                *++state.call_stack_pointer = state.program_counter - shader_memory;
                // TODO: Does this offset refer to the beginning of shader memory?
                state.program_counter = &shader_memory[instr.flow_control.offset_words];
                break;

            case Instruction::OpCode::FLS:
                // TODO: Do whatever needs to be done here?
                break;

            default:
                ERROR_LOG(GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
                          (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex);
                break;
        }

        if (increment_pc)
            ++state.program_counter;

        if (exit_loop)
            break;
    }
}
|||
|
|||
// Runs the vertex shader program on a single vertex.
//
// input:          attribute values of the vertex
// num_attributes: number of entries of input.attr that are bound to input registers; attribute i
//                 is bound to the register selected by registers.vs_input_register_map
//
// Returns the vertex assembled from the values the shader wrote to its output registers. Output
// components that the shader does not write are zero.
OutputVertex RunShader(const InputVertex& input, int num_attributes)
{
    VertexShaderState state;

    state.program_counter = &shader_memory[registers.vs_main_offset];

    // Setup input register table
    // Registers without an attribute read from a zeroed dummy. It must provide four components,
    // because the interpreter applies a four component swizzle to every source operand.
    const auto& attribute_register_map = registers.vs_input_register_map;
    const float24 zero = float24::FromFloat32(0.f);
    const float24 dummy_register[4] = { zero, zero, zero, zero };
    std::fill(state.input_register_table, state.input_register_table + 16, &dummy_register[0]);
    if(num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x;
    if(num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x;
    if(num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x;
    if(num_attributes > 3) state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x;
    if(num_attributes > 4) state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x;
    if(num_attributes > 5) state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x;
    if(num_attributes > 6) state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x;
    if(num_attributes > 7) state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x;
    if(num_attributes > 8) state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x;
    if(num_attributes > 9) state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x;
    if(num_attributes > 10) state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x;
    if(num_attributes > 11) state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x;
    if(num_attributes > 12) state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x;
    if(num_attributes > 13) state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x;
    if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;
    if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;

    // Setup output register table
    // Each output component is redirected to the float of the result vertex that is selected by
    // its semantic. The result is value-initialized so that unwritten floats have a defined value.
    OutputVertex ret = OutputVertex();
    for (int i = 0; i < 7; ++i) {
        const auto& output_register_map = registers.vs_output_attributes[i];

        u32 semantics[4] = {
            output_register_map.map_x, output_register_map.map_y,
            output_register_map.map_z, output_register_map.map_w
        };

        for (int comp = 0; comp < 4; ++comp)
            state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp];
    }

    state.status_registers[0] = false;
    state.status_registers[1] = false;

    // An empty call stack consists of INVALID_ADDRESS markers only
    std::fill(state.call_stack, state.call_stack + sizeof(state.call_stack) / sizeof(state.call_stack[0]),
              VertexShaderState::INVALID_ADDRESS);
    state.call_stack_pointer = &state.call_stack[0];

    ProcessShaderCode(state);

    DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
        ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
        ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
        ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32());

    return ret;
}
|||
|
|||
|
|||
} // namespace
|
|||
|
|||
} // namespace
|
|||
@ -0,0 +1,211 @@ |
|||
// Copyright 2014 Citra Emulator Project |
|||
// Licensed under GPLv2 |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <initializer_list> |
|||
|
|||
#include <common/common_types.h> |
|||
|
|||
#include "math.h" |
|||
#include "pica.h" |
|||
|
|||
namespace Pica { |
|||
|
|||
namespace VertexShader { |
|||
|
|||
struct InputVertex { |
|||
Math::Vec4<float24> attr[16]; |
|||
}; |
|||
|
|||
struct OutputVertex { |
|||
OutputVertex() = default; |
|||
|
|||
// VS output attributes |
|||
Math::Vec4<float24> pos; |
|||
Math::Vec4<float24> dummy; // quaternions (not implemented, yet) |
|||
Math::Vec4<float24> color; |
|||
Math::Vec2<float24> tc0; |
|||
float24 tc0_v; |
|||
|
|||
// Padding for optimal alignment |
|||
float24 pad[14]; |
|||
|
|||
// Attributes used to store intermediate results |
|||
|
|||
// position after perspective divide |
|||
Math::Vec3<float24> screenpos; |
|||
|
|||
// Linear interpolation |
|||
// factor: 0=this, 1=vtx |
|||
void Lerp(float24 factor, const OutputVertex& vtx) { |
|||
pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); |
|||
|
|||
// TODO: Should perform perspective correct interpolation here... |
|||
tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); |
|||
|
|||
screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); |
|||
|
|||
color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); |
|||
} |
|||
|
|||
// Linear interpolation |
|||
// factor: 0=v0, 1=v1 |
|||
static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) { |
|||
OutputVertex ret = v0; |
|||
ret.Lerp(factor, v1); |
|||
return ret; |
|||
} |
|||
}; |
|||
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); |
|||
|
|||
union Instruction { |
|||
enum class OpCode : u32 { |
|||
ADD = 0x0, |
|||
DP3 = 0x1, |
|||
DP4 = 0x2, |
|||
|
|||
MUL = 0x8, |
|||
|
|||
MAX = 0xC, |
|||
MIN = 0xD, |
|||
RCP = 0xE, |
|||
RSQ = 0xF, |
|||
|
|||
MOV = 0x13, |
|||
|
|||
RET = 0x21, |
|||
FLS = 0x22, // Flush |
|||
CALL = 0x24, |
|||
}; |
|||
|
|||
std::string GetOpCodeName() const { |
|||
std::map<OpCode, std::string> map = { |
|||
{ OpCode::ADD, "ADD" }, |
|||
{ OpCode::DP3, "DP3" }, |
|||
{ OpCode::DP4, "DP4" }, |
|||
{ OpCode::MUL, "MUL" }, |
|||
{ OpCode::MAX, "MAX" }, |
|||
{ OpCode::MIN, "MIN" }, |
|||
{ OpCode::RCP, "RCP" }, |
|||
{ OpCode::RSQ, "RSQ" }, |
|||
{ OpCode::MOV, "MOV" }, |
|||
{ OpCode::RET, "RET" }, |
|||
{ OpCode::FLS, "FLS" }, |
|||
}; |
|||
auto it = map.find(opcode); |
|||
if (it == map.end()) |
|||
return "UNK"; |
|||
else |
|||
return it->second; |
|||
} |
|||
|
|||
u32 hex; |
|||
|
|||
BitField<0x1a, 0x6, OpCode> opcode; |
|||
|
|||
// General notes: |
|||
// |
|||
// When two input registers are used, one of them uses a 5-bit index while the other |
|||
// one uses a 7-bit index. This is because at most one floating point uniform may be used |
|||
// as an input. |
|||
|
|||
|
|||
// Format used e.g. by arithmetic instructions and comparisons |
|||
// "src1" and "src2" specify register indices (i.e. indices referring to groups of 4 floats), |
|||
// while "dest" addresses individual floats. |
|||
union { |
|||
BitField<0x00, 0x5, u32> operand_desc_id; |
|||
BitField<0x07, 0x5, u32> src2; |
|||
BitField<0x0c, 0x7, u32> src1; |
|||
BitField<0x13, 0x7, u32> dest; |
|||
} common; |
|||
|
|||
// Format used for flow control instructions ("if") |
|||
union { |
|||
BitField<0x00, 0x8, u32> num_instructions; |
|||
BitField<0x0a, 0xc, u32> offset_words; |
|||
} flow_control; |
|||
}; |
|||
|
|||
union SwizzlePattern { |
|||
u32 hex; |
|||
|
|||
enum class Selector : u32 { |
|||
x = 0, |
|||
y = 1, |
|||
z = 2, |
|||
w = 3 |
|||
}; |
|||
|
|||
Selector GetSelectorSrc1(int comp) const { |
|||
Selector selectors[] = { |
|||
src1_selector_0, src1_selector_1, src1_selector_2, src1_selector_3 |
|||
}; |
|||
return selectors[comp]; |
|||
} |
|||
|
|||
Selector GetSelectorSrc2(int comp) const { |
|||
Selector selectors[] = { |
|||
src2_selector_0, src2_selector_1, src2_selector_2, src2_selector_3 |
|||
}; |
|||
return selectors[comp]; |
|||
} |
|||
|
|||
bool DestComponentEnabled(int i) const { |
|||
return (dest_mask & (0x8 >> i)); |
|||
} |
|||
|
|||
std::string SelectorToString(bool src2) const { |
|||
std::map<Selector, std::string> map = { |
|||
{ Selector::x, "x" }, |
|||
{ Selector::y, "y" }, |
|||
{ Selector::z, "z" }, |
|||
{ Selector::w, "w" } |
|||
}; |
|||
std::string ret; |
|||
for (int i = 0; i < 4; ++i) { |
|||
ret += map.at(src2 ? GetSelectorSrc2(i) : GetSelectorSrc1(i)); |
|||
} |
|||
return ret; |
|||
} |
|||
|
|||
std::string DestMaskToString() const { |
|||
std::string ret; |
|||
for (int i = 0; i < 4; ++i) { |
|||
if (!DestComponentEnabled(i)) |
|||
ret += "_"; |
|||
else |
|||
ret += "xyzw"[i]; |
|||
} |
|||
return ret; |
|||
} |
|||
|
|||
// Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x |
|||
BitField< 0, 4, u32> dest_mask; |
|||
|
|||
BitField< 5, 2, Selector> src1_selector_3; |
|||
BitField< 7, 2, Selector> src1_selector_2; |
|||
BitField< 9, 2, Selector> src1_selector_1; |
|||
BitField<11, 2, Selector> src1_selector_0; |
|||
|
|||
BitField<14, 2, Selector> src2_selector_3; |
|||
BitField<16, 2, Selector> src2_selector_2; |
|||
BitField<18, 2, Selector> src2_selector_1; |
|||
BitField<20, 2, Selector> src2_selector_0; |
|||
|
|||
BitField<31, 1, u32> flag; // not sure what this means, maybe it's the sign? |
|||
}; |
|||
|
|||
void SubmitShaderMemoryChange(u32 addr, u32 value); |
|||
void SubmitSwizzleDataChange(u32 addr, u32 value); |
|||
|
|||
OutputVertex RunShader(const InputVertex& input, int num_attributes); |
|||
|
|||
Math::Vec4<float24>& GetFloatUniform(u32 index); |
|||
|
|||
} // namespace |
|||
|
|||
} // namespace |
|||
|
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue