7 changed files with 289 additions and 186 deletions
-
2src/video_core/CMakeLists.txt
-
5src/video_core/command_processor.cpp
-
6src/video_core/pica.h
-
105src/video_core/shader/shader.cpp
-
163src/video_core/shader/shader.h
-
135src/video_core/shader/shader_interpreter.cpp
-
59src/video_core/shader/shader_interpreter.h
@ -0,0 +1,105 @@ |
|||||
|
// Copyright 2015 Citra Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/logging/log.h"
|
||||
|
#include "common/profiler.h"
|
||||
|
|
||||
|
#include "video_core/debug_utils/debug_utils.h"
|
||||
|
#include "video_core/pica.h"
|
||||
|
|
||||
|
#include "shader.h"
|
||||
|
#include "shader_interpreter.h"
|
||||
|
|
||||
|
namespace Pica { |
||||
|
|
||||
|
namespace Shader { |
||||
|
|
||||
|
/**
 * One-time (per shader) preparation hook for a shader unit.
 * Currently a no-op: `state` is accepted but not touched.
 */
void Setup(UnitState& state) {
    // TODO(bunnei): This will be used by the JIT in a subsequent commit
}
||||
|
|
||||
|
static Common::Profiling::TimingCategory shader_category("Vertex Shader"); |
||||
|
|
||||
|
OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) { |
||||
|
auto& config = g_state.regs.vs; |
||||
|
auto& setup = g_state.vs; |
||||
|
|
||||
|
Common::Profiling::ScopeTimer timer(shader_category); |
||||
|
|
||||
|
state.program_counter = config.main_offset; |
||||
|
state.debug.max_offset = 0; |
||||
|
state.debug.max_opdesc_id = 0; |
||||
|
|
||||
|
// Setup input register table
|
||||
|
const auto& attribute_register_map = config.input_register_map; |
||||
|
|
||||
|
if (num_attributes > 0) state.input_registers[attribute_register_map.attribute0_register] = input.attr[0]; |
||||
|
if (num_attributes > 1) state.input_registers[attribute_register_map.attribute1_register] = input.attr[1]; |
||||
|
if (num_attributes > 2) state.input_registers[attribute_register_map.attribute2_register] = input.attr[2]; |
||||
|
if (num_attributes > 3) state.input_registers[attribute_register_map.attribute3_register] = input.attr[3]; |
||||
|
if (num_attributes > 4) state.input_registers[attribute_register_map.attribute4_register] = input.attr[4]; |
||||
|
if (num_attributes > 5) state.input_registers[attribute_register_map.attribute5_register] = input.attr[5]; |
||||
|
if (num_attributes > 6) state.input_registers[attribute_register_map.attribute6_register] = input.attr[6]; |
||||
|
if (num_attributes > 7) state.input_registers[attribute_register_map.attribute7_register] = input.attr[7]; |
||||
|
if (num_attributes > 8) state.input_registers[attribute_register_map.attribute8_register] = input.attr[8]; |
||||
|
if (num_attributes > 9) state.input_registers[attribute_register_map.attribute9_register] = input.attr[9]; |
||||
|
if (num_attributes > 10) state.input_registers[attribute_register_map.attribute10_register] = input.attr[10]; |
||||
|
if (num_attributes > 11) state.input_registers[attribute_register_map.attribute11_register] = input.attr[11]; |
||||
|
if (num_attributes > 12) state.input_registers[attribute_register_map.attribute12_register] = input.attr[12]; |
||||
|
if (num_attributes > 13) state.input_registers[attribute_register_map.attribute13_register] = input.attr[13]; |
||||
|
if (num_attributes > 14) state.input_registers[attribute_register_map.attribute14_register] = input.attr[14]; |
||||
|
if (num_attributes > 15) state.input_registers[attribute_register_map.attribute15_register] = input.attr[15]; |
||||
|
|
||||
|
state.conditional_code[0] = false; |
||||
|
state.conditional_code[1] = false; |
||||
|
|
||||
|
RunInterpreter(state); |
||||
|
|
||||
|
#if PICA_DUMP_SHADERS
|
||||
|
DebugUtils::DumpShader(setup.program_code.data(), state.debug.max_offset, setup.swizzle_data.data(), |
||||
|
state.debug.max_opdesc_id, config.main_offset, |
||||
|
g_state.regs.vs_output_attributes); // TODO: Don't hardcode VS here
|
||||
|
#endif
|
||||
|
|
||||
|
// Setup output data
|
||||
|
OutputVertex ret; |
||||
|
// TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
|
||||
|
// figure out what those circumstances are and enable the remaining outputs then.
|
||||
|
for (int i = 0; i < 7; ++i) { |
||||
|
const auto& output_register_map = g_state.regs.vs_output_attributes[i]; // TODO: Don't hardcode VS here
|
||||
|
|
||||
|
u32 semantics[4] = { |
||||
|
output_register_map.map_x, output_register_map.map_y, |
||||
|
output_register_map.map_z, output_register_map.map_w |
||||
|
}; |
||||
|
|
||||
|
for (int comp = 0; comp < 4; ++comp) { |
||||
|
float24* out = ((float24*)&ret) + semantics[comp]; |
||||
|
if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { |
||||
|
*out = state.output_registers[i][comp]; |
||||
|
} else { |
||||
|
// Zero output so that attributes which aren't output won't have denormals in them,
|
||||
|
// which would slow us down later.
|
||||
|
memset(out, 0, sizeof(*out)); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation
|
||||
|
for (int i = 0; i < 4; ++i) { |
||||
|
ret.color[i] = float24::FromFloat32( |
||||
|
std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); |
||||
|
} |
||||
|
|
||||
|
LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", |
||||
|
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), |
||||
|
ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), |
||||
|
ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); |
||||
|
|
||||
|
return ret; |
||||
|
} |
||||
|
|
||||
|
} // namespace Shader
|
||||
|
|
||||
|
} // namespace Pica
|
||||
@ -0,0 +1,163 @@ |
|||||
|
// Copyright 2015 Citra Emulator Project |
||||
|
// Licensed under GPLv2 or any later version |
||||
|
// Refer to the license.txt file included. |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include <boost/container/static_vector.hpp> |
||||
|
#include <nihstro/shader_binary.h> |
||||
|
|
||||
|
#include "common/common_funcs.h" |
||||
|
#include "common/common_types.h" |
||||
|
#include "common/vector_math.h" |
||||
|
|
||||
|
#include "video_core/pica.h" |
||||
|
|
||||
|
using nihstro::RegisterType; |
||||
|
using nihstro::SourceRegister; |
||||
|
using nihstro::DestRegister; |
||||
|
|
||||
|
namespace Pica { |
||||
|
|
||||
|
namespace Shader { |
||||
|
|
||||
|
struct InputVertex { |
||||
|
Math::Vec4<float24> attr[16]; |
||||
|
}; |
||||
|
|
||||
|
struct OutputVertex { |
||||
|
OutputVertex() = default; |
||||
|
|
||||
|
// VS output attributes |
||||
|
Math::Vec4<float24> pos; |
||||
|
Math::Vec4<float24> dummy; // quaternions (not implemented, yet) |
||||
|
Math::Vec4<float24> color; |
||||
|
Math::Vec2<float24> tc0; |
||||
|
Math::Vec2<float24> tc1; |
||||
|
float24 pad[6]; |
||||
|
Math::Vec2<float24> tc2; |
||||
|
|
||||
|
// Padding for optimal alignment |
||||
|
float24 pad2[4]; |
||||
|
|
||||
|
// Attributes used to store intermediate results |
||||
|
|
||||
|
// position after perspective divide |
||||
|
Math::Vec3<float24> screenpos; |
||||
|
float24 pad3; |
||||
|
|
||||
|
// Linear interpolation |
||||
|
// factor: 0=this, 1=vtx |
||||
|
void Lerp(float24 factor, const OutputVertex& vtx) { |
||||
|
pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); |
||||
|
|
||||
|
// TODO: Should perform perspective correct interpolation here... |
||||
|
tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); |
||||
|
tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor); |
||||
|
tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor); |
||||
|
|
||||
|
screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); |
||||
|
|
||||
|
color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); |
||||
|
} |
||||
|
|
||||
|
// Linear interpolation |
||||
|
// factor: 0=v0, 1=v1 |
||||
|
static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) { |
||||
|
OutputVertex ret = v0; |
||||
|
ret.Lerp(factor, v1); |
||||
|
return ret; |
||||
|
} |
||||
|
}; |
||||
|
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); |
||||
|
static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); |
||||
|
|
||||
|
/** |
||||
|
* This structure contains the state information that needs to be unique for a shader unit. The 3DS |
||||
|
* has four shader units that process shaders in parallel. At the present, Citra only implements a |
||||
|
* single shader unit that processes all shaders serially. Putting the state information in a struct |
||||
|
* here will make it easier for us to parallelize the shader processing later. |
||||
|
*/ |
||||
|
struct UnitState { |
||||
|
// The registers are accessed by the shader JIT using SSE instructions, and are therefore |
||||
|
// required to be 16-byte aligned. |
||||
|
Math::Vec4<float24> MEMORY_ALIGNED16(input_registers[16]); |
||||
|
Math::Vec4<float24> MEMORY_ALIGNED16(output_registers[16]); |
||||
|
Math::Vec4<float24> MEMORY_ALIGNED16(temporary_registers[16]); |
||||
|
|
||||
|
u32 program_counter; |
||||
|
bool conditional_code[2]; |
||||
|
|
||||
|
// Two Address registers and one loop counter |
||||
|
// TODO: How many bits do these actually have? |
||||
|
s32 address_registers[3]; |
||||
|
|
||||
|
enum { |
||||
|
INVALID_ADDRESS = 0xFFFFFFFF |
||||
|
}; |
||||
|
|
||||
|
struct CallStackElement { |
||||
|
u32 final_address; // Address upon which we jump to return_address |
||||
|
u32 return_address; // Where to jump when leaving scope |
||||
|
u8 repeat_counter; // How often to repeat until this call stack element is removed |
||||
|
u8 loop_increment; // Which value to add to the loop counter after an iteration |
||||
|
// TODO: Should this be a signed value? Does it even matter? |
||||
|
u32 loop_address; // The address where we'll return to after each loop iteration |
||||
|
}; |
||||
|
|
||||
|
// TODO: Is there a maximal size for this? |
||||
|
boost::container::static_vector<CallStackElement, 16> call_stack; |
||||
|
|
||||
|
struct { |
||||
|
u32 max_offset; // maximum program counter ever reached |
||||
|
u32 max_opdesc_id; // maximum swizzle pattern index ever used |
||||
|
} debug; |
||||
|
|
||||
|
static int InputOffset(const SourceRegister& reg) { |
||||
|
switch (reg.GetRegisterType()) { |
||||
|
case RegisterType::Input: |
||||
|
return (int)offsetof(UnitState, input_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
||||
|
|
||||
|
case RegisterType::Temporary: |
||||
|
return (int)offsetof(UnitState, temporary_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
||||
|
|
||||
|
default: |
||||
|
UNREACHABLE(); |
||||
|
return 0; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
static int OutputOffset(const DestRegister& reg) { |
||||
|
switch (reg.GetRegisterType()) { |
||||
|
case RegisterType::Output: |
||||
|
return (int)offsetof(UnitState, output_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
||||
|
|
||||
|
case RegisterType::Temporary: |
||||
|
return (int)offsetof(UnitState, temporary_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
||||
|
|
||||
|
default: |
||||
|
UNREACHABLE(); |
||||
|
return 0; |
||||
|
} |
||||
|
} |
||||
|
}; |
||||
|
|
||||
|
/** |
||||
|
* Performs any shader unit setup that only needs to happen once per shader (as opposed to once per |
||||
|
* vertex, which would happen within the `Run` function). |
||||
|
* @param state Shader unit state, must be setup per shader and per shader unit |
||||
|
*/ |
||||
|
void Setup(UnitState& state); |
||||
|
|
||||
|
/** |
||||
|
* Runs the currently setup shader |
||||
|
* @param state Shader unit state, must be setup per shader and per shader unit |
||||
|
* @param input Input vertex into the shader |
||||
|
* @param num_attributes The number of vertex shader attributes |
||||
|
* @return The output vertex, after having been processed by the vertex shader |
||||
|
*/ |
||||
|
OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes); |
||||
|
|
||||
|
} // namespace Shader |
||||
|
|
||||
|
} // namespace Pica |
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue