7 changed files with 289 additions and 186 deletions
-
2src/video_core/CMakeLists.txt
-
5src/video_core/command_processor.cpp
-
6src/video_core/pica.h
-
105src/video_core/shader/shader.cpp
-
163src/video_core/shader/shader.h
-
135src/video_core/shader/shader_interpreter.cpp
-
59src/video_core/shader/shader_interpreter.h
@ -0,0 +1,105 @@ |
|||
// Copyright 2015 Citra Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/logging/log.h"
|
|||
#include "common/profiler.h"
|
|||
|
|||
#include "video_core/debug_utils/debug_utils.h"
|
|||
#include "video_core/pica.h"
|
|||
|
|||
#include "shader.h"
|
|||
#include "shader_interpreter.h"
|
|||
|
|||
namespace Pica { |
|||
|
|||
namespace Shader { |
|||
|
|||
/// Per-shader setup hook for a shader unit. Currently a deliberate no-op: `state` is not touched.
void Setup(UnitState& state) {
    // TODO(bunnei): This will be used by the JIT in a subsequent commit
}
|||
|
|||
static Common::Profiling::TimingCategory shader_category("Vertex Shader"); |
|||
|
|||
OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) { |
|||
auto& config = g_state.regs.vs; |
|||
auto& setup = g_state.vs; |
|||
|
|||
Common::Profiling::ScopeTimer timer(shader_category); |
|||
|
|||
state.program_counter = config.main_offset; |
|||
state.debug.max_offset = 0; |
|||
state.debug.max_opdesc_id = 0; |
|||
|
|||
// Setup input register table
|
|||
const auto& attribute_register_map = config.input_register_map; |
|||
|
|||
if (num_attributes > 0) state.input_registers[attribute_register_map.attribute0_register] = input.attr[0]; |
|||
if (num_attributes > 1) state.input_registers[attribute_register_map.attribute1_register] = input.attr[1]; |
|||
if (num_attributes > 2) state.input_registers[attribute_register_map.attribute2_register] = input.attr[2]; |
|||
if (num_attributes > 3) state.input_registers[attribute_register_map.attribute3_register] = input.attr[3]; |
|||
if (num_attributes > 4) state.input_registers[attribute_register_map.attribute4_register] = input.attr[4]; |
|||
if (num_attributes > 5) state.input_registers[attribute_register_map.attribute5_register] = input.attr[5]; |
|||
if (num_attributes > 6) state.input_registers[attribute_register_map.attribute6_register] = input.attr[6]; |
|||
if (num_attributes > 7) state.input_registers[attribute_register_map.attribute7_register] = input.attr[7]; |
|||
if (num_attributes > 8) state.input_registers[attribute_register_map.attribute8_register] = input.attr[8]; |
|||
if (num_attributes > 9) state.input_registers[attribute_register_map.attribute9_register] = input.attr[9]; |
|||
if (num_attributes > 10) state.input_registers[attribute_register_map.attribute10_register] = input.attr[10]; |
|||
if (num_attributes > 11) state.input_registers[attribute_register_map.attribute11_register] = input.attr[11]; |
|||
if (num_attributes > 12) state.input_registers[attribute_register_map.attribute12_register] = input.attr[12]; |
|||
if (num_attributes > 13) state.input_registers[attribute_register_map.attribute13_register] = input.attr[13]; |
|||
if (num_attributes > 14) state.input_registers[attribute_register_map.attribute14_register] = input.attr[14]; |
|||
if (num_attributes > 15) state.input_registers[attribute_register_map.attribute15_register] = input.attr[15]; |
|||
|
|||
state.conditional_code[0] = false; |
|||
state.conditional_code[1] = false; |
|||
|
|||
RunInterpreter(state); |
|||
|
|||
#if PICA_DUMP_SHADERS
|
|||
DebugUtils::DumpShader(setup.program_code.data(), state.debug.max_offset, setup.swizzle_data.data(), |
|||
state.debug.max_opdesc_id, config.main_offset, |
|||
g_state.regs.vs_output_attributes); // TODO: Don't hardcode VS here
|
|||
#endif
|
|||
|
|||
// Setup output data
|
|||
OutputVertex ret; |
|||
// TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
|
|||
// figure out what those circumstances are and enable the remaining outputs then.
|
|||
for (int i = 0; i < 7; ++i) { |
|||
const auto& output_register_map = g_state.regs.vs_output_attributes[i]; // TODO: Don't hardcode VS here
|
|||
|
|||
u32 semantics[4] = { |
|||
output_register_map.map_x, output_register_map.map_y, |
|||
output_register_map.map_z, output_register_map.map_w |
|||
}; |
|||
|
|||
for (int comp = 0; comp < 4; ++comp) { |
|||
float24* out = ((float24*)&ret) + semantics[comp]; |
|||
if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { |
|||
*out = state.output_registers[i][comp]; |
|||
} else { |
|||
// Zero output so that attributes which aren't output won't have denormals in them,
|
|||
// which would slow us down later.
|
|||
memset(out, 0, sizeof(*out)); |
|||
} |
|||
} |
|||
} |
|||
|
|||
// The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation
|
|||
for (int i = 0; i < 4; ++i) { |
|||
ret.color[i] = float24::FromFloat32( |
|||
std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); |
|||
} |
|||
|
|||
LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", |
|||
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), |
|||
ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), |
|||
ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); |
|||
|
|||
return ret; |
|||
} |
|||
|
|||
} // namespace Shader
|
|||
|
|||
} // namespace Pica
|
|||
@ -0,0 +1,163 @@ |
|||
// Copyright 2015 Citra Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <boost/container/static_vector.hpp> |
|||
#include <nihstro/shader_binary.h> |
|||
|
|||
#include "common/common_funcs.h" |
|||
#include "common/common_types.h" |
|||
#include "common/vector_math.h" |
|||
|
|||
#include "video_core/pica.h" |
|||
|
|||
using nihstro::RegisterType; |
|||
using nihstro::SourceRegister; |
|||
using nihstro::DestRegister; |
|||
|
|||
namespace Pica { |
|||
|
|||
namespace Shader { |
|||
|
|||
struct InputVertex { |
|||
Math::Vec4<float24> attr[16]; |
|||
}; |
|||
|
|||
struct OutputVertex { |
|||
OutputVertex() = default; |
|||
|
|||
// VS output attributes |
|||
Math::Vec4<float24> pos; |
|||
Math::Vec4<float24> dummy; // quaternions (not implemented, yet) |
|||
Math::Vec4<float24> color; |
|||
Math::Vec2<float24> tc0; |
|||
Math::Vec2<float24> tc1; |
|||
float24 pad[6]; |
|||
Math::Vec2<float24> tc2; |
|||
|
|||
// Padding for optimal alignment |
|||
float24 pad2[4]; |
|||
|
|||
// Attributes used to store intermediate results |
|||
|
|||
// position after perspective divide |
|||
Math::Vec3<float24> screenpos; |
|||
float24 pad3; |
|||
|
|||
// Linear interpolation |
|||
// factor: 0=this, 1=vtx |
|||
void Lerp(float24 factor, const OutputVertex& vtx) { |
|||
pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); |
|||
|
|||
// TODO: Should perform perspective correct interpolation here... |
|||
tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); |
|||
tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor); |
|||
tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor); |
|||
|
|||
screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); |
|||
|
|||
color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); |
|||
} |
|||
|
|||
// Linear interpolation |
|||
// factor: 0=v0, 1=v1 |
|||
static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) { |
|||
OutputVertex ret = v0; |
|||
ret.Lerp(factor, v1); |
|||
return ret; |
|||
} |
|||
}; |
|||
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); |
|||
static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); |
|||
|
|||
/** |
|||
* This structure contains the state information that needs to be unique for a shader unit. The 3DS |
|||
* has four shader units that process shaders in parallel. At the present, Citra only implements a |
|||
* single shader unit that processes all shaders serially. Putting the state information in a struct |
|||
* here will make it easier for us to parallelize the shader processing later. |
|||
*/ |
|||
struct UnitState { |
|||
// The registers are accessed by the shader JIT using SSE instructions, and are therefore |
|||
// required to be 16-byte aligned. |
|||
Math::Vec4<float24> MEMORY_ALIGNED16(input_registers[16]); |
|||
Math::Vec4<float24> MEMORY_ALIGNED16(output_registers[16]); |
|||
Math::Vec4<float24> MEMORY_ALIGNED16(temporary_registers[16]); |
|||
|
|||
u32 program_counter; |
|||
bool conditional_code[2]; |
|||
|
|||
// Two Address registers and one loop counter |
|||
// TODO: How many bits do these actually have? |
|||
s32 address_registers[3]; |
|||
|
|||
enum { |
|||
INVALID_ADDRESS = 0xFFFFFFFF |
|||
}; |
|||
|
|||
struct CallStackElement { |
|||
u32 final_address; // Address upon which we jump to return_address |
|||
u32 return_address; // Where to jump when leaving scope |
|||
u8 repeat_counter; // How often to repeat until this call stack element is removed |
|||
u8 loop_increment; // Which value to add to the loop counter after an iteration |
|||
// TODO: Should this be a signed value? Does it even matter? |
|||
u32 loop_address; // The address where we'll return to after each loop iteration |
|||
}; |
|||
|
|||
// TODO: Is there a maximal size for this? |
|||
boost::container::static_vector<CallStackElement, 16> call_stack; |
|||
|
|||
struct { |
|||
u32 max_offset; // maximum program counter ever reached |
|||
u32 max_opdesc_id; // maximum swizzle pattern index ever used |
|||
} debug; |
|||
|
|||
static int InputOffset(const SourceRegister& reg) { |
|||
switch (reg.GetRegisterType()) { |
|||
case RegisterType::Input: |
|||
return (int)offsetof(UnitState, input_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
|||
|
|||
case RegisterType::Temporary: |
|||
return (int)offsetof(UnitState, temporary_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
|||
|
|||
default: |
|||
UNREACHABLE(); |
|||
return 0; |
|||
} |
|||
} |
|||
|
|||
static int OutputOffset(const DestRegister& reg) { |
|||
switch (reg.GetRegisterType()) { |
|||
case RegisterType::Output: |
|||
return (int)offsetof(UnitState, output_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
|||
|
|||
case RegisterType::Temporary: |
|||
return (int)offsetof(UnitState, temporary_registers) + reg.GetIndex()*sizeof(Math::Vec4<float24>); |
|||
|
|||
default: |
|||
UNREACHABLE(); |
|||
return 0; |
|||
} |
|||
} |
|||
}; |
|||
|
|||
/** |
|||
* Performs any shader unit setup that only needs to happen once per shader (as opposed to once per |
|||
* vertex, which would happen within the `Run` function). |
|||
* @param state Shader unit state, must be setup per shader and per shader unit |
|||
*/ |
|||
void Setup(UnitState& state); |
|||
|
|||
/** |
|||
* Runs the currently setup shader |
|||
* @param state Shader unit state, must be setup per shader and per shader unit |
|||
* @param input Input vertex into the shader |
|||
* @param num_attributes The number of vertex shader attributes |
|||
* @return The output vertex, after having been processed by the vertex shader |
|||
*/ |
|||
OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes); |
|||
|
|||
} // namespace Shader |
|||
|
|||
} // namespace Pica |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue