Browse Source
Merge pull request #37 from neobrain/pica
Merge pull request #37 from neobrain/pica
Initial work on Pica rendering. (branch: pull/15/merge)
24 changed files with 2368 additions and 261 deletions
-
2src/citra_qt/debugger/graphics_cmdlists.cpp
-
31src/core/hle/service/gsp.cpp
-
110src/core/hw/gpu.cpp
-
66src/core/hw/gpu.h
-
4src/core/mem_map.cpp
-
22src/core/mem_map.h
-
68src/core/mem_map_funcs.cpp
-
16src/video_core/CMakeLists.txt
-
179src/video_core/clipper.cpp
-
21src/video_core/clipper.h
-
238src/video_core/command_processor.cpp
-
31src/video_core/command_processor.h
-
8src/video_core/gpu_debugger.h
-
578src/video_core/math.h
-
465src/video_core/pica.h
-
51src/video_core/primitive_assembly.cpp
-
21src/video_core/primitive_assembly.h
-
180src/video_core/rasterizer.cpp
-
21src/video_core/rasterizer.h
-
14src/video_core/renderer_opengl/renderer_opengl.cpp
-
270src/video_core/vertex_shader.cpp
-
211src/video_core/vertex_shader.h
-
11src/video_core/video_core.vcxproj
-
11src/video_core/video_core.vcxproj.filters
@ -1,10 +1,22 @@ |
|||
set(SRCS video_core.cpp |
|||
set(SRCS clipper.cpp |
|||
command_processor.cpp |
|||
primitive_assembly.cpp |
|||
rasterizer.cpp |
|||
utils.cpp |
|||
vertex_shader.cpp |
|||
video_core.cpp |
|||
renderer_opengl/renderer_opengl.cpp) |
|||
|
|||
set(HEADERS video_core.h |
|||
set(HEADERS clipper.h |
|||
command_processor.h |
|||
math.h |
|||
primitive_assembly.h |
|||
rasterizer.h |
|||
utils.h |
|||
video_core.h |
|||
renderer_base.h |
|||
vertex_shader.h |
|||
video_core.h |
|||
renderer_opengl/renderer_opengl.h) |
|||
|
|||
add_library(video_core STATIC ${SRCS} ${HEADERS}) |
|||
@ -0,0 +1,179 @@ |
|||
// Copyright 2014 Citra Emulator Project
|
|||
// Licensed under GPLv2
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include <vector>
|
|||
|
|||
#include "clipper.h"
|
|||
#include "pica.h"
|
|||
#include "rasterizer.h"
|
|||
#include "vertex_shader.h"
|
|||
|
|||
namespace Pica { |
|||
|
|||
namespace Clipper { |
|||
|
|||
struct ClippingEdge { |
|||
public: |
|||
enum Type { |
|||
POS_X = 0, |
|||
NEG_X = 1, |
|||
POS_Y = 2, |
|||
NEG_Y = 3, |
|||
POS_Z = 4, |
|||
NEG_Z = 5, |
|||
}; |
|||
|
|||
ClippingEdge(Type type, float24 position) : type(type), pos(position) {} |
|||
|
|||
bool IsInside(const OutputVertex& vertex) const { |
|||
switch (type) { |
|||
case POS_X: return vertex.pos.x <= pos * vertex.pos.w; |
|||
case NEG_X: return vertex.pos.x >= pos * vertex.pos.w; |
|||
case POS_Y: return vertex.pos.y <= pos * vertex.pos.w; |
|||
case NEG_Y: return vertex.pos.y >= pos * vertex.pos.w; |
|||
|
|||
// TODO: Check z compares ... should be 0..1 instead?
|
|||
case POS_Z: return vertex.pos.z <= pos * vertex.pos.w; |
|||
|
|||
default: |
|||
case NEG_Z: return vertex.pos.z >= pos * vertex.pos.w; |
|||
} |
|||
} |
|||
|
|||
bool IsOutSide(const OutputVertex& vertex) const { |
|||
return !IsInside(vertex); |
|||
} |
|||
|
|||
OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const { |
|||
auto dotpr = [this](const OutputVertex& vtx) { |
|||
switch (type) { |
|||
case POS_X: return vtx.pos.x - vtx.pos.w; |
|||
case NEG_X: return -vtx.pos.x - vtx.pos.w; |
|||
case POS_Y: return vtx.pos.y - vtx.pos.w; |
|||
case NEG_Y: return -vtx.pos.y - vtx.pos.w; |
|||
|
|||
// TODO: Verify z clipping
|
|||
case POS_Z: return vtx.pos.z - vtx.pos.w; |
|||
|
|||
default: |
|||
case NEG_Z: return -vtx.pos.w; |
|||
} |
|||
}; |
|||
|
|||
float24 dp = dotpr(v0); |
|||
float24 dp_prev = dotpr(v1); |
|||
float24 factor = dp_prev / (dp_prev - dp); |
|||
|
|||
return OutputVertex::Lerp(factor, v0, v1); |
|||
} |
|||
|
|||
private: |
|||
Type type; |
|||
float24 pos; |
|||
}; |
|||
|
|||
// Fills vtx.screenpos from the vertex's clip-space position (divided by w) and the
// viewport registers.
static void InitScreenCoordinates(OutputVertex& vtx)
{
    // Viewport parameters as read from the register file.
    float24 half_extent_x = float24::FromRawFloat24(registers.viewport_size_x);
    float24 half_extent_y = float24::FromRawFloat24(registers.viewport_size_y);
    float24 corner_x      = float24::FromFloat32(registers.viewport_corner.x);
    float24 corner_y      = float24::FromFloat32(registers.viewport_corner.y);
    float24 depth_scale   = float24::FromRawFloat24(registers.viewport_depth_range);
    float24 depth_offset  = float24::FromRawFloat24(registers.viewport_depth_far_plane);

    // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not
    vtx.screenpos[0] = (vtx.pos.x / vtx.pos.w + float24::FromFloat32(1.0)) * half_extent_x / float24::FromFloat32(2.0) + corner_x;
    vtx.screenpos[1] = (vtx.pos.y / vtx.pos.w + float24::FromFloat32(1.0)) * half_extent_y + corner_y;
    vtx.screenpos[2] = depth_offset - vtx.pos.z / vtx.pos.w * depth_scale;
}
|||
|
|||
// Clips the triangle (v0, v1, v2) against the six clipping edges listed below and
// forwards what is left to Rasterizer::ProcessTriangle as a triangle fan anchored at
// the first surviving vertex. Returns without rasterizing anything if fewer than three
// vertices survive any clipping step.
//
// Screen coordinates are written into the vertices that reach the rasterizer, which
// may include v0, v1 and v2 themselves.
void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {

    // TODO (neobrain):
    // The list of output vertices has some fixed maximum size,
    // however I haven't taken the time to figure out what it is exactly.
    // For now, we hence just assume a maximal size of 1000 vertices.
    const size_t max_vertices = 1000;
    std::vector<OutputVertex> buffer_vertices;
    std::vector<OutputVertex*> output_list{ &v0, &v1, &v2 };

    // Make sure to reserve space for all vertices.
    // Without this, buffer reallocation would invalidate the pointers kept in output_list.
    buffer_vertices.reserve(max_vertices);

    // Simple implementation of the Sutherland-Hodgman clipping algorithm.
    // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
    for (auto edge : { ClippingEdge(ClippingEdge::POS_X, float24::FromFloat32(+1.0)),
                       ClippingEdge(ClippingEdge::NEG_X, float24::FromFloat32(-1.0)),
                       ClippingEdge(ClippingEdge::POS_Y, float24::FromFloat32(+1.0)),
                       ClippingEdge(ClippingEdge::NEG_Y, float24::FromFloat32(-1.0)),
                       ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)),
                       ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) {

        const std::vector<OutputVertex*> input_list = output_list;
        output_list.clear();

        // input_list always holds at least three vertices here (see the size check below).
        const OutputVertex* reference_vertex = input_list.back();

        for (const auto& vertex : input_list) {
            // NOTE: This algorithm changes vertex order in some cases!
            if (edge.IsInside(*vertex)) {
                if (edge.IsOutSide(*reference_vertex)) {
                    // Entering the clip volume: emit the crossing point first.
                    buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex));
                    output_list.push_back(&(buffer_vertices.back()));
                }

                output_list.push_back(vertex);
            } else if (edge.IsInside(*reference_vertex)) {
                // Leaving the clip volume: only the crossing point is kept.
                buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex));
                output_list.push_back(&(buffer_vertices.back()));
            }

            reference_vertex = vertex;
        }

        // Need to have at least a full triangle to continue...
        if (output_list.size() < 3)
            return;
    }

    InitScreenCoordinates(*(output_list[0]));
    InitScreenCoordinates(*(output_list[1]));

    // Triangle fan: (0, i+1, i+2). "i + 2 < size" avoids the unsigned subtraction and the
    // signed/unsigned comparison of the previous "i < size - 2" form.
    for (size_t i = 0; i + 2 < output_list.size(); ++i) {
        OutputVertex& vtx0 = *(output_list[0]);
        OutputVertex& vtx1 = *(output_list[i+1]);
        OutputVertex& vtx2 = *(output_list[i+2]);

        InitScreenCoordinates(vtx2);

        // The argument list must match the 24 conversions of the format string exactly;
        // the size_t values are cast because they are printed with %d.
        DEBUG_LOG(GPU,
                  "Triangle %d/%d (%d buffer vertices) at position (%.3f, %.3f, %.3f, %.3f), "
                  "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and "
                  "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)",
                  (int)i, (int)output_list.size(), (int)buffer_vertices.size(),
                  vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(),
                  vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(),
                  vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(),
                  vtx0.screenpos.x.ToFloat32(), vtx0.screenpos.y.ToFloat32(), vtx0.screenpos.z.ToFloat32(),
                  vtx1.screenpos.x.ToFloat32(), vtx1.screenpos.y.ToFloat32(), vtx1.screenpos.z.ToFloat32(),
                  vtx2.screenpos.x.ToFloat32(), vtx2.screenpos.y.ToFloat32(), vtx2.screenpos.z.ToFloat32());

        Rasterizer::ProcessTriangle(vtx0, vtx1, vtx2);
    }
}
|||
|
|||
|
|||
} // namespace
|
|||
|
|||
} // namespace
|
|||
@ -0,0 +1,21 @@ |
|||
// Copyright 2014 Citra Emulator Project |
|||
// Licensed under GPLv2 |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
namespace Pica { |
|||
|
|||
namespace VertexShader { |
|||
struct OutputVertex; |
|||
} |
|||
|
|||
namespace Clipper { |
|||
|
|||
using VertexShader::OutputVertex; |
|||
|
|||
void ProcessTriangle(OutputVertex& v0, OutputVertex& v1, OutputVertex& v2); |
|||
|
|||
} // namespace |
|||
|
|||
} // namespace |
|||
@ -0,0 +1,238 @@ |
|||
// Copyright 2014 Citra Emulator Project
|
|||
// Licensed under GPLv2
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "command_processor.h"
|
|||
#include "math.h"
|
|||
#include "pica.h"
|
|||
#include "primitive_assembly.h"
|
|||
#include "vertex_shader.h"
|
|||
|
|||
|
|||
namespace Pica { |
|||
|
|||
Regs registers; |
|||
|
|||
namespace CommandProcessor { |
|||
|
|||
static int float_regs_counter = 0; |
|||
|
|||
static u32 uniform_write_buffer[4]; |
|||
|
|||
// Used for VSLoadProgramData and VSLoadSwizzleData
|
|||
static u32 vs_binary_write_offset = 0; |
|||
static u32 vs_swizzle_write_offset = 0; |
|||
|
|||
// Stores `value` into Pica register `id` and then performs the action attached to
// that register, if any:
//  - trigger_draw / trigger_draw_indexed: load all vertices, run the vertex shader on
//    each and submit the results to primitive assembly;
//  - vs_uniform_setup.set_value[0..7]: accumulate words and write one float uniform
//    once a full vector has been received;
//  - vs_program.* / vs_swizzle_patterns.*: reset the write offset or upload one word.
// All other registers are only stored.
static inline void WritePicaReg(u32 id, u32 value) {
    registers[id] = value;

    switch(id) {
        // It seems like these trigger vertex rendering
        case PICA_REG_INDEX(trigger_draw):
        case PICA_REG_INDEX(trigger_draw_indexed):
        {
            const auto& attribute_config = registers.vertex_attributes;
            const u8* const base_address = Memory::GetPointer(attribute_config.GetBaseAddress());

            // Information about internal vertex attributes.
            // Zero-initialized: an attribute that no loader provides keeps an element
            // count of 0, so the load loop below skips it instead of dereferencing an
            // uninitialized source pointer.
            const u8* vertex_attribute_sources[16] = {};
            u32 vertex_attribute_strides[16] = {};
            u32 vertex_attribute_formats[16] = {};
            u32 vertex_attribute_elements[16] = {};
            u32 vertex_attribute_element_size[16] = {};

            // Setup attribute data from loaders
            for (int loader = 0; loader < 12; ++loader) {
                const auto& loader_config = attribute_config.attribute_loaders[loader];

                const u8* load_address = base_address + loader_config.data_offset;

                // TODO: What happens if a loader overwrites a previous one's data?
                for (int component = 0; component < loader_config.component_count; ++component) {
                    u32 attribute_index = loader_config.GetComponent(component);
                    vertex_attribute_sources[attribute_index] = load_address;
                    vertex_attribute_strides[attribute_index] = loader_config.byte_count;
                    vertex_attribute_formats[attribute_index] = (u32)attribute_config.GetFormat(attribute_index);
                    vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
                    vertex_attribute_element_size[attribute_index] = attribute_config.GetElementSizeInBytes(attribute_index);
                    // The next attribute of this loader starts right after the current one.
                    load_address += attribute_config.GetStride(attribute_index);
                }
            }

            // Load vertices
            bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));

            const auto& index_info = registers.index_array;
            const u8* index_address_8 = (u8*)base_address + index_info.offset;
            const u16* index_address_16 = (u16*)index_address_8;
            bool index_u16 = (bool)index_info.format;

            for (int index = 0; index < registers.num_vertices; ++index)
            {
                // For indexed draws the vertex number comes from the 8- or 16-bit index array.
                int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index;

                if (is_indexed) {
                    // TODO: Implement some sort of vertex cache!
                }

                // Initialize data for the current vertex
                VertexShader::InputVertex input;

                for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
                    for (int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
                        const u8* srcdata = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
                        // Format codes as handled here: 0 = s8, 1 = u8, 2 = s16, anything else = float.
                        const float srcval = (vertex_attribute_formats[i] == 0) ? *(s8*)srcdata :
                                             (vertex_attribute_formats[i] == 1) ? *(u8*)srcdata :
                                             (vertex_attribute_formats[i] == 2) ? *(s16*)srcdata :
                                                                                  *(float*)srcdata;
                        input.attr[i][comp] = float24::FromFloat32(srcval);
                        // Pointer differences are cast because they are printed with %x.
                        DEBUG_LOG(GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f",
                                  comp, i, vertex, index,
                                  attribute_config.GetBaseAddress(),
                                  (u32)(vertex_attribute_sources[i] - base_address),
                                  (u32)(srcdata - vertex_attribute_sources[i]),
                                  input.attr[i][comp].ToFloat32());
                    }
                }
                VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes());

                if (is_indexed) {
                    // TODO: Add processed vertex to vertex cache!
                }

                PrimitiveAssembly::SubmitVertex(output);
            }
            break;
        }

        case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1):
        case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2):
        case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3):
        case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[3], 0x2c4):
        case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[4], 0x2c5):
        case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[5], 0x2c6):
        case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7):
        case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8):
        {
            auto& uniform_setup = registers.vs_uniform_setup;

            // TODO: Does actual hardware indeed keep an intermediate buffer or does
            //       it directly write the values?
            uniform_write_buffer[float_regs_counter++] = value;

            // Uniforms are written in a packed format such that 4 float24 values are encoded in
            // three 32-bit numbers. We write to internal memory once a full such vector is
            // written.
            if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) ||
                (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) {
                float_regs_counter = 0;

                // Validate the index before it is used to look up the uniform.
                if (uniform_setup.index > 95) {
                    ERROR_LOG(GPU, "Invalid VS uniform index %d", (int)uniform_setup.index);
                    break;
                }

                auto& uniform = VertexShader::GetFloatUniform(uniform_setup.index);

                // NOTE: The destination component order indeed is "backwards"
                if (uniform_setup.IsFloat32()) {
                    for (auto i : {0,1,2,3})
                        uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i]));
                } else {
                    // TODO: Untested
                    // Unpack four 24-bit values from the three buffered 32-bit words.
                    uniform.w = float24::FromRawFloat24(uniform_write_buffer[0] >> 8);
                    uniform.z = float24::FromRawFloat24(((uniform_write_buffer[0] & 0xFF)<<16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF));
                    uniform.y = float24::FromRawFloat24(((uniform_write_buffer[1] & 0xFFFF)<<8) | ((uniform_write_buffer[2] >> 24) & 0xFF));
                    uniform.x = float24::FromRawFloat24(uniform_write_buffer[2] & 0xFFFFFF);
                }

                DEBUG_LOG(GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index,
                          uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(),
                          uniform.w.ToFloat32());

                // TODO: Verify that this actually modifies the register!
                uniform_setup.index = uniform_setup.index + 1;
            }
            break;
        }

        // Seems to be used to reset the write pointer for VSLoadProgramData
        case PICA_REG_INDEX(vs_program.begin_load):
            vs_binary_write_offset = 0;
            break;

        // Load shader program code
        case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc):
        case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd):
        case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[2], 0x2ce):
        case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[3], 0x2cf):
        case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[4], 0x2d0):
        case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[5], 0x2d1):
        case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2):
        case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3):
        {
            VertexShader::SubmitShaderMemoryChange(vs_binary_write_offset, value);
            vs_binary_write_offset++;
            break;
        }

        // Seems to be used to reset the write pointer for VSLoadSwizzleData
        case PICA_REG_INDEX(vs_swizzle_patterns.begin_load):
            vs_swizzle_write_offset = 0;
            break;

        // Load swizzle pattern data
        case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[0], 0x2d6):
        case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[1], 0x2d7):
        case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[2], 0x2d8):
        case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[3], 0x2d9):
        case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[4], 0x2da):
        case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[5], 0x2db):
        case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc):
        case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd):
        {
            VertexShader::SubmitSwizzleDataChange(vs_swizzle_write_offset, value);
            vs_swizzle_write_offset++;
            break;
        }

        default:
            break;
    }
}
|||
|
|||
static std::ptrdiff_t ExecuteCommandBlock(const u32* first_command_word) { |
|||
const CommandHeader& header = *(const CommandHeader*)(&first_command_word[1]); |
|||
|
|||
u32* read_pointer = (u32*)first_command_word; |
|||
|
|||
// TODO: Take parameter mask into consideration!
|
|||
|
|||
WritePicaReg(header.cmd_id, *read_pointer); |
|||
read_pointer += 2; |
|||
|
|||
for (int i = 1; i < 1+header.extra_data_length; ++i) { |
|||
u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0); |
|||
WritePicaReg(cmd, *read_pointer); |
|||
++read_pointer; |
|||
} |
|||
|
|||
// align read pointer to 8 bytes
|
|||
if ((first_command_word - read_pointer) % 2) |
|||
++read_pointer; |
|||
|
|||
return read_pointer - first_command_word; |
|||
} |
|||
|
|||
void ProcessCommandList(const u32* list, u32 size) { |
|||
u32* read_pointer = (u32*)list; |
|||
|
|||
while (read_pointer < list + size) { |
|||
read_pointer += ExecuteCommandBlock(read_pointer); |
|||
} |
|||
} |
|||
|
|||
} // namespace
|
|||
|
|||
} // namespace
|
|||
@ -0,0 +1,31 @@ |
|||
// Copyright 2014 Citra Emulator Project |
|||
// Licensed under GPLv2 |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include "common/bit_field.h" |
|||
#include "common/common_types.h" |
|||
|
|||
#include "pica.h" |
|||
|
|||
namespace Pica { |
|||
|
|||
namespace CommandProcessor { |
|||
|
|||
union CommandHeader { |
|||
u32 hex; |
|||
|
|||
BitField< 0, 16, u32> cmd_id; |
|||
BitField<16, 4, u32> parameter_mask; |
|||
BitField<20, 11, u32> extra_data_length; |
|||
BitField<31, 1, u32> group_commands; |
|||
}; |
|||
static_assert(std::is_standard_layout<CommandHeader>::value == true, "CommandHeader does not use standard layout"); |
|||
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); |
|||
|
|||
void ProcessCommandList(const u32* list, u32 size); |
|||
|
|||
} // namespace |
|||
|
|||
} // namespace |
|||
@ -0,0 +1,578 @@ |
|||
// Licensed under GPLv2 |
|||
// Refer to the license.txt file included. |
|||
|
|||
|
|||
// Copyright 2014 Tony Wasserka |
|||
// All rights reserved. |
|||
// |
|||
// Redistribution and use in source and binary forms, with or without |
|||
// modification, are permitted provided that the following conditions are met: |
|||
// |
|||
// * Redistributions of source code must retain the above copyright |
|||
// notice, this list of conditions and the following disclaimer. |
|||
// * Redistributions in binary form must reproduce the above copyright |
|||
// notice, this list of conditions and the following disclaimer in the |
|||
// documentation and/or other materials provided with the distribution. |
|||
// * Neither the name of the owner nor the names of its contributors may |
|||
// be used to endorse or promote products derived from this software |
|||
// without specific prior written permission. |
|||
// |
|||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|||
|
|||
#pragma once |
|||
|
|||
#include <cmath> |
|||
|
|||
namespace Math { |
|||
|
|||
template<typename T> class Vec2;
template<typename T> class Vec3;
template<typename T> class Vec4;


// Two-component vector with public members x and y.
// Arithmetic is component-wise; the scalar overloads apply the scalar to both components.
template<typename T>
class Vec2 {
public:
    struct {
        T x,y;
    };

    // Pointer to x; operator[] relies on y following x directly.
    T* AsArray() { return &x; }

    Vec2() = default;
    Vec2(const T a[2]) : x(a[0]), y(a[1]) {}
    Vec2(const T& _x, const T& _y) : x(_x), y(_y) {}

    // Converts each component to T2 with a C-style cast.
    template<typename T2>
    Vec2<T2> Cast() const {
        return Vec2<T2>((T2)x, (T2)y);
    }

    // Returns a vector with both components set to f.
    static Vec2 AssignToAll(const T& f)
    {
        return Vec2<T>(f, f);
    }

    // Copies the components into a[0] and a[1]. Does not modify the vector.
    void Write(T a[2]) const
    {
        a[0] = x; a[1] = y;
    }

    Vec2 operator +(const Vec2& other) const
    {
        return Vec2(x+other.x, y+other.y);
    }
    void operator += (const Vec2 &other)
    {
        x+=other.x; y+=other.y;
    }
    Vec2 operator -(const Vec2& other) const
    {
        return Vec2(x-other.x, y-other.y);
    }
    void operator -= (const Vec2& other)
    {
        x-=other.x; y-=other.y;
    }
    Vec2 operator -() const
    {
        return Vec2(-x,-y);
    }
    // Component-wise product (not a dot product).
    Vec2 operator * (const Vec2& other) const
    {
        return Vec2(x*other.x, y*other.y);
    }
    template<typename V>
    Vec2 operator * (const V& f) const
    {
        return Vec2(x*f,y*f);
    }
    template<typename V>
    void operator *= (const V& f)
    {
        x*=f; y*=f;
    }
    template<typename V>
    Vec2 operator / (const V& f) const
    {
        return Vec2(x/f,y/f);
    }
    template<typename V>
    void operator /= (const V& f)
    {
        *this = *this / f;
    }

    // Squared length: x*x + y*y.
    T Length2() const
    {
        return x*x + y*y;
    }

    // Only implemented for T=float
    float Length() const;
    void SetLength(const float l);
    Vec2 WithLength(const float l) const;
    float Distance2To(Vec2 &other);
    Vec2 Normalized() const;
    float Normalize(); // returns the previous length, which is often useful

    // Index 0 is x, index 1 is y; the index is not range-checked.
    T& operator [] (int i) //allow vector[1] = 3   (vector.y=3)
    {
        return *((&x) + i);
    }
    T operator [] (const int i) const
    {
        return *((&x) + i);
    }

    void SetZero()
    {
        x=0; y=0;
    }

    // Common aliases: UV (texel coordinates), ST (texture coordinates)
    T& u() { return x; }
    T& v() { return y; }
    T& s() { return x; }
    T& t() { return y; }

    const T& u() const { return x; }
    const T& v() const { return y; }
    const T& s() const { return x; }
    const T& t() const { return y; }

    // swizzlers - create a subvector of specific components
    Vec2 yx() const { return Vec2(y, x); }
    Vec2 vu() const { return Vec2(y, x); }
    Vec2 ts() const { return Vec2(y, x); }

    // Inserters to add new elements to effectively create larger vectors containing this Vec2.
    // They only read this vector, so they are usable on const objects.
    Vec3<T> InsertBeforeX(const T& value) const {
        return Vec3<T>(value, x, y);
    }
    Vec3<T> InsertBeforeY(const T& value) const {
        return Vec3<T>(x, value, y);
    }
    Vec3<T> Append(const T& value) const {
        return Vec3<T>(x, y, value);
    }
};
|||
|
|||
template<typename T, typename V> |
|||
Vec2<T> operator * (const V& f, const Vec2<T>& vec) |
|||
{ |
|||
return Vec2<T>(f*vec.x,f*vec.y); |
|||
} |
|||
|
|||
typedef Vec2<float> Vec2f; |
|||
|
|||
template<typename T> |
|||
class Vec3 |
|||
{ |
|||
public: |
|||
struct |
|||
{ |
|||
T x,y,z; |
|||
}; |
|||
|
|||
T* AsArray() { return &x; } |
|||
|
|||
Vec3() = default; |
|||
Vec3(const T a[3]) : x(a[0]), y(a[1]), z(a[2]) {} |
|||
Vec3(const T& _x, const T& _y, const T& _z) : x(_x), y(_y), z(_z) {} |
|||
|
|||
template<typename T2> |
|||
Vec3<T2> Cast() const { |
|||
return Vec3<T2>((T2)x, (T2)y, (T2)z); |
|||
} |
|||
|
|||
// Only implemented for T=int and T=float |
|||
static Vec3 FromRGB(unsigned int rgb); |
|||
unsigned int ToRGB() const; // alpha bits set to zero |
|||
|
|||
static Vec3 AssignToAll(const T& f) |
|||
{ |
|||
return Vec3<T>(f, f, f); |
|||
} |
|||
|
|||
void Write(T a[3]) |
|||
{ |
|||
a[0] = x; a[1] = y; a[2] = z; |
|||
} |
|||
|
|||
Vec3 operator +(const Vec3 &other) const |
|||
{ |
|||
return Vec3(x+other.x, y+other.y, z+other.z); |
|||
} |
|||
void operator += (const Vec3 &other) |
|||
{ |
|||
x+=other.x; y+=other.y; z+=other.z; |
|||
} |
|||
Vec3 operator -(const Vec3 &other) const |
|||
{ |
|||
return Vec3(x-other.x, y-other.y, z-other.z); |
|||
} |
|||
void operator -= (const Vec3 &other) |
|||
{ |
|||
x-=other.x; y-=other.y; z-=other.z; |
|||
} |
|||
Vec3 operator -() const |
|||
{ |
|||
return Vec3(-x,-y,-z); |
|||
} |
|||
Vec3 operator * (const Vec3 &other) const |
|||
{ |
|||
return Vec3(x*other.x, y*other.y, z*other.z); |
|||
} |
|||
template<typename V> |
|||
Vec3 operator * (const V& f) const |
|||
{ |
|||
return Vec3(x*f,y*f,z*f); |
|||
} |
|||
template<typename V> |
|||
void operator *= (const V& f) |
|||
{ |
|||
x*=f; y*=f; z*=f; |
|||
} |
|||
template<typename V> |
|||
Vec3 operator / (const V& f) const |
|||
{ |
|||
return Vec3(x/f,y/f,z/f); |
|||
} |
|||
template<typename V> |
|||
void operator /= (const V& f) |
|||
{ |
|||
*this = *this / f; |
|||
} |
|||
|
|||
T Length2() const |
|||
{ |
|||
return x*x + y*y + z*z; |
|||
} |
|||
|
|||
// Only implemented for T=float |
|||
float Length() const; |
|||
void SetLength(const float l); |
|||
Vec3 WithLength(const float l) const; |
|||
float Distance2To(Vec3 &other); |
|||
Vec3 Normalized() const; |
|||
float Normalize(); // returns the previous length, which is often useful |
|||
|
|||
T& operator [] (int i) //allow vector[2] = 3 (vector.z=3) |
|||
{ |
|||
return *((&x) + i); |
|||
} |
|||
T operator [] (const int i) const |
|||
{ |
|||
return *((&x) + i); |
|||
} |
|||
|
|||
void SetZero() |
|||
{ |
|||
x=0; y=0; z=0; |
|||
} |
|||
|
|||
// Common aliases: UVW (texel coordinates), RGB (colors), STQ (texture coordinates) |
|||
T& u() { return x; } |
|||
T& v() { return y; } |
|||
T& w() { return z; } |
|||
|
|||
T& r() { return x; } |
|||
T& g() { return y; } |
|||
T& b() { return z; } |
|||
|
|||
T& s() { return x; } |
|||
T& t() { return y; } |
|||
T& q() { return z; } |
|||
|
|||
const T& u() const { return x; } |
|||
const T& v() const { return y; } |
|||
const T& w() const { return z; } |
|||
|
|||
const T& r() const { return x; } |
|||
const T& g() const { return y; } |
|||
const T& b() const { return z; } |
|||
|
|||
const T& s() const { return x; } |
|||
const T& t() const { return y; } |
|||
const T& q() const { return z; } |
|||
|
|||
// swizzlers - create a subvector of specific components |
|||
// e.g. Vec2 uv() { return Vec2(x,y); } |
|||
// _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) |
|||
#define _DEFINE_SWIZZLER2(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); } |
|||
#define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \ |
|||
_DEFINE_SWIZZLER2(a, b, a##b); \ |
|||
_DEFINE_SWIZZLER2(a, b, a2##b2); \ |
|||
_DEFINE_SWIZZLER2(a, b, a3##b3); \ |
|||
_DEFINE_SWIZZLER2(a, b, a4##b4); \ |
|||
_DEFINE_SWIZZLER2(b, a, b##a); \ |
|||
_DEFINE_SWIZZLER2(b, a, b2##a2); \ |
|||
_DEFINE_SWIZZLER2(b, a, b3##a3); \ |
|||
_DEFINE_SWIZZLER2(b, a, b4##a4); |
|||
|
|||
DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t); |
|||
DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q); |
|||
DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q); |
|||
#undef DEFINE_SWIZZLER2 |
|||
#undef _DEFINE_SWIZZLER2 |
|||
|
|||
// Inserters to add new elements to effectively create larger vectors containing this Vec2 |
|||
Vec4<T> InsertBeforeX(const T& value) { |
|||
return Vec4<T>(value, x, y, z); |
|||
} |
|||
Vec4<T> InsertBeforeY(const T& value) { |
|||
return Vec4<T>(x, value, y, z); |
|||
} |
|||
Vec4<T> InsertBeforeZ(const T& value) { |
|||
return Vec4<T>(x, y, value, z); |
|||
} |
|||
Vec4<T> Append(const T& value) { |
|||
return Vec4<T>(x, y, z, value); |
|||
} |
|||
}; |
|||
|
|||
template<typename T, typename V> |
|||
Vec3<T> operator * (const V& f, const Vec3<T>& vec) |
|||
{ |
|||
return Vec3<T>(f*vec.x,f*vec.y,f*vec.z); |
|||
} |
|||
|
|||
typedef Vec3<float> Vec3f; |
|||
|
|||
template<typename T> |
|||
class Vec4 |
|||
{ |
|||
public: |
|||
struct |
|||
{ |
|||
T x,y,z,w; |
|||
}; |
|||
|
|||
T* AsArray() { return &x; } |
|||
|
|||
Vec4() = default; |
|||
Vec4(const T a[4]) : x(a[0]), y(a[1]), z(a[2]), w(a[3]) {} |
|||
Vec4(const T& _x, const T& _y, const T& _z, const T& _w) : x(_x), y(_y), z(_z), w(_w) {} |
|||
|
|||
template<typename T2> |
|||
Vec4<T2> Cast() const { |
|||
return Vec4<T2>((T2)x, (T2)y, (T2)z, (T2)w); |
|||
} |
|||
|
|||
// Only implemented for T=int and T=float |
|||
static Vec4 FromRGBA(unsigned int rgba); |
|||
unsigned int ToRGBA() const; |
|||
|
|||
static Vec4 AssignToAll(const T& f) { |
|||
return Vec4<T>(f, f, f, f); |
|||
} |
|||
|
|||
void Write(T a[4]) |
|||
{ |
|||
a[0] = x; a[1] = y; a[2] = z; a[3] = w; |
|||
} |
|||
|
|||
Vec4 operator +(const Vec4& other) const |
|||
{ |
|||
return Vec4(x+other.x, y+other.y, z+other.z, w+other.w); |
|||
} |
|||
void operator += (const Vec4& other) |
|||
{ |
|||
x+=other.x; y+=other.y; z+=other.z; w+=other.w; |
|||
} |
|||
Vec4 operator -(const Vec4 &other) const |
|||
{ |
|||
return Vec4(x-other.x, y-other.y, z-other.z, w-other.w); |
|||
} |
|||
void operator -= (const Vec4 &other) |
|||
{ |
|||
x-=other.x; y-=other.y; z-=other.z; w-=other.w; |
|||
} |
|||
Vec4 operator -() const |
|||
{ |
|||
return Vec4(-x,-y,-z,-w); |
|||
} |
|||
Vec4 operator * (const Vec4 &other) const |
|||
{ |
|||
return Vec4(x*other.x, y*other.y, z*other.z, w*other.w); |
|||
} |
|||
template<typename V> |
|||
Vec4 operator * (const V& f) const |
|||
{ |
|||
return Vec4(x*f,y*f,z*f,w*f); |
|||
} |
|||
template<typename V> |
|||
void operator *= (const V& f) |
|||
{ |
|||
x*=f; y*=f; z*=f; w*=f; |
|||
} |
|||
template<typename V> |
|||
Vec4 operator / (const V& f) const |
|||
{ |
|||
return Vec4(x/f,y/f,z/f,w/f); |
|||
} |
|||
template<typename V> |
|||
void operator /= (const V& f) |
|||
{ |
|||
*this = *this / f; |
|||
} |
|||
|
|||
T Length2() const |
|||
{ |
|||
return x*x + y*y + z*z + w*w; |
|||
} |
|||
|
|||
// Only implemented for T=float |
|||
// Declarations only; none of these has a body inside the class.
float Length() const; |
|||
void SetLength(const float l); |
|||
Vec4 WithLength(const float l) const; |
|||
// NOTE(review): takes a non-const reference and is itself non-const, unlike the neighbouring const queries - confirm whether that is intentional.
float Distance2To(Vec4 &other); |
|||
Vec4 Normalized() const; |
|||
float Normalize(); // returns the previous length, which is often useful |
|||
|
|||
// Mutable component access by index, e.g. vector[2] = 3 is vector.z = 3.
// The index is not range-checked.
T& operator [] (int i)
{
    T* const first = &x;
    return first[i];
}

// Read-only component access by index; returns a copy. The index is not range-checked.
T operator [] (const int i) const
{
    const T* const first = &x;
    return first[i];
}
|||
|
|||
void SetZero() |
|||
{ |
|||
x=0; y=0; z=0; |
|||
} |
|||
|
|||
// Common alias: RGBA (colors) |
|||
// Mutable aliases: r, g, b, a return references to x, y, z, w respectively.
T& r() { return x; } |
|||
T& g() { return y; } |
|||
T& b() { return z; } |
|||
T& a() { return w; } |
|||
|
|||
// Read-only counterparts of the aliases above.
const T& r() const { return x; } |
|||
const T& g() const { return y; } |
|||
const T& b() const { return z; } |
|||
const T& a() const { return w; } |
|||
|
|||
// swizzlers - create a subvector of specific components |
|||
// e.g. Vec2 uv() { return Vec2(x,y); } |
|||
// _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) |
|||
// Each generated function is const and returns a Vec2<T> by value. Both macros are #undef'ed right after the six expansions below.
#define _DEFINE_SWIZZLER2(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); } |
|||
#define DEFINE_SWIZZLER2(a, b, a2, b2) \ |
|||
_DEFINE_SWIZZLER2(a, b, a##b); \ |
|||
_DEFINE_SWIZZLER2(a, b, a2##b2); \ |
|||
_DEFINE_SWIZZLER2(b, a, b##a); \ |
|||
_DEFINE_SWIZZLER2(b, a, b2##a2); |
|||
|
|||
DEFINE_SWIZZLER2(x, y, r, g); |
|||
DEFINE_SWIZZLER2(x, z, r, b); |
|||
DEFINE_SWIZZLER2(x, w, r, a); |
|||
DEFINE_SWIZZLER2(y, z, g, b); |
|||
DEFINE_SWIZZLER2(y, w, g, a); |
|||
DEFINE_SWIZZLER2(z, w, b, a); |
|||
#undef DEFINE_SWIZZLER2 |
|||
#undef _DEFINE_SWIZZLER2 |
|||
|
|||
// Three-component swizzlers. _DEFINE_SWIZZLER3 defines one const function returning Vec3<T>(a, b, c);
// DEFINE_SWIZZLER3 expands it for all six orderings of a component triple, once under the first
// set of names (a, b, c) and once under the second (a2, b2, c2). Both macros are #undef'ed after use.
#define _DEFINE_SWIZZLER3(a, b, c, name) Vec3<T> name() const { return Vec3<T>(a, b, c); } |
|||
#define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \ |
|||
_DEFINE_SWIZZLER3(a, b, c, a##b##c); \ |
|||
_DEFINE_SWIZZLER3(a, c, b, a##c##b); \ |
|||
_DEFINE_SWIZZLER3(b, a, c, b##a##c); \ |
|||
_DEFINE_SWIZZLER3(b, c, a, b##c##a); \ |
|||
_DEFINE_SWIZZLER3(c, a, b, c##a##b); \ |
|||
_DEFINE_SWIZZLER3(c, b, a, c##b##a); \ |
|||
_DEFINE_SWIZZLER3(a, b, c, a2##b2##c2); \ |
|||
_DEFINE_SWIZZLER3(a, c, b, a2##c2##b2); \ |
|||
_DEFINE_SWIZZLER3(b, a, c, b2##a2##c2); \ |
|||
_DEFINE_SWIZZLER3(b, c, a, b2##c2##a2); \ |
|||
_DEFINE_SWIZZLER3(c, a, b, c2##a2##b2); \ |
|||
_DEFINE_SWIZZLER3(c, b, a, c2##b2##a2); |
|||
|
|||
DEFINE_SWIZZLER3(x, y, z, r, g, b); |
|||
DEFINE_SWIZZLER3(x, y, w, r, g, a); |
|||
DEFINE_SWIZZLER3(x, z, w, r, b, a); |
|||
DEFINE_SWIZZLER3(y, z, w, g, b, a); |
|||
#undef DEFINE_SWIZZLER3 |
|||
#undef _DEFINE_SWIZZLER3 |
|||
}; |
|||
|
|||
|
|||
template<typename T, typename V> |
|||
Vec4<T> operator * (const V& f, const Vec4<T>& vec) |
|||
{ |
|||
return Vec4<T>(f*vec.x,f*vec.y,f*vec.z,f*vec.w); |
|||
} |
|||
|
|||
typedef Vec4<float> Vec4f; |
|||
|
|||
|
|||
template<typename T> |
|||
static inline T Dot(const Vec2<T>& a, const Vec2<T>& b) |
|||
{ |
|||
return a.x*b.x + a.y*b.y; |
|||
} |
|||
|
|||
template<typename T> |
|||
static inline T Dot(const Vec3<T>& a, const Vec3<T>& b) |
|||
{ |
|||
return a.x*b.x + a.y*b.y + a.z*b.z; |
|||
} |
|||
|
|||
template<typename T> |
|||
static inline T Dot(const Vec4<T>& a, const Vec4<T>& b) |
|||
{ |
|||
return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; |
|||
} |
|||
|
|||
template<typename T> |
|||
static inline Vec3<T> Cross(const Vec3<T>& a, const Vec3<T>& b) |
|||
{ |
|||
return Vec3<T>(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); |
|||
} |
|||
|
|||
// Linear interpolation with a float parameter: t = 0.0 yields begin, t = 1.0 yields end.
// X must support multiplication by float and addition.
template<typename X>
static inline X Lerp(const X& begin, const X& end, const float t)
{
    const float begin_weight = 1.f - t;
    return begin * begin_weight + end * t;
}
|||
|
|||
// Linear interpolation with an integer parameter: t = 0 yields begin, t = base yields end.
// The weighted sum is formed first and divided by base last.
template<typename X, int base>
static inline X LerpInt(const X& begin, const X& end, const int t)
{
    const int begin_weight = base - t;
    return (begin * begin_weight + end * t) / base;
}
|||
|
|||
// Utility vector factories |
|||
template<typename T> |
|||
static inline Vec2<T> MakeVec2(const T& x, const T& y) |
|||
{ |
|||
return Vec2<T>{x, y}; |
|||
} |
|||
|
|||
template<typename T> |
|||
static inline Vec3<T> MakeVec3(const T& x, const T& y, const T& z) |
|||
{ |
|||
return Vec3<T>{x, y, z}; |
|||
} |
|||
|
|||
template<typename T> |
|||
static inline Vec4<T> MakeVec4(const T& x, const T& y, const T& z, const T& w) |
|||
{ |
|||
return Vec4<T>{x, y, z, w}; |
|||
} |
|||
|
|||
} // namespace |
|||
@ -0,0 +1,51 @@ |
|||
// Copyright 2014 Citra Emulator Project
|
|||
// Licensed under GPLv2
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "clipper.h"
|
|||
#include "pica.h"
|
|||
#include "primitive_assembly.h"
|
|||
#include "vertex_shader.h"
|
|||
|
|||
namespace Pica { |
|||
|
|||
namespace PrimitiveAssembly { |
|||
|
|||
static OutputVertex buffer[2]; |
|||
static int buffer_index = 0; // TODO: reset this on emulation restart
|
|||
|
|||
// Feeds one shaded vertex into primitive assembly.
// Vertices are collected in the file-level two-entry buffer; once a third vertex arrives the
// triangle is handed to Clipper::ProcessTriangle. How the buffer is recycled depends on
// registers.triangle_topology.
void SubmitVertex(OutputVertex& vtx)
{
    switch (registers.triangle_topology) {
        case Regs::TriangleTopology::List:
        case Regs::TriangleTopology::ListIndexed:
            if (buffer_index >= 2) {
                // Third vertex of the triangle: emit it and start collecting the next one.
                buffer_index = 0;

                Clipper::ProcessTriangle(buffer[0], buffer[1], vtx);
            } else {
                buffer[buffer_index++] = vtx;
            }
            break;

        case Regs::TriangleTopology::Fan:
            if (buffer_index != 2) {
                buffer[buffer_index++] = vtx;
            } else {
                // NOTE(review): buffer_index is reset here, so the next vertex is stored in
                // buffer[0] and replaces the first vertex of the fan; the buffer[1] = vtx below
                // is then overwritten as well. Fans with more than three vertices therefore do
                // not share their first vertex. Fixing this needs a per-draw reset of
                // buffer_index (see the TODO on its declaration) - confirm intended behaviour.
                buffer_index = 0;

                Clipper::ProcessTriangle(buffer[0], buffer[1], vtx);

                buffer[1] = vtx;
            }
            break;

        default:
            ERROR_LOG(GPU, "Unknown triangle mode %x:", (int)registers.triangle_topology.Value());
            break;
    }
}
|||
|
|||
} // namespace
|
|||
|
|||
} // namespace
|
|||
@ -0,0 +1,21 @@ |
|||
// Copyright 2014 Citra Emulator Project |
|||
// Licensed under GPLv2 |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
namespace Pica { |
|||
|
|||
namespace VertexShader { |
|||
struct OutputVertex; |
|||
} |
|||
|
|||
namespace PrimitiveAssembly { |
|||
|
|||
using VertexShader::OutputVertex; |
|||
|
|||
// Submits one vertex to primitive assembly. The parameter is a non-const reference.
void SubmitVertex(OutputVertex& vtx); |
|||
|
|||
} // namespace |
|||
|
|||
} // namespace |
|||
@ -0,0 +1,180 @@ |
|||
// Copyright 2014 Citra Emulator Project
|
|||
// Licensed under GPLv2
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include <algorithm>
|
|||
|
|||
#include "common/common_types.h"
|
|||
|
|||
#include "math.h"
|
|||
#include "pica.h"
|
|||
#include "rasterizer.h"
|
|||
#include "vertex_shader.h"
|
|||
|
|||
namespace Pica { |
|||
|
|||
namespace Rasterizer { |
|||
|
|||
// Writes one pixel to the color buffer located at registers.framebuffer.GetColorBufferAddress().
// The four 8-bit channels are packed into a 32-bit word as (a << 24) | (r << 16) | (g << 8) | b.
static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
    u32* color_buffer = (u32*)Memory::GetPointer(registers.framebuffer.GetColorBufferAddress());

    // Widen each channel to u32 before shifting: a u8 is promoted to (signed) int, and shifting
    // an alpha value >= 0x80 left by 24 would move a bit into the sign bit of that int.
    const u32 value = ((u32)color.a() << 24) |
                      ((u32)color.r() << 16) |
                      ((u32)color.g() << 8) |
                       (u32)color.b();

    // Assuming RGBA8 format until actual framebuffer format handling is implemented
    *(color_buffer + x + y * registers.framebuffer.GetWidth() / 2) = value;
}
|||
|
|||
// Reads the depth value stored for pixel (x, y) from the buffer at
// registers.framebuffer.GetDepthBufferAddress().
static u32 GetDepth(int x, int y) {
    // Assuming 16-bit depth buffer format until actual format handling is implemented
    const u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress());
    const auto row_offset = y * registers.framebuffer.GetWidth() / 2;

    return *(depth_buffer + x + row_offset);
}
|||
|
|||
// Stores a depth value for pixel (x, y) in the buffer at
// registers.framebuffer.GetDepthBufferAddress().
static void SetDepth(int x, int y, u16 value) {
    // Assuming 16-bit depth buffer format until actual format handling is implemented
    u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress());
    const auto row_offset = y * registers.framebuffer.GetWidth() / 2;

    *(depth_buffer + x + row_offset) = value;
}
|||
|
|||
// Rasterizes one triangle.
// Converts the three screen positions to 12.4 fixed point, walks the bounding box one pixel
// at a time, and for every covered pixel interpolates the vertex color (perspective correct)
// and depth, then writes both via SetDepth / DrawPixel. No depth test is performed here.
void ProcessTriangle(const VertexShader::OutputVertex& v0,
                     const VertexShader::OutputVertex& v1,
                     const VertexShader::OutputVertex& v2)
{
    // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
    struct Fix12P4 {
        Fix12P4() {}
        Fix12P4(u16 val) : val(val) {}

        static u16 FracMask() { return 0xF; }
        static u16 IntMask() { return (u16)~0xF; }

        operator u16() const {
            return val;
        }

        bool operator < (const Fix12P4& oth) const {
            return (u16)*this < (u16)oth;
        }

    private:
        u16 val;
    };

    // vertex positions in rasterizer coordinates
    auto FloatToFix = [](float24 flt) {
        return Fix12P4(flt.ToFloat32() * 16.0f);
    };
    auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) {
        return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)};
    };
    Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos),
                                   ScreenToRasterizerCoordinates(v1.screenpos),
                                   ScreenToRasterizerCoordinates(v2.screenpos) };

    // TODO: Proper scissor rect test!
    u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
    u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
    u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
    u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});

    // Snap the bounding box to whole pixels: round the minimum down and the maximum up.
    min_x = min_x & Fix12P4::IntMask();
    min_y = min_y & Fix12P4::IntMask();
    max_x = (max_x + Fix12P4::FracMask()) & Fix12P4::IntMask();
    max_y = (max_y + Fix12P4::FracMask()) & Fix12P4::IntMask();

    // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not
    // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias
    // values which are added to the barycentric coordinates w0, w1 and w2, respectively.
    // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones...
    auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx,
                                          const Math::Vec2<Fix12P4>& line1,
                                          const Math::Vec2<Fix12P4>& line2)
    {
        if (line1.y == line2.y) {
            // just check if vertex is above us => bottom line parallel to x-axis
            return vtx.y < line1.y;
        } else {
            // check if vertex is on our left => right side
            // TODO: Not sure how likely this is to overflow
            return (int)vtx.x < (int)line1.x + ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) / ((int)line2.y - (int)line1.y);
        }
    };
    int bias0 = IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0;
    int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0;
    int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0;

    // Signed edge function: z component of the cross product (vtx2 - vtx1) x (vtx3 - vtx1).
    // It captures nothing, so it is defined once here instead of once per pixel.
    auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1,
                       const Math::Vec2<Fix12P4>& vtx2,
                       const Math::Vec2<Fix12P4>& vtx3) {
        const auto vec1 = (vtx2.Cast<int>() - vtx1.Cast<int>()).Append(0);
        const auto vec2 = (vtx3.Cast<int>() - vtx1.Cast<int>()).Append(0);
        // TODO: There is a very small chance this will overflow for sizeof(int) == 4
        return Cross(vec1, vec2).z;
    };

    // TODO: Not sure if looping through x first might be faster
    for (u16 y = min_y; y < max_y; y += 0x10) {
        for (u16 x = min_x; x < max_x; x += 0x10) {

            // Calculate the barycentric coordinates w0, w1 and w2
            int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
            int w1 = bias1 + orient2d(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
            int w2 = bias2 + orient2d(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
            int wsum = w0 + w1 + w2;

            // If current pixel is not covered by the current primitive
            if (w0 < 0 || w1 < 0 || w2 < 0)
                continue;

            // All three weights are non-negative here, so a zero sum means all of them are zero
            // (degenerate triangle). Every interpolation below would then evaluate 0/0 and
            // convert the resulting NaN to an integer, so skip the pixel instead.
            if (wsum == 0)
                continue;

            // Perspective correct attribute interpolation:
            // Attribute values cannot be calculated by simple linear interpolation since
            // they are not linear in screen space. For example, when interpolating a
            // texture coordinate across two vertices, something simple like
            //     u = (u0*w0 + u1*w1)/(w0+w1)
            // will not work. However, the attribute value divided by the
            // clipspace w-coordinate (u/w) and the inverse w-coordinate (1/w) are linear
            // in screenspace. Hence, we can linearly interpolate these two independently and
            // calculate the interpolated attribute by dividing the results.
            // I.e.
            //     u_over_w   = ((u0/v0.pos.w)*w0 + (u1/v1.pos.w)*w1)/(w0+w1)
            //     one_over_w = (( 1/v0.pos.w)*w0 + ( 1/v1.pos.w)*w1)/(w0+w1)
            //     u = u_over_w / one_over_w
            //
            // The generalization to three vertices is straightforward in barycentric coordinates.
            auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {
                auto attr_over_w = Math::MakeVec3(attr0 / v0.pos.w,
                                                  attr1 / v1.pos.w,
                                                  attr2 / v2.pos.w);
                auto w_inverse   = Math::MakeVec3(float24::FromFloat32(1.f) / v0.pos.w,
                                                  float24::FromFloat32(1.f) / v1.pos.w,
                                                  float24::FromFloat32(1.f) / v2.pos.w);
                auto baricentric_coordinates = Math::MakeVec3(float24::FromFloat32(w0),
                                                              float24::FromFloat32(w1),
                                                              float24::FromFloat32(w2));

                float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates);
                float24 interpolated_w_inverse   = Math::Dot(w_inverse,   baricentric_coordinates);
                return interpolated_attr_over_w / interpolated_w_inverse;
            };

            Math::Vec4<u8> primary_color{
                (u8)(GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() * 255),
                (u8)(GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() * 255),
                (u8)(GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() * 255),
                (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)
            };

            // Depth is interpolated linearly in screen space and scaled to the 16-bit range.
            u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 +
                           (float)v1.screenpos[2].ToFloat32() * w1 +
                           (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); // TODO: Shouldn't need to multiply by 65536?
            SetDepth(x >> 4, y >> 4, z);

            DrawPixel(x >> 4, y >> 4, primary_color);
        }
    }
}
|||
|
|||
} // namespace Rasterizer
|
|||
|
|||
} // namespace Pica
|
|||
@ -0,0 +1,21 @@ |
|||
// Copyright 2014 Citra Emulator Project |
|||
// Licensed under GPLv2 |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
namespace Pica { |
|||
|
|||
namespace VertexShader { |
|||
struct OutputVertex; |
|||
} |
|||
|
|||
namespace Rasterizer { |
|||
|
|||
// Rasterizes the triangle formed by the three given shaded vertices.
void ProcessTriangle(const VertexShader::OutputVertex& v0, |
|||
const VertexShader::OutputVertex& v1, |
|||
const VertexShader::OutputVertex& v2); |
|||
|
|||
} // namespace Rasterizer |
|||
|
|||
} // namespace Pica |
|||
@ -0,0 +1,270 @@ |
|||
// Copyright 2014 Citra Emulator Project
|
|||
// Licensed under GPLv2
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "pica.h"
|
|||
#include "vertex_shader.h"
|
|||
#include <core/mem_map.h>
|
|||
#include <common/file_util.h>
|
|||
|
|||
namespace Pica { |
|||
|
|||
namespace VertexShader { |
|||
|
|||
static struct { |
|||
Math::Vec4<float24> f[96]; |
|||
} shader_uniforms; |
|||
|
|||
|
|||
// TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
|
|||
// For now, we just keep these local arrays around.
|
|||
static u32 shader_memory[1024]; |
|||
static u32 swizzle_data[1024]; |
|||
|
|||
// Stores one 32-bit word of shader program code at word index addr.
// Writes beyond the end of the local shader_memory array are logged and dropped instead of
// corrupting adjacent memory.
void SubmitShaderMemoryChange(u32 addr, u32 value)
{
    const u32 num_words = sizeof(shader_memory) / sizeof(shader_memory[0]);
    if (addr >= num_words) {
        ERROR_LOG(GPU, "Shader memory write out of range: addr=0x%x", addr);
        return;
    }

    shader_memory[addr] = value;
}
|||
|
|||
// Stores one 32-bit swizzle pattern word at word index addr.
// Writes beyond the end of the local swizzle_data array are logged and dropped instead of
// corrupting adjacent memory.
void SubmitSwizzleDataChange(u32 addr, u32 value)
{
    const u32 num_words = sizeof(swizzle_data) / sizeof(swizzle_data[0]);
    if (addr >= num_words) {
        ERROR_LOG(GPU, "Swizzle data write out of range: addr=0x%x", addr);
        return;
    }

    swizzle_data[addr] = value;
}
|||
|
|||
// Returns a mutable reference to the float uniform with the given index.
// The index must be below the number of entries in shader_uniforms.f; this is checked with a
// debug assertion because a reference has to be returned either way.
Math::Vec4<float24>& GetFloatUniform(u32 index)
{
    _dbg_assert_(GPU, index < sizeof(shader_uniforms.f) / sizeof(shader_uniforms.f[0]));

    return shader_uniforms.f[index];
}
|||
|
|||
// Per-invocation interpreter state for one vertex shader run; filled in by RunShader and
// consumed by ProcessShaderCode.
struct VertexShaderState { |
|||
// Points at the instruction word to execute next (inside shader_memory).
u32* program_counter; |
|||
|
|||
// One pointer per input register (first component) and one per output register component.
const float24* input_register_table[16]; |
|||
float24* output_register_table[7*4]; |
|||
|
|||
Math::Vec4<float24> temporary_registers[16]; |
|||
bool status_registers[2]; |
|||
|
|||
// Sentinel stored in unused call stack slots; RET stops execution when it finds this value on top.
enum { |
|||
INVALID_ADDRESS = 0xFFFFFFFF |
|||
}; |
|||
u32 call_stack[8]; // TODO: What is the maximal call stack depth?
|
|||
// Points at the current top entry of call_stack.
u32* call_stack_pointer; |
|||
}; |
|||
|
|||
// Interprets shader instructions starting at state.program_counter until a RET is executed
// with an empty call stack.
//
// For every instruction the source/destination pointers and the swizzle pattern are decoded
// from the "common" instruction format up front, then the opcode is dispatched.
static void ProcessShaderCode(VertexShaderState& state) {
    while (true) {
        bool increment_pc = true;
        bool exit_loop = false;
        const Instruction& instr = *(const Instruction*)state.program_counter;

        // src1 can address input registers, temporaries or float uniforms; src2 only the first two.
        const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1]
                             : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x
                             : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x
                             : nullptr;
        const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2]
                             : &state.temporary_registers[instr.common.src2-0x10].x;
        // TODO: Unsure about the limit values
        // NOTE(review): dest == 0x1C indexes one past the 7*4 entries of output_register_table,
        // and a nullptr dest is dereferenced by the arithmetic opcodes below - confirm the ranges.
        float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest]
                      : (instr.common.dest <= 0x3C) ? nullptr
                      : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4]
                      : nullptr;

        const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id];

        // Apply the source swizzles once so the opcode handlers can work component-wise.
        const float24 src1[4] = {
            src1_[(int)swizzle.GetSelectorSrc1(0)],
            src1_[(int)swizzle.GetSelectorSrc1(1)],
            src1_[(int)swizzle.GetSelectorSrc1(2)],
            src1_[(int)swizzle.GetSelectorSrc1(3)],
        };
        const float24 src2[4] = {
            src2_[(int)swizzle.GetSelectorSrc2(0)],
            src2_[(int)swizzle.GetSelectorSrc2(1)],
            src2_[(int)swizzle.GetSelectorSrc2(2)],
            src2_[(int)swizzle.GetSelectorSrc2(3)],
        };

        switch (instr.opcode) {
            case Instruction::OpCode::ADD:
            {
                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    dest[i] = src1[i] + src2[i];
                }

                break;
            }

            case Instruction::OpCode::MUL:
            {
                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    dest[i] = src1[i] * src2[i];
                }

                break;
            }

            case Instruction::OpCode::DP3:
            case Instruction::OpCode::DP4:
            {
                float24 dot = float24::FromFloat32(0.f);
                int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
                for (int i = 0; i < num_components; ++i)
                    dot = dot + src1[i] * src2[i];

                // The scalar result goes to every enabled destination component. The number of
                // source components only limits the sum above, so iterate over all four here;
                // otherwise DP3 could never write dest.w.
                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    dest[i] = dot;
                }
                break;
            }

            // Reciprocal
            case Instruction::OpCode::RCP:
            {
                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    // TODO: Be stable against division by zero!
                    // TODO: I think this might be wrong... we should only use one component here
                    dest[i] = float24::FromFloat32(1.0 / src1[i].ToFloat32());
                }

                break;
            }

            // Reciprocal Square Root
            case Instruction::OpCode::RSQ:
            {
                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    // TODO: Be stable against division by zero!
                    // TODO: I think this might be wrong... we should only use one component here
                    dest[i] = float24::FromFloat32(1.0 / sqrt(src1[i].ToFloat32()));
                }

                break;
            }

            case Instruction::OpCode::MOV:
            {
                for (int i = 0; i < 4; ++i) {
                    if (!swizzle.DestComponentEnabled(i))
                        continue;

                    dest[i] = src1[i];
                }
                break;
            }

            case Instruction::OpCode::RET:
                if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) {
                    // Nothing to return to: the shader is finished.
                    exit_loop = true;
                } else {
                    // Jump back to the stored CALL instruction (the increment below then moves
                    // past it), invalidate the entry that was just consumed and pop it.
                    // Invalidating after the decrement would wipe the caller's return address.
                    state.program_counter = &shader_memory[*state.call_stack_pointer];
                    *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS;
                }

                break;

            case Instruction::OpCode::CALL:
                increment_pc = false;

                // The push below pre-increments, so there must be at least one free slot left.
                // Compared in elements, not bytes.
                _dbg_assert_(GPU, state.call_stack_pointer + 1 <
                                  state.call_stack + sizeof(state.call_stack) / sizeof(state.call_stack[0]));

                // Remember the index of this CALL instruction as the return address.
                *++state.call_stack_pointer = state.program_counter - shader_memory;
                // TODO: Does this offset refer to the beginning of shader memory?
                state.program_counter = &shader_memory[instr.flow_control.offset_words];
                break;

            case Instruction::OpCode::FLS:
                // TODO: Do whatever needs to be done here?
                break;

            default:
                ERROR_LOG(GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
                          (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex);
                break;
        }

        if (increment_pc)
            ++state.program_counter;

        if (exit_loop)
            break;
    }
}
|||
|
|||
// Runs the vertex shader for one vertex and returns the resulting output vertex.
//
// input:          attribute values of the vertex
// num_attributes: number of entries of input.attr that are mapped to input registers
//
// Sets up the interpreter state (program counter, input/output register tables, status
// registers, call stack) from the global registers, executes the shader starting at
// registers.vs_main_offset, and logs the result.
OutputVertex RunShader(const InputVertex& input, int num_attributes)
{
    VertexShaderState state;

    const u32* main = &shader_memory[registers.vs_main_offset];
    state.program_counter = (u32*)main;

    // Setup input register table
    const auto& attribute_register_map = registers.vs_input_register_map;
    // Input registers without a mapped attribute all point at this dummy. It is given a defined
    // value so that shaders reading such a register do not read an uninitialized local.
    float24 dummy_register = float24::FromFloat32(0.f);
    std::fill(&state.input_register_table[0], &state.input_register_table[16], &dummy_register);
    if(num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x;
    if(num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x;
    if(num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x;
    if(num_attributes > 3) state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x;
    if(num_attributes > 4) state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x;
    if(num_attributes > 5) state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x;
    if(num_attributes > 6) state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x;
    if(num_attributes > 7) state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x;
    if(num_attributes > 8) state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x;
    if(num_attributes > 9) state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x;
    if(num_attributes > 10) state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x;
    if(num_attributes > 11) state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x;
    if(num_attributes > 12) state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x;
    if(num_attributes > 13) state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x;
    if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;
    if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;

    // Setup output register table
    // Each of the 7*4 output register components is pointed at the float24 slot of ret selected
    // by its semantic id, treating ret as a flat array of float24.
    OutputVertex ret;
    for (int i = 0; i < 7; ++i) {
        const auto& output_register_map = registers.vs_output_attributes[i];

        u32 semantics[4] = {
            output_register_map.map_x, output_register_map.map_y,
            output_register_map.map_z, output_register_map.map_w
        };

        for (int comp = 0; comp < 4; ++comp)
            state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp];
    }

    state.status_registers[0] = false;
    state.status_registers[1] = false;
    // An all-invalid call stack makes the first RET at top level terminate the shader.
    std::fill(state.call_stack, state.call_stack + sizeof(state.call_stack) / sizeof(state.call_stack[0]),
              VertexShaderState::INVALID_ADDRESS);
    state.call_stack_pointer = &state.call_stack[0];

    ProcessShaderCode(state);

    DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
        ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
        ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
        ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32());

    return ret;
}
|||
|
|||
|
|||
} // namespace
|
|||
|
|||
} // namespace
|
|||
@ -0,0 +1,211 @@ |
|||
// Copyright 2014 Citra Emulator Project |
|||
// Licensed under GPLv2 |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <initializer_list> |
|||
|
|||
#include <common/common_types.h> |
|||
|
|||
#include "math.h" |
|||
#include "pica.h" |
|||
|
|||
namespace Pica { |
|||
|
|||
namespace VertexShader { |
|||
|
|||
// Vertex shader input: sixteen four-component float24 attributes.
struct InputVertex { |
|||
Math::Vec4<float24> attr[16]; |
|||
}; |
|||
|
|||
struct OutputVertex { |
|||
OutputVertex() = default; |
|||
|
|||
// VS output attributes |
|||
Math::Vec4<float24> pos; |
|||
Math::Vec4<float24> dummy; // quaternions (not implemented, yet) |
|||
Math::Vec4<float24> color; |
|||
Math::Vec2<float24> tc0; |
|||
float24 tc0_v; |
|||
|
|||
// Padding for optimal alignment |
|||
float24 pad[14]; |
|||
|
|||
// Attributes used to store intermediate results |
|||
|
|||
// position after perspective divide |
|||
Math::Vec3<float24> screenpos; |
|||
|
|||
// Linear interpolation |
|||
// factor: 0=this, 1=vtx |
|||
void Lerp(float24 factor, const OutputVertex& vtx) { |
|||
pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); |
|||
|
|||
// TODO: Should perform perspective correct interpolation here... |
|||
tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); |
|||
|
|||
screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); |
|||
|
|||
color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); |
|||
} |
|||
|
|||
// Linear interpolation |
|||
// factor: 0=v0, 1=v1 |
|||
static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) { |
|||
OutputVertex ret = v0; |
|||
ret.Lerp(factor, v1); |
|||
return ret; |
|||
} |
|||
}; |
|||
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); |
|||
|
|||
union Instruction { |
|||
enum class OpCode : u32 { |
|||
ADD = 0x0, |
|||
DP3 = 0x1, |
|||
DP4 = 0x2, |
|||
|
|||
MUL = 0x8, |
|||
|
|||
MAX = 0xC, |
|||
MIN = 0xD, |
|||
RCP = 0xE, |
|||
RSQ = 0xF, |
|||
|
|||
MOV = 0x13, |
|||
|
|||
RET = 0x21, |
|||
FLS = 0x22, // Flush |
|||
CALL = 0x24, |
|||
}; |
|||
|
|||
std::string GetOpCodeName() const { |
|||
std::map<OpCode, std::string> map = { |
|||
{ OpCode::ADD, "ADD" }, |
|||
{ OpCode::DP3, "DP3" }, |
|||
{ OpCode::DP4, "DP4" }, |
|||
{ OpCode::MUL, "MUL" }, |
|||
{ OpCode::MAX, "MAX" }, |
|||
{ OpCode::MIN, "MIN" }, |
|||
{ OpCode::RCP, "RCP" }, |
|||
{ OpCode::RSQ, "RSQ" }, |
|||
{ OpCode::MOV, "MOV" }, |
|||
{ OpCode::RET, "RET" }, |
|||
{ OpCode::FLS, "FLS" }, |
|||
}; |
|||
auto it = map.find(opcode); |
|||
if (it == map.end()) |
|||
return "UNK"; |
|||
else |
|||
return it->second; |
|||
} |
|||
|
|||
u32 hex; |
|||
|
|||
BitField<0x1a, 0x6, OpCode> opcode; |
|||
|
|||
// General notes: |
|||
// |
|||
// When two input registers are used, one of them uses a 5-bit index while the other |
|||
// one uses a 7-bit index. This is because at most one floating point uniform may be used |
|||
// as an input. |
|||
|
|||
|
|||
// Format used e.g. by arithmetic instructions and comparisons |
|||
// "src1" and "src2" specify register indices (i.e. indices referring to groups of 4 floats), |
|||
// while "dest" addresses individual floats. |
|||
union { |
|||
BitField<0x00, 0x5, u32> operand_desc_id; |
|||
BitField<0x07, 0x5, u32> src2; |
|||
BitField<0x0c, 0x7, u32> src1; |
|||
BitField<0x13, 0x7, u32> dest; |
|||
} common; |
|||
|
|||
// Format used for flow control instructions ("if") |
|||
union { |
|||
BitField<0x00, 0x8, u32> num_instructions; |
|||
BitField<0x0a, 0xc, u32> offset_words; |
|||
} flow_control; |
|||
}; |
|||
|
|||
union SwizzlePattern { |
|||
u32 hex; |
|||
|
|||
enum class Selector : u32 { |
|||
x = 0, |
|||
y = 1, |
|||
z = 2, |
|||
w = 3 |
|||
}; |
|||
|
|||
Selector GetSelectorSrc1(int comp) const { |
|||
Selector selectors[] = { |
|||
src1_selector_0, src1_selector_1, src1_selector_2, src1_selector_3 |
|||
}; |
|||
return selectors[comp]; |
|||
} |
|||
|
|||
Selector GetSelectorSrc2(int comp) const { |
|||
Selector selectors[] = { |
|||
src2_selector_0, src2_selector_1, src2_selector_2, src2_selector_3 |
|||
}; |
|||
return selectors[comp]; |
|||
} |
|||
|
|||
bool DestComponentEnabled(int i) const { |
|||
return (dest_mask & (0x8 >> i)); |
|||
} |
|||
|
|||
std::string SelectorToString(bool src2) const { |
|||
std::map<Selector, std::string> map = { |
|||
{ Selector::x, "x" }, |
|||
{ Selector::y, "y" }, |
|||
{ Selector::z, "z" }, |
|||
{ Selector::w, "w" } |
|||
}; |
|||
std::string ret; |
|||
for (int i = 0; i < 4; ++i) { |
|||
ret += map.at(src2 ? GetSelectorSrc2(i) : GetSelectorSrc1(i)); |
|||
} |
|||
return ret; |
|||
} |
|||
|
|||
std::string DestMaskToString() const { |
|||
std::string ret; |
|||
for (int i = 0; i < 4; ++i) { |
|||
if (!DestComponentEnabled(i)) |
|||
ret += "_"; |
|||
else |
|||
ret += "xyzw"[i]; |
|||
} |
|||
return ret; |
|||
} |
|||
|
|||
// Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x |
|||
BitField< 0, 4, u32> dest_mask; |
|||
|
|||
BitField< 5, 2, Selector> src1_selector_3; |
|||
BitField< 7, 2, Selector> src1_selector_2; |
|||
BitField< 9, 2, Selector> src1_selector_1; |
|||
BitField<11, 2, Selector> src1_selector_0; |
|||
|
|||
BitField<14, 2, Selector> src2_selector_3; |
|||
BitField<16, 2, Selector> src2_selector_2; |
|||
BitField<18, 2, Selector> src2_selector_1; |
|||
BitField<20, 2, Selector> src2_selector_0; |
|||
|
|||
BitField<31, 1, u32> flag; // not sure what this means, maybe it's the sign? |
|||
}; |
|||
|
|||
// Store one 32-bit word of shader code / swizzle data at the given word index.
void SubmitShaderMemoryChange(u32 addr, u32 value); |
|||
void SubmitSwizzleDataChange(u32 addr, u32 value); |
|||
|
|||
// Runs the vertex shader on one input vertex; num_attributes is the number of entries of input.attr in use.
OutputVertex RunShader(const InputVertex& input, int num_attributes); |
|||
|
|||
// Returns a mutable reference to the float uniform with the given index.
Math::Vec4<float24>& GetFloatUniform(u32 index); |
|||
|
|||
} // namespace |
|||
|
|||
} // namespace |
|||
|
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue