Browse Source
Merge pull request #1927 from ReinUsesLisp/shader-ir
Merge pull request #1927 from ReinUsesLisp/shader-ir
video_core: Replace gl_shader_decompiler with an IR based decompiler
committed by
GitHub
39 changed files with 5549 additions and 3860 deletions
-
28src/video_core/CMakeLists.txt
-
10src/video_core/engines/shader_bytecode.h
-
2src/video_core/engines/shader_header.h
-
4src/video_core/renderer_opengl/gl_rasterizer.cpp
-
8src/video_core/renderer_opengl/gl_shader_cache.cpp
-
1src/video_core/renderer_opengl/gl_shader_cache.h
-
4846src/video_core/renderer_opengl/gl_shader_decompiler.cpp
-
81src/video_core/renderer_opengl/gl_shader_decompiler.h
-
104src/video_core/renderer_opengl/gl_shader_gen.cpp
-
158src/video_core/renderer_opengl/gl_shader_gen.h
-
206src/video_core/shader/decode.cpp
-
155src/video_core/shader/decode/arithmetic.cpp
-
70src/video_core/shader/decode/arithmetic_half.cpp
-
51src/video_core/shader/decode/arithmetic_half_immediate.cpp
-
52src/video_core/shader/decode/arithmetic_immediate.cpp
-
287src/video_core/shader/decode/arithmetic_integer.cpp
-
96src/video_core/shader/decode/arithmetic_integer_immediate.cpp
-
49src/video_core/shader/decode/bfe.cpp
-
41src/video_core/shader/decode/bfi.cpp
-
149src/video_core/shader/decode/conversion.cpp
-
0src/video_core/shader/decode/decode_integer_set.cpp
-
59src/video_core/shader/decode/ffma.cpp
-
58src/video_core/shader/decode/float_set.cpp
-
56src/video_core/shader/decode/float_set_predicate.cpp
-
67src/video_core/shader/decode/half_set.cpp
-
62src/video_core/shader/decode/half_set_predicate.cpp
-
76src/video_core/shader/decode/hfma2.cpp
-
50src/video_core/shader/decode/integer_set.cpp
-
53src/video_core/shader/decode/integer_set_predicate.cpp
-
688src/video_core/shader/decode/memory.cpp
-
178src/video_core/shader/decode/other.cpp
-
67src/video_core/shader/decode/predicate_set_predicate.cpp
-
46src/video_core/shader/decode/predicate_set_register.cpp
-
51src/video_core/shader/decode/register_set_predicate.cpp
-
55src/video_core/shader/decode/shift.cpp
-
111src/video_core/shader/decode/video.cpp
-
97src/video_core/shader/decode/xmad.cpp
-
444src/video_core/shader/shader_ir.cpp
-
793src/video_core/shader/shader_ir.h
4846
src/video_core/renderer_opengl/gl_shader_decompiler.cpp
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -0,0 +1,206 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include <cstring>
|
||||
|
#include <set>
|
||||
|
|
||||
|
#include <fmt/format.h>
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/engines/shader_header.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
|
||||
|
namespace { |
||||
|
|
||||
|
/// Merges exit method of two parallel branches.
|
||||
|
constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) { |
||||
|
if (a == ExitMethod::Undetermined) { |
||||
|
return b; |
||||
|
} |
||||
|
if (b == ExitMethod::Undetermined) { |
||||
|
return a; |
||||
|
} |
||||
|
if (a == b) { |
||||
|
return a; |
||||
|
} |
||||
|
return ExitMethod::Conditional; |
||||
|
} |
||||
|
|
||||
|
/**
|
||||
|
* Returns whether the instruction at the specified offset is a 'sched' instruction. |
||||
|
* Sched instructions always appear before a sequence of 3 instructions. |
||||
|
*/ |
||||
|
constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { |
||||
|
constexpr u32 SchedPeriod = 4; |
||||
|
u32 absolute_offset = offset - main_offset; |
||||
|
|
||||
|
return (absolute_offset % SchedPeriod) == 0; |
||||
|
} |
||||
|
|
||||
|
} // namespace
|
||||
|
|
||||
|
void ShaderIR::Decode() { |
||||
|
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |
||||
|
|
||||
|
std::set<u32> labels; |
||||
|
const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); |
||||
|
if (exit_method != ExitMethod::AlwaysEnd) { |
||||
|
UNREACHABLE_MSG("Program does not always end"); |
||||
|
} |
||||
|
|
||||
|
if (labels.empty()) { |
||||
|
basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
labels.insert(main_offset); |
||||
|
|
||||
|
for (const u32 label : labels) { |
||||
|
const auto next_it = labels.lower_bound(label + 1); |
||||
|
const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; |
||||
|
|
||||
|
basic_blocks.insert({label, DecodeRange(label, next_label)}); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/// Walks the instructions in [begin, end) to find how that range exits, recording every branch
/// target it finds in 'labels'. Results are stored in exit_method_map keyed by (begin, end); a
/// range that already has an entry returns the stored value without scanning again.
ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
    const auto emplaced =
        exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
    ExitMethod& result = emplaced.first->second;
    if (!emplaced.second) {
        return result;
    }

    for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) {
        // Grow the covered range to include this instruction.
        coverage_begin = std::min(coverage_begin, offset);
        coverage_end = std::max(coverage_end, offset + 1);

        const Instruction instr = {program_code[offset]};
        const auto opcode = OpCode::Decode(instr);
        if (!opcode) {
            continue;
        }

        const auto id = opcode->get().GetId();

        if (id == OpCode::Id::EXIT) {
            // EXIT may be predicated, in which case the shader only ends here when the
            // condition holds. The code following it decides what happens otherwise.
            using Tegra::Shader::Pred;
            if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
                return result = ExitMethod::AlwaysEnd;
            }
            const ExitMethod not_met = Scan(offset + 1, end, labels);
            return result = ParallelExit(ExitMethod::AlwaysEnd, not_met);
        }

        if (id == OpCode::Id::BRA) {
            const u32 target = offset + instr.bra.GetBranchTarget();
            labels.insert(target);
            // Scan the fall-through path first, then the jump target.
            const ExitMethod no_jmp = Scan(offset + 1, end, labels);
            const ExitMethod jmp = Scan(target, end, labels);
            return result = ParallelExit(no_jmp, jmp);
        }

        if (id == OpCode::Id::SSY || id == OpCode::Id::PBK) {
            // SSY and PBK are encoded similarly to BRA. They only register a label; scanning
            // for an exit method carries on with the next instruction.
            UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                                 "Constant buffer branching is not supported");
            const u32 target = offset + instr.bra.GetBranchTarget();
            labels.insert(target);
        }
    }

    return result = ExitMethod::AlwaysReturn;
}
||||
|
|
||||
|
/// Decodes the instructions starting at 'begin' into a new basic block. Decoding stops at 'end',
/// or at MAX_PROGRAM_LENGTH when 'begin' lies past 'end'.
BasicBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
    BasicBlock basic_block;

    // The bound does not change while decoding, so compute it once.
    const u32 limit = begin > end ? MAX_PROGRAM_LENGTH : end;

    // DecodeInstr returns the position of the next instruction to decode.
    for (u32 pc = begin; pc < limit;) {
        pc = DecodeInstr(basic_block, pc);
    }

    // Return the local by name: wrapping it in std::move would prevent copy elision.
    return basic_block;
}
||||
|
|
||||
|
/// Decodes the instruction at 'pc' and appends the resulting nodes to 'bb'. Returns the position
/// of the next instruction to decode.
u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
    // Sched instructions are skipped when generating code.
    if (IsSchedInstruction(pc, main_offset)) {
        return pc + 1;
    }

    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    if (!opcode) {
        // The instruction could not be decoded: report it and move on to the next one.
        UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
        return pc + 1;
    }

    // Emit a comment node with the position, name and raw value of the instruction.
    bb.push_back(
        Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value)));

    using Tegra::Shader::Pred;
    UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
                         "NeverExecute predicate not implemented");

    // Decoder member function for each opcode type. Types missing from this table are handled by
    // DecodeOther.
    using DecoderMethod = u32 (ShaderIR::*)(BasicBlock&, const BasicBlock&, u32);
    static const std::map<OpCode::Type, DecoderMethod> decoders = {
        {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
        {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
        {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
        {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
        {OpCode::Type::Shift, &ShaderIR::DecodeShift},
        {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
        {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
        {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
        {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
        {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
        {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
        {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
        {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
        {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
        {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
        {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
        {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
        {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
        {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
        {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
        {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
        {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
        {OpCode::Type::Video, &ShaderIR::DecodeVideo},
        {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
    };

    // The decoder writes into a temporary block so the result can be wrapped in a conditional.
    std::vector<Node> tmp_block;
    const auto handler = decoders.find(opcode->get().GetType());
    if (handler == decoders.end()) {
        pc = DecodeOther(tmp_block, bb, pc);
    } else {
        pc = (this->*(handler->second))(tmp_block, bb, pc);
    }

    // Some instructions (like SSY) have no predicate field and always execute unconditionally.
    const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
    const auto pred_index = static_cast<u32>(instr.pred.pred_index);
    const bool is_predicated =
        can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex);

    if (!is_predicated) {
        // Unconditional: move the decoded nodes straight into the block.
        for (auto& node : tmp_block) {
            bb.push_back(std::move(node));
        }
        return pc + 1;
    }

    bb.push_back(
        Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)));
    return pc + 1;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,155 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
using Tegra::Shader::SubOp; |
||||
|
|
||||
|
/**
 * Decodes the arithmetic instruction at program_code[pc] (MOV, FMUL, FADD, MUFU, FMNMX, RRO) and
 * appends the generated nodes to bb.
 * @param bb Block receiving the generated nodes.
 * @param code Not used by this decoder.
 * @param pc Position of the instruction in program_code.
 * @return pc, unchanged.
 */
u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    Node op_a = GetRegister(instr.gpr8);

    // The second operand is an immediate, a register or a constant buffer entry.
    Node op_b = [&]() -> Node {
        if (instr.is_b_imm) {
            return GetImmediate19(instr);
        } else if (instr.is_b_gpr) {
            return GetRegister(instr.gpr20);
        } else {
            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        }
    }();

    switch (opcode->get().GetId()) {
    case OpCode::Id::MOV_C:
    case OpCode::Id::MOV_R: {
        // MOV has neither 'abs' nor 'neg' bits.
        SetRegister(bb, instr.gpr0, op_b);
        break;
    }
    case OpCode::Id::FMUL_C:
    case OpCode::Id::FMUL_R:
    case OpCode::Id::FMUL_IMM: {
        // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
        UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented",
                             instr.fmul.tab5cb8_2.Value());
        // The message names the field that is actually checked (tab5c68_0).
        UNIMPLEMENTED_IF_MSG(
            instr.fmul.tab5c68_0 != 1, "FMUL tab5c68_0({}) is not implemented",
            instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default

        op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);

        // TODO(Rodrigo): Should precise be used when there's a postfactor?
        Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);

        if (instr.fmul.postfactor != 0) {
            auto postfactor = static_cast<s32>(instr.fmul.postfactor);

            // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below
            // logic.
            if (postfactor >= 4) {
                postfactor = 7 - postfactor;
            } else {
                postfactor = 0 - postfactor;
            }

            // Scale the product by 2^postfactor: multiply for positive exponents, divide
            // otherwise.
            if (postfactor > 0) {
                value = Operation(OperationCode::FMul, NO_PRECISE, value,
                                  Immediate(static_cast<f32>(1 << postfactor)));
            } else {
                value = Operation(OperationCode::FDiv, NO_PRECISE, value,
                                  Immediate(static_cast<f32>(1 << -postfactor)));
            }
        }

        value = GetSaturatedFloat(value, instr.alu.saturate_d);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::FADD_C:
    case OpCode::Id::FADD_R:
    case OpCode::Id::FADD_IMM: {
        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);

        Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
        value = GetSaturatedFloat(value, instr.alu.saturate_d);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::MUFU: {
        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);

        // The sub operation selects which function is applied to the first operand.
        Node value = [&]() {
            switch (instr.sub_op) {
            case SubOp::Cos:
                return Operation(OperationCode::FCos, PRECISE, op_a);
            case SubOp::Sin:
                return Operation(OperationCode::FSin, PRECISE, op_a);
            case SubOp::Ex2:
                return Operation(OperationCode::FExp2, PRECISE, op_a);
            case SubOp::Lg2:
                return Operation(OperationCode::FLog2, PRECISE, op_a);
            case SubOp::Rcp:
                return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
            case SubOp::Rsq:
                return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
            case SubOp::Sqrt:
                return Operation(OperationCode::FSqrt, PRECISE, op_a);
            default:
                UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}",
                                  static_cast<unsigned>(instr.sub_op.Value()));
                return Immediate(0);
            }
        }();
        value = GetSaturatedFloat(value, instr.alu.saturate_d);

        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::FMNMX_C:
    case OpCode::Id::FMNMX_R:
    case OpCode::Id::FMNMX_IMM: {
        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);

        const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);

        // Both results are built; the predicate selects the minimum or the maximum.
        const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
        const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
        const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::RRO_C:
    case OpCode::Id::RRO_R:
    case OpCode::Id::RRO_IMM: {
        // Currently RRO is only implemented as a register move.
        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
        SetRegister(bb, instr.gpr0, op_b);
        LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
    }

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,70 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
|
||||
|
/**
 * Decodes the half float arithmetic instruction at program_code[pc] (HADD2 and HMUL2 with a
 * constant buffer or register operand) and appends the generated nodes to bb.
 * @param bb Block receiving the generated nodes.
 * @param code Not used by this decoder.
 * @param pc Position of the instruction in program_code.
 * @return pc, unchanged.
 */
u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
        opcode->get().GetId() == OpCode::Id::HADD2_R) {
        UNIMPLEMENTED_IF(instr.alu_half.ftz != 0);
    }
    UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");

    // The negate bit of the first operand is ignored for HMUL2_R and the one of the second
    // operand for HMUL2_C.
    const bool negate_a =
        opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
    const bool negate_b =
        opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;

    const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a);

    // The second operand comes from a constant buffer or a register depending on the opcode.
    Node op_b = [&]() {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HADD2_C:
        case OpCode::Id::HMUL2_C:
            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        case OpCode::Id::HADD2_R:
        case OpCode::Id::HMUL2_R:
            return GetRegister(instr.gpr20);
        default:
            UNREACHABLE();
            return Immediate(0);
        }
    }();
    op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);

    Node value = [&]() {
        // Both operand types are read from the alu_half encoding, like every other field this
        // decoder uses.
        MetaHalfArithmetic meta{true, {instr.alu_half.type_a, instr.alu_half.type_b}};
        switch (opcode->get().GetId()) {
        case OpCode::Id::HADD2_C:
        case OpCode::Id::HADD2_R:
            return Operation(OperationCode::HAdd, meta, op_a, op_b);
        case OpCode::Id::HMUL2_C:
        case OpCode::Id::HMUL2_R:
            return Operation(OperationCode::HMul, meta, op_a, op_b);
        default:
            UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
            return Immediate(0);
        }
    }();
    // Merge the result with the current destination register contents as the merge mode dictates.
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);

    SetRegister(bb, instr.gpr0, value);

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,51 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
|
||||
|
/// Decodes the half float instruction with an immediate operand at program_code[pc] (HADD2_IMM
/// or HMUL2_IMM) and appends the generated nodes to bb. 'code' is not used. Returns pc unchanged.
u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto id = opcode->get().GetId();

    // Report the modifiers that are not supported yet.
    if (id == OpCode::Id::HADD2_IMM) {
        UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0);
    } else {
        UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None);
    }
    UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0,
                         "Half float immediate saturation not implemented");

    // First operand: source register with its abs/neg modifiers applied.
    Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half_imm.abs_a,
                                     instr.alu_half_imm.negate_a);

    // Second operand: the immediate encoded in the instruction.
    const Node op_b = UnpackHalfImmediate(instr, true);

    Node value = [&]() -> Node {
        MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}};
        if (id == OpCode::Id::HADD2_IMM) {
            return Operation(OperationCode::HAdd, meta, op_a, op_b);
        }
        if (id == OpCode::Id::HMUL2_IMM) {
            return Operation(OperationCode::HMul, meta, op_a, op_b);
        }
        UNREACHABLE();
        return Immediate(0);
    }();

    // Merge the result with the current destination register contents before writing it back.
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
    SetRegister(bb, instr.gpr0, value);

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,52 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
|
||||
|
/// Decodes the instruction with a 32-bit immediate operand at program_code[pc] (MOV32_IMM,
/// FMUL32_IMM or FADD32I) and appends the generated nodes to bb. 'code' is not used. Returns pc
/// unchanged.
u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto id = opcode->get().GetId();

    if (id == OpCode::Id::MOV32_IMM) {
        // Plain move of the immediate into the destination register.
        SetRegister(bb, instr.gpr0, GetImmediate32(instr));
    } else if (id == OpCode::Id::FMUL32_IMM) {
        const Node product =
            Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
        const Node value = GetSaturatedFloat(product, instr.fmul32.saturate);

        SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
        SetRegister(bb, instr.gpr0, value);
    } else if (id == OpCode::Id::FADD32I) {
        // Both operands carry their own abs/neg modifiers.
        const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
                                                instr.fadd32i.negate_a);
        const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
                                                instr.fadd32i.negate_b);

        const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
        SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
        SetRegister(bb, instr.gpr0, value);
    } else {
        UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
                          opcode->get().GetName());
    }

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,287 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::IAdd3Height; |
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
using Tegra::Shader::Pred; |
||||
|
using Tegra::Shader::Register; |
||||
|
|
||||
|
u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) { |
||||
|
const Instruction instr = {program_code[pc]}; |
||||
|
const auto opcode = OpCode::Decode(instr); |
||||
|
|
||||
|
Node op_a = GetRegister(instr.gpr8); |
||||
|
Node op_b = [&]() { |
||||
|
if (instr.is_b_imm) { |
||||
|
return Immediate(instr.alu.GetSignedImm20_20()); |
||||
|
} else if (instr.is_b_gpr) { |
||||
|
return GetRegister(instr.gpr20); |
||||
|
} else { |
||||
|
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); |
||||
|
} |
||||
|
}(); |
||||
|
|
||||
|
switch (opcode->get().GetId()) { |
||||
|
case OpCode::Id::IADD_C: |
||||
|
case OpCode::Id::IADD_R: |
||||
|
case OpCode::Id::IADD_IMM: { |
||||
|
UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented"); |
||||
|
|
||||
|
op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); |
||||
|
op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); |
||||
|
|
||||
|
const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); |
||||
|
|
||||
|
SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); |
||||
|
SetRegister(bb, instr.gpr0, value); |
||||
|
break; |
||||
|
} |
||||
|
case OpCode::Id::IADD3_C: |
||||
|
case OpCode::Id::IADD3_R: |
||||
|
case OpCode::Id::IADD3_IMM: { |
||||
|
Node op_c = GetRegister(instr.gpr39); |
||||
|
|
||||
|
const auto ApplyHeight = [&](IAdd3Height height, Node value) { |
||||
|
switch (height) { |
||||
|
case IAdd3Height::None: |
||||
|
return value; |
||||
|
case IAdd3Height::LowerHalfWord: |
||||
|
return BitfieldExtract(value, 0, 16); |
||||
|
case IAdd3Height::UpperHalfWord: |
||||
|
return BitfieldExtract(value, 16, 16); |
||||
|
default: |
||||
|
UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast<u32>(height)); |
||||
|
return Immediate(0); |
||||
|
} |
||||
|
}; |
||||
|
|
||||
|
if (opcode->get().GetId() == OpCode::Id::IADD3_R) { |
||||
|
op_a = ApplyHeight(instr.iadd3.height_a, op_a); |
||||
|
op_b = ApplyHeight(instr.iadd3.height_b, op_b); |
||||
|
op_c = ApplyHeight(instr.iadd3.height_c, op_c); |
||||
|
} |
||||
|
|
||||
|
op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true); |
||||
|
op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); |
||||
|
op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); |
||||
|
|
||||
|
const Node value = [&]() { |
||||
|
const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); |
||||
|
if (opcode->get().GetId() != OpCode::Id::IADD3_R) { |
||||
|
return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); |
||||
|
} |
||||
|
const Node shifted = [&]() { |
||||
|
switch (instr.iadd3.mode) { |
||||
|
case Tegra::Shader::IAdd3Mode::RightShift: |
||||
|
// TODO(tech4me): According to
|
||||
|
// https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
|
||||
|
// The addition between op_a and op_b should be done in uint33, more
|
||||
|
// investigation required
|
||||
|
return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab, |
||||
|
Immediate(16)); |
||||
|
case Tegra::Shader::IAdd3Mode::LeftShift: |
||||
|
return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab, |
||||
|
Immediate(16)); |
||||
|
default: |
||||
|
return add_ab; |
||||
|
} |
||||
|
}(); |
||||
|
return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); |
||||
|
}(); |
||||
|
|
||||
|
SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |
||||
|
SetRegister(bb, instr.gpr0, value); |
||||
|
break; |
||||
|
} |
||||
|
case OpCode::Id::ISCADD_C: |
||||
|
case OpCode::Id::ISCADD_R: |
||||
|
case OpCode::Id::ISCADD_IMM: { |
||||
|
UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
||||
|
"Condition codes generation in ISCADD is not implemented"); |
||||
|
|
||||
|
op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); |
||||
|
op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); |
||||
|
|
||||
|
const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount)); |
||||
|
const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); |
||||
|
const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); |
||||
|
|
||||
|
SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |
||||
|
SetRegister(bb, instr.gpr0, value); |
||||
|
break; |
||||
|
} |
||||
|
case OpCode::Id::POPC_C: |
||||
|
case OpCode::Id::POPC_R: |
||||
|
case OpCode::Id::POPC_IMM: { |
||||
|
if (instr.popc.invert) { |
||||
|
op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); |
||||
|
} |
||||
|
const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b); |
||||
|
SetRegister(bb, instr.gpr0, value); |
||||
|
break; |
||||
|
} |
||||
|
case OpCode::Id::SEL_C: |
||||
|
case OpCode::Id::SEL_R: |
||||
|
case OpCode::Id::SEL_IMM: { |
||||
|
const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0); |
||||
|
const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b); |
||||
|
SetRegister(bb, instr.gpr0, value); |
||||
|
break; |
||||
|
} |
||||
|
case OpCode::Id::LOP_C: |
||||
|
case OpCode::Id::LOP_R: |
||||
|
case OpCode::Id::LOP_IMM: { |
||||
|
if (instr.alu.lop.invert_a) |
||||
|
op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); |
||||
|
if (instr.alu.lop.invert_b) |
||||
|
op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); |
||||
|
|
||||
|
WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, |
||||
|
instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, |
||||
|
instr.generates_cc); |
||||
|
break; |
||||
|
} |
||||
|
case OpCode::Id::LOP3_C: |
||||
|
case OpCode::Id::LOP3_R: |
||||
|
case OpCode::Id::LOP3_IMM: { |
||||
|
const Node op_c = GetRegister(instr.gpr39); |
||||
|
const Node lut = [&]() { |
||||
|
if (opcode->get().GetId() == OpCode::Id::LOP3_R) { |
||||
|
return Immediate(instr.alu.lop3.GetImmLut28()); |
||||
|
} else { |
||||
|
return Immediate(instr.alu.lop3.GetImmLut48()); |
||||
|
} |
||||
|
}(); |
||||
|
|
||||
|
WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); |
||||
|
break; |
||||
|
} |
||||
|
case OpCode::Id::IMNMX_C: |
||||
|
case OpCode::Id::IMNMX_R: |
||||
|
case OpCode::Id::IMNMX_IMM: { |
||||
|
UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); |
||||
|
|
||||
|
const bool is_signed = instr.imnmx.is_signed; |
||||
|
|
||||
|
const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0); |
||||
|
const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); |
||||
|
const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); |
||||
|
const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); |
||||
|
|
||||
|
SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |
||||
|
SetRegister(bb, instr.gpr0, value); |
||||
|
break; |
||||
|
} |
||||
|
case OpCode::Id::LEA_R2: |
||||
|
case OpCode::Id::LEA_R1: |
||||
|
case OpCode::Id::LEA_IMM: |
||||
|
case OpCode::Id::LEA_RZ: |
||||
|
case OpCode::Id::LEA_HI: { |
||||
|
const auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> { |
||||
|
switch (opcode->get().GetId()) { |
||||
|
case OpCode::Id::LEA_R2: { |
||||
|
return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), |
||||
|
Immediate(static_cast<u32>(instr.lea.r2.entry_a))}; |
||||
|
} |
||||
|
|
||||
|
case OpCode::Id::LEA_R1: { |
||||
|
const bool neg = instr.lea.r1.neg != 0; |
||||
|
return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), |
||||
|
GetRegister(instr.gpr20), |
||||
|
Immediate(static_cast<u32>(instr.lea.r1.entry_a))}; |
||||
|
} |
||||
|
|
||||
|
case OpCode::Id::LEA_IMM: { |
||||
|
const bool neg = instr.lea.imm.neg != 0; |
||||
|
return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), |
||||
|
GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), |
||||
|
Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; |
||||
|
} |
||||
|
|
||||
|
case OpCode::Id::LEA_RZ: { |
||||
|
const bool neg = instr.lea.rz.neg != 0; |
||||
|
return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), |
||||
|
GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), |
||||
|
Immediate(static_cast<u32>(instr.lea.rz.entry_a))}; |
||||
|
} |
||||
|
|
||||
|
case OpCode::Id::LEA_HI: |
||||
|
default: |
||||
|
UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); |
||||
|
|
||||
|
return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8), |
||||
|
Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; |
||||
|
} |
||||
|
}(); |
||||
|
|
||||
|
UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), |
||||
|
"Unhandled LEA Predicate"); |
||||
|
|
||||
|
const Node shifted_c = |
||||
|
Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c); |
||||
|
const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c); |
||||
|
const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc); |
||||
|
|
||||
|
SetRegister(bb, instr.gpr0, value); |
||||
|
|
||||
|
break; |
||||
|
} |
||||
|
default: |
||||
|
UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); |
||||
|
} |
||||
|
|
||||
|
return pc; |
||||
|
} |
||||
|
|
||||
|
/// Emits the IR for a LOP3 (three-input, lookup-table driven logic) instruction.
///
/// For each of the 32 bit positions one bit is taken from op_a, op_b and op_c. The three bits are
/// packed into a 3-bit index (op_a -> bit 2, op_b -> bit 1, op_c -> bit 0) and the bit of imm_lut
/// found at that index becomes the result bit for that position.
///
/// @param bb      Basic block that receives the generated nodes.
/// @param dest    Destination register.
/// @param op_a    First operand (most significant bit of the LUT index).
/// @param op_b    Second operand (middle bit of the LUT index).
/// @param op_c    Third operand (least significant bit of the LUT index).
/// @param imm_lut Lookup table node.
/// @param sets_cc Forwarded to SetInternalFlagsFromInteger.
void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
                                    Node imm_lut, bool sets_cc) {
    constexpr u32 lop_iterations = 32;
    const Node one = Immediate(1);
    const Node two = Immediate(2);

    Node value{};
    for (u32 i = 0; i < lop_iterations; ++i) {
        const Node shift_amount = Immediate(i);

        // Bit i of op_c, kept at position 0 of the index.
        const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount);
        const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one);

        // Bit i of op_b, moved to position 1 of the index.
        const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount);
        const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one);
        const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one);

        // Bit i of op_a, moved to position 2 of the index.
        const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount);
        const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one);
        const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two);

        // The three packed bits occupy disjoint positions, so they have to be merged with OR.
        // Merging them with AND would always produce index 0.
        const Node pack_01 = Operation(OperationCode::IBitwiseOr, NO_PRECISE, pack_0, pack_1);
        const Node pack_012 = Operation(OperationCode::IBitwiseOr, NO_PRECISE, pack_01, pack_2);

        // Look up the result bit in the table and move it back to position i.
        const Node shifted_bit =
            Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012);
        const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one);

        const Node right =
            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount);

        if (i > 0) {
            value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right);
        } else {
            value = right;
        }
    }

    SetInternalFlagsFromInteger(bb, value, sets_cc);
    SetRegister(bb, dest, value);
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,96 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::LogicOperation; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
using Tegra::Shader::Pred; |
||||
|
using Tegra::Shader::PredicateResultMode; |
||||
|
using Tegra::Shader::Register; |
||||
|
|
||||
|
/// Decodes the integer ALU instructions that carry a 32-bit immediate (IADD32I and LOP32I).
/// Operand A is read from gpr8 and operand B is built from the imm20_32 immediate. Any other
/// opcode is reported as unimplemented. Returns pc unchanged.
u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    Node reg_operand = GetRegister(instr.gpr8);
    Node imm_operand = Immediate(static_cast<s32>(instr.alu.imm20_32));

    switch (opcode->get().GetId()) {
    case OpCode::Id::IADD32I: {
        UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");

        // Only negation of the register operand is applied here; abs is always off.
        reg_operand = GetOperandAbsNegInteger(reg_operand, false, instr.iadd32i.negate_a, true);

        const Node sum = Operation(OperationCode::IAdd, PRECISE, reg_operand, imm_operand);

        SetInternalFlagsFromInteger(bb, sum, instr.op_32.generates_cc);
        SetRegister(bb, instr.gpr0, sum);
        break;
    }
    case OpCode::Id::LOP32I: {
        // Apply the optional bitwise inversion of each operand before the logic operation.
        if (instr.alu.lop32i.invert_a) {
            reg_operand = Operation(OperationCode::IBitwiseNot, NO_PRECISE, reg_operand);
        }
        if (instr.alu.lop32i.invert_b) {
            imm_operand = Operation(OperationCode::IBitwiseNot, NO_PRECISE, imm_operand);
        }

        // LOP32I is emitted without a predicate result.
        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, reg_operand, imm_operand,
                            PredicateResultMode::None, Pred::UnusedIndex,
                            instr.op_32.generates_cc);
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
                          opcode->get().GetName());
    }

    return pc;
}
||||
|
|
||||
|
/// Emits a two-operand logic operation (And, Or, Xor or PassB), stores the result in dest and
/// optionally writes a predicate derived from that result.
///
/// @param bb             Basic block that receives the generated nodes.
/// @param dest           Destination register.
/// @param logic_op       Logic operation to perform; unknown values are reported and yield 0.
/// @param op_a           First operand.
/// @param op_b           Second operand (returned as-is for PassB).
/// @param predicate_mode How the predicate is derived from the result.
/// @param predicate      Predicate to write when predicate_mode requests it.
/// @param sets_cc        Forwarded to SetInternalFlagsFromInteger.
void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op,
                                   Node op_a, Node op_b, PredicateResultMode predicate_mode,
                                   Pred predicate, bool sets_cc) {
    Node result{};
    switch (logic_op) {
    case LogicOperation::And:
        result = Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b);
        break;
    case LogicOperation::Or:
        result = Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b);
        break;
    case LogicOperation::Xor:
        result = Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b);
        break;
    case LogicOperation::PassB:
        result = op_b;
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op));
        result = Immediate(0);
        break;
    }

    SetInternalFlagsFromInteger(bb, result, sets_cc);
    SetRegister(bb, dest, result);

    // Write the predicate value depending on the predicate mode.
    if (predicate_mode == PredicateResultMode::None) {
        // Nothing else to emit.
        return;
    }

    if (predicate_mode == PredicateResultMode::NotZero) {
        // The predicate becomes true when the result is not zero.
        const Node is_not_zero =
            Operation(OperationCode::LogicalINotEqual, result, Immediate(0));
        SetPredicate(bb, static_cast<u64>(predicate), is_not_zero);
        return;
    }

    UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}",
                      static_cast<u32>(predicate_mode));
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,49 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
|
||||
|
/// Decodes a BFE instruction. Only the BFE_IMM form is handled: the source register is shifted
/// left and then logically shifted right by the amounts derived from instr.bfe. Other forms are
/// reported as unimplemented. Returns pc unchanged.
u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.bfe.negate_b);

    // The source operand is read (and optionally negated) for every form, before dispatching.
    const Node source =
        GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, instr.bfe.negate_a, false);

    if (opcode->get().GetId() != OpCode::Id::BFE_IMM) {
        UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName());
        return pc;
    }

    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                         "Condition codes generation in BFE is not implemented");

    const Node left_amount = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue()));
    const Node right_amount =
        Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position));

    // Shift left first, then shift right by the left amount plus the shift position.
    const Node shifted_left =
        Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, source, left_amount);
    const Node extracted =
        Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, shifted_left, right_amount);

    SetInternalFlagsFromInteger(bb, extracted, instr.generates_cc);
    SetRegister(bb, instr.gpr0, extracted);

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,41 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
|
||||
|
/// Decodes a BFI instruction. Only BFI_IMM_R is handled: the base comes from gpr39, the inserted
/// value from gpr8, and the offset and bit count are the low and next 8 bits of the signed
/// 20-bit immediate. Returns pc unchanged.
u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    Node base{};
    Node packed_shift{};
    switch (opcode->get().GetId()) {
    case OpCode::Id::BFI_IMM_R:
        base = GetRegister(instr.gpr39);
        packed_shift = Immediate(instr.alu.GetSignedImm20_20());
        break;
    default:
        UNREACHABLE();
        base = Immediate(0);
        packed_shift = Immediate(0);
        break;
    }

    const Node insert = GetRegister(instr.gpr8);

    // packed_shift holds the offset in bits [0, 8) and the bit count in bits [8, 16).
    const Node offset = BitfieldExtract(packed_shift, 0, 8);
    const Node bits = BitfieldExtract(packed_shift, 8, 8);

    const Node inserted =
        Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);

    SetInternalFlagsFromInteger(bb, inserted, instr.generates_cc);
    SetRegister(bb, instr.gpr0, inserted);

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,149 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
using Tegra::Shader::Register; |
||||
|
|
||||
|
/// Decodes the conversion instructions I2I, I2F, F2F and F2I at pc and writes the converted
/// value to gpr0. Unsupported sizes, selectors, condition-code generation and rounding modes are
/// reported through the UNIMPLEMENTED macros. Returns pc unchanged.
u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Source operand shared by the I2F, F2F and F2I forms: gpr20 or a constant buffer entry.
    const auto read_operand_b = [&]() -> Node {
        if (instr.is_b_gpr) {
            return GetRegister(instr.gpr20);
        }
        return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
    };

    switch (opcode->get().GetId()) {
    case OpCode::Id::I2I_R: {
        UNIMPLEMENTED_IF(instr.conversion.selector);

        const bool input_signed = instr.conversion.is_input_signed;
        const bool output_signed = instr.conversion.is_output_signed;

        Node converted = GetRegister(instr.gpr20);
        converted = ConvertIntegerSize(converted, instr.conversion.src_size, input_signed);
        converted = GetOperandAbsNegInteger(converted, instr.conversion.abs_a,
                                            instr.conversion.negate_a, input_signed);

        // A cast is only emitted when the signedness actually changes.
        if (input_signed != output_signed) {
            converted =
                SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, converted);
        }

        SetInternalFlagsFromInteger(bb, converted, instr.generates_cc);
        SetRegister(bb, instr.gpr0, converted);
        break;
    }
    case OpCode::Id::I2F_R:
    case OpCode::Id::I2F_C: {
        UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
        UNIMPLEMENTED_IF(instr.conversion.selector);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in I2F is not implemented");

        Node converted = read_operand_b();
        const bool input_signed = instr.conversion.is_input_signed;

        // abs is applied on the integer side, negation on the float side after the cast.
        converted = ConvertIntegerSize(converted, instr.conversion.src_size, input_signed);
        converted = GetOperandAbsNegInteger(converted, instr.conversion.abs_a, false, input_signed);
        converted = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, converted);
        converted = GetOperandAbsNegFloat(converted, false, instr.conversion.negate_a);

        SetInternalFlagsFromFloat(bb, converted, instr.generates_cc);
        SetRegister(bb, instr.gpr0, converted);
        break;
    }
    case OpCode::Id::F2F_R:
    case OpCode::Id::F2F_C: {
        UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
        UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in F2F is not implemented");

        Node converted = read_operand_b();
        converted =
            GetOperandAbsNegFloat(converted, instr.conversion.abs_a, instr.conversion.negate_a);

        // Apply the requested rounding; None leaves the value untouched.
        switch (instr.conversion.f2f.rounding) {
        case Tegra::Shader::F2fRoundingOp::None:
            break;
        case Tegra::Shader::F2fRoundingOp::Round:
            converted = Operation(OperationCode::FRoundEven, PRECISE, converted);
            break;
        case Tegra::Shader::F2fRoundingOp::Floor:
            converted = Operation(OperationCode::FFloor, PRECISE, converted);
            break;
        case Tegra::Shader::F2fRoundingOp::Ceil:
            converted = Operation(OperationCode::FCeil, PRECISE, converted);
            break;
        case Tegra::Shader::F2fRoundingOp::Trunc:
            converted = Operation(OperationCode::FTrunc, PRECISE, converted);
            break;
        default:
            UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
                              static_cast<u32>(instr.conversion.f2f.rounding.Value()));
            converted = Immediate(0);
            break;
        }
        converted = GetSaturatedFloat(converted, instr.alu.saturate_d);

        SetInternalFlagsFromFloat(bb, converted, instr.generates_cc);
        SetRegister(bb, instr.gpr0, converted);
        break;
    }
    case OpCode::Id::F2I_R:
    case OpCode::Id::F2I_C: {
        UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in F2I is not implemented");

        Node converted = read_operand_b();
        converted =
            GetOperandAbsNegFloat(converted, instr.conversion.abs_a, instr.conversion.negate_a);

        // Round in floating point before casting to integer.
        switch (instr.conversion.f2i.rounding) {
        case Tegra::Shader::F2iRoundingOp::None:
            break;
        case Tegra::Shader::F2iRoundingOp::Floor:
            converted = Operation(OperationCode::FFloor, PRECISE, converted);
            break;
        case Tegra::Shader::F2iRoundingOp::Ceil:
            converted = Operation(OperationCode::FCeil, PRECISE, converted);
            break;
        case Tegra::Shader::F2iRoundingOp::Trunc:
            converted = Operation(OperationCode::FTrunc, PRECISE, converted);
            break;
        default:
            UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
                              static_cast<u32>(instr.conversion.f2i.rounding.Value()));
            converted = Immediate(0);
            break;
        }

        const bool is_signed = instr.conversion.is_output_signed;
        converted = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, converted);
        converted = ConvertIntegerSize(converted, instr.conversion.dest_size, is_signed);

        SetRegister(bb, instr.gpr0, converted);
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
    }

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,59 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
|
||||
|
/// Decodes the FFMA (float fused multiply-add) family. Operand A always comes from gpr8; the
/// sources of operands B and C depend on the opcode form (CR, RR, RC or IMM). The result is
/// optionally saturated and written to gpr0. Returns pc unchanged.
u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
    UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented",
                         instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
    UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented",
                         instr.ffma.tab5980_1.Value());

    const Node multiplicand = GetRegister(instr.gpr8);

    Node multiplier{};
    Node addend{};
    switch (opcode->get().GetId()) {
    case OpCode::Id::FFMA_CR:
        multiplier = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        addend = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::FFMA_RR:
        multiplier = GetRegister(instr.gpr20);
        addend = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::FFMA_RC:
        multiplier = GetRegister(instr.gpr39);
        addend = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        break;
    case OpCode::Id::FFMA_IMM:
        multiplier = GetImmediate19(instr);
        addend = GetRegister(instr.gpr39);
        break;
    default:
        UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
        multiplier = Immediate(0);
        addend = Immediate(0);
        break;
    }

    // Only negation modifiers are applied to operands B and C; abs is always off.
    multiplier = GetOperandAbsNegFloat(multiplier, false, instr.ffma.negate_b);
    addend = GetOperandAbsNegFloat(addend, false, instr.ffma.negate_c);

    Node result = Operation(OperationCode::FFma, PRECISE, multiplicand, multiplier, addend);
    result = GetSaturatedFloat(result, instr.alu.saturate_d);

    SetInternalFlagsFromFloat(bb, result, instr.generates_cc);
    SetRegister(bb, instr.gpr0, result);

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,58 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
|
||||
|
/// Decodes an FSET instruction: compares two float operands, combines the comparison with a
/// second predicate and writes a value selected by the outcome to gpr0. Returns pc unchanged.
u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    const Node lhs = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
                                           instr.fset.neg_a != 0);

    // Operand B is an immediate, a register or a constant buffer entry.
    Node rhs{};
    if (instr.is_b_imm) {
        rhs = GetImmediate19(instr);
    } else if (instr.is_b_gpr) {
        rhs = GetRegister(instr.gpr20);
    } else {
        rhs = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
    }
    rhs = GetOperandAbsNegFloat(rhs, instr.fset.abs_b != 0, instr.fset.neg_b != 0);

    // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
    // condition is true, and to 0 otherwise.
    const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0);

    const OperationCode combiner = GetPredicateCombiner(instr.fset.op);
    const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, lhs, rhs);

    const Node predicate = Operation(combiner, first_pred, second_pred);

    Node true_value{};
    Node false_value{};
    if (instr.fset.bf) {
        true_value = Immediate(1.0f);
        false_value = Immediate(0.0f);
    } else {
        true_value = Immediate(-1);
        false_value = Immediate(0);
    }
    const Node selected =
        Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);

    // The flag helper has to match the type of the value that was produced.
    if (instr.fset.bf) {
        SetInternalFlagsFromFloat(bb, selected, instr.generates_cc);
    } else {
        SetInternalFlagsFromInteger(bb, selected, instr.generates_cc);
    }
    SetRegister(bb, instr.gpr0, selected);

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,56 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
using Tegra::Shader::Pred; |
||||
|
|
||||
|
/// Decodes an FSETP instruction: compares two float operands, combines the comparison with a
/// second predicate and stores the outcome in pred3. When pred0 is in use it receives the same
/// combination computed with the negated comparison. Returns pc unchanged.
u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    const Node lhs = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
                                           instr.fsetp.neg_a != 0);

    // Operand B is an immediate, a register or a constant buffer entry.
    Node rhs{};
    if (instr.is_b_imm) {
        rhs = GetImmediate19(instr);
    } else if (instr.is_b_gpr) {
        rhs = GetRegister(instr.gpr20);
    } else {
        rhs = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
    }
    // NOTE(review): negation of operand B is hard-coded to false here - confirm whether FSETP
    // has a negate bit for it.
    rhs = GetOperandAbsNegFloat(rhs, instr.fsetp.abs_b, false);

    // We can't use the constant predicate as destination.
    ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

    const Node comparison = GetPredicateComparisonFloat(instr.fsetp.cond, lhs, rhs);
    const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);

    const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
    const Node primary = Operation(combiner, comparison, second_pred);

    // Set the primary predicate to the result of Predicate OP SecondPredicate
    SetPredicate(bb, instr.fsetp.pred3, primary);

    const bool has_secondary = instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex);
    if (!has_secondary) {
        return pc;
    }

    // Set the secondary predicate to the result of !Predicate OP SecondPredicate
    const Node negated_comparison = Operation(OperationCode::LogicalNegate, comparison);
    const Node secondary = Operation(combiner, negated_comparison, second_pred);
    SetPredicate(bb, instr.fsetp.pred0, secondary);

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,67 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include <array>
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
|
||||
|
/// Decodes an HSET2 instruction: compares the two packed half floats of each operand, combines
/// each comparison with a second predicate and writes a per-half mask or constant to gpr0.
/// Only the HSET2_R form is handled. Returns pc unchanged.
u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.hset2.ftz != 0);

    Node lhs = GetRegister(instr.gpr8);

    Node rhs{};
    switch (opcode->get().GetId()) {
    case OpCode::Id::HSET2_R:
        rhs = GetRegister(instr.gpr20);
        break;
    default:
        UNREACHABLE();
        rhs = Immediate(0);
        break;
    }

    lhs = GetOperandAbsNegHalf(lhs, instr.hset2.abs_a, instr.hset2.negate_a);
    rhs = GetOperandAbsNegHalf(rhs, instr.hset2.abs_b, instr.hset2.negate_b);

    const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);

    // The half types of both operands are carried in the operation metadata.
    MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}};
    const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, meta, lhs, rhs);

    const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);

    // Value written into a 16-bit half when its comparison passes; it depends on the bf bit.
    const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff;

    // HSET2 operates on each half float in the pack.
    std::array<Node, 2> halves;
    for (u32 half = 0; half < 2; ++half) {
        const Node true_value = Immediate(raw_value << (half * 16));
        const Node false_value = Immediate(0);

        const Node comparison =
            Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(half));
        const Node predicate = Operation(combiner, comparison, second_pred);

        halves[half] =
            Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value);
    }

    // Both halves occupy disjoint 16-bit ranges, so OR merges them into one register value.
    const Node packed = Operation(OperationCode::UBitwiseOr, NO_PRECISE, halves[0], halves[1]);
    SetRegister(bb, instr.gpr0, packed);

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,62 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
using Tegra::Shader::Pred; |
||||
|
|
||||
|
/// Decodes an HSETP2 instruction: compares the packed half floats of both operands, reduces the
/// pair of comparisons with LogicalAll2 or LogicalAny2 (selected by h_and), combines the result
/// with a second predicate and stores it in pred3. When pred0 is in use it receives the same
/// combination computed with the negated comparison. Only the HSETP2_R form is handled.
/// Returns pc unchanged.
u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);

    Node op_a = GetRegister(instr.gpr8);
    op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);

    const Node op_b = [&]() {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HSETP2_R:
            // Operand B must use its own abs modifier (abs_b), not operand A's.
            return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_b,
                                        instr.hsetp2.negate_b);
        default:
            UNREACHABLE();
            return Immediate(0);
        }
    }();

    // We can't use the constant predicate as destination.
    ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));

    const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);

    const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
    // h_and selects whether both halves or just one of them must pass the comparison.
    const OperationCode pair_combiner =
        instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;

    MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}};
    const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b);
    const Node first_pred = Operation(pair_combiner, comparison);

    // Set the primary predicate to the result of Predicate OP SecondPredicate
    const Node value = Operation(combiner, first_pred, second_pred);
    SetPredicate(bb, instr.hsetp2.pred3, value);

    if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
        // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
        const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred);
        SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred));
    }

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,76 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include <tuple>
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::HalfPrecision; |
||||
|
using Tegra::Shader::HalfType; |
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
|
||||
|
/// Decodes the HFMA2 (packed half fused multiply-add) family. Operand A always comes from gpr8;
/// the sources, half types and negate bits of operands B and C depend on the opcode form
/// (CR, RC, RR or IMM_R). The result is merged with the current gpr0 through HalfMerge before
/// being written back. Returns pc unchanged.
u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // The RR form keeps its precision field in a different place than the other forms.
    if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
        UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None);
    } else {
        UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None);
    }

    constexpr auto identity = HalfType::H0_H1;

    const HalfType type_a = instr.hfma2.type_a;
    const Node op_a = GetRegister(instr.gpr8);

    bool neg_b{}, neg_c{};
    bool saturate{};
    HalfType type_b{};
    HalfType type_c{};
    Node op_b{};
    Node op_c{};

    switch (opcode->get().GetId()) {
    case OpCode::Id::HFMA2_CR:
        neg_b = instr.hfma2.negate_b;
        neg_c = instr.hfma2.negate_c;
        saturate = instr.hfma2.saturate;
        type_b = instr.hfma2.type_b;
        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        type_c = instr.hfma2.type_reg39;
        op_c = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::HFMA2_RC:
        neg_b = instr.hfma2.negate_b;
        neg_c = instr.hfma2.negate_c;
        saturate = instr.hfma2.saturate;
        type_b = instr.hfma2.type_reg39;
        op_b = GetRegister(instr.gpr39);
        type_c = instr.hfma2.type_b;
        op_c = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        break;
    case OpCode::Id::HFMA2_RR:
        neg_b = instr.hfma2.rr.negate_b;
        neg_c = instr.hfma2.rr.negate_c;
        saturate = instr.hfma2.rr.saturate;
        type_b = instr.hfma2.type_b;
        op_b = GetRegister(instr.gpr20);
        type_c = instr.hfma2.rr.type_c;
        op_c = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::HFMA2_IMM_R:
        // The immediate form has no negate bit for operand B; neg_b stays false.
        neg_c = instr.hfma2.negate_c;
        saturate = instr.hfma2.saturate;
        type_b = identity;
        op_b = UnpackHalfImmediate(instr, true);
        type_c = instr.hfma2.type_reg39;
        op_c = GetRegister(instr.gpr39);
        break;
    default:
        // Unknown forms fall back to zero operands without reporting anything.
        saturate = false;
        type_b = identity;
        op_b = Immediate(0);
        type_c = identity;
        op_c = Immediate(0);
        break;
    }
    UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");

    op_b = GetOperandAbsNegHalf(op_b, false, neg_b);
    op_c = GetOperandAbsNegHalf(op_c, false, neg_c);

    MetaHalfArithmetic meta{true, {type_a, type_b, type_c}};
    Node result = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
    result = HalfMerge(GetRegister(instr.gpr0), result, instr.hfma2.merge);

    SetRegister(bb, instr.gpr0, result);

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,50 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
|
||||
|
/// Decodes an ISET instruction: compares two integer operands, combines the result with a
/// second predicate and writes a "true"/"false" constant into the destination register.
/// @param bb   Basic block that receives the generated nodes.
/// @param code Not read by this decoder.
/// @param pc   Index of the instruction inside program_code.
/// @return pc, unmodified.
u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    [[maybe_unused]] const auto opcode = OpCode::Decode(instr);

    const Node op_a = GetRegister(instr.gpr8);

    // Operand B is an immediate, a register or a constant buffer entry.
    Node op_b{};
    if (instr.is_b_imm) {
        op_b = Immediate(instr.alu.GetSignedImm20_20());
    } else if (instr.is_b_gpr) {
        op_b = GetRegister(instr.gpr20);
    } else {
        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
    }

    const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0);
    const Node first_pred =
        GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b);

    const OperationCode combiner = GetPredicateCombiner(instr.iset.op);
    const Node predicate = Operation(combiner, first_pred, second_pred);

    // With the bf bit set the result is the float 1.0f (true) or 0.0f (false); otherwise it is
    // the integer -1 (true) or 0 (false).
    Node true_value{};
    Node false_value{};
    if (instr.iset.bf) {
        true_value = Immediate(1.0f);
        false_value = Immediate(0.0f);
    } else {
        true_value = Immediate(-1);
        false_value = Immediate(0);
    }

    SetRegister(bb, instr.gpr0,
                Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value));

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,53 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
using Tegra::Shader::Pred; |
||||
|
|
||||
|
/// Decodes an ISETP instruction: compares two integer operands, combines the comparison with
/// a second predicate and stores the result in up to two destination predicates.
/// @param bb   Basic block that receives the generated nodes.
/// @param code Not read by this decoder.
/// @param pc   Index of the instruction inside program_code.
/// @return pc, unmodified.
u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    [[maybe_unused]] const auto opcode = OpCode::Decode(instr);

    const Node op_a = GetRegister(instr.gpr8);

    // Operand B is an immediate, a register or a constant buffer entry.
    Node op_b{};
    if (instr.is_b_imm) {
        op_b = Immediate(instr.alu.GetSignedImm20_20());
    } else if (instr.is_b_gpr) {
        op_b = GetRegister(instr.gpr20);
    } else {
        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
    }

    // The constant predicate is not a valid primary destination.
    ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

    const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0);
    const Node comparison =
        GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b);

    // Primary predicate = Comparison OP SecondPredicate
    const OperationCode combiner = GetPredicateCombiner(instr.isetp.op);
    SetPredicate(bb, instr.isetp.pred3, Operation(combiner, comparison, second_pred));

    // Secondary predicate (only when enabled) = !Comparison OP SecondPredicate
    const bool has_secondary = instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex);
    if (has_secondary) {
        const Node negated = Operation(OperationCode::LogicalNegate, comparison);
        SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated, second_pred));
    }

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,688 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include <algorithm>
|
||||
|
#include <vector>
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Attribute; |
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
using Tegra::Shader::Register; |
||||
|
using Tegra::Shader::TextureMiscMode; |
||||
|
using Tegra::Shader::TextureProcessMode; |
||||
|
using Tegra::Shader::TextureType; |
||||
|
|
||||
|
/// Returns how many coordinate components a texture of the given type takes: 1 for 1D, 2 for
/// 2D and 3 for 3D and cube textures. Any other type is reported as unimplemented and yields 0.
static std::size_t GetCoordCount(TextureType texture_type) {
    if (texture_type == TextureType::Texture1D) {
        return 1;
    }
    if (texture_type == TextureType::Texture2D) {
        return 2;
    }
    if (texture_type == TextureType::Texture3D || texture_type == TextureType::TextureCube) {
        return 3;
    }

    UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
    return 0;
}
||||
|
|
||||
|
/// Decodes one memory or texture instruction (LD_A, LD_C, LD_L, ST_A, ST_L, TEX, TEXS, TLD4,
/// TLD4S, TXQ, TMML, TLDS) and appends the generated nodes to the basic block. Any other
/// opcode is reported as unimplemented.
/// @param bb   Basic block that receives the generated nodes.
/// @param code Not read by this decoder.
/// @param pc   Index of the instruction inside program_code.
/// @return pc, unmodified.
u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::LD_A: {
        // Note: Shouldn't this be interp mode flat? As in no interpolation made.
        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
                             "Indirect attribute loads are not supported");
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute loads are not supported");

        Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
                                          Tegra::Shader::IpaSampleMode::Default};

        u64 next_element = instr.attribute.fmt20.element;
        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());

        const auto LoadNextElement = [&](u32 reg_offset) {
            const Node buffer = GetRegister(instr.gpr39);
            const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index),
                                                     next_element, input_mode, buffer);

            SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);

            // Load the next attribute element into the following register. If the element
            // to load goes beyond the vec4 size, load the first element of the next
            // attribute.
            next_element = (next_element + 1) % 4;
            next_index = next_index + (next_element == 0 ? 1 : 0);
        };

        // The size field encodes the number of 32-bit words minus one.
        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
            LoadNextElement(reg_offset);
        }
        break;
    }
    case OpCode::Id::LD_C: {
        UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);

        Node index = GetRegister(instr.gpr8);

        const Node op_a =
            GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, index);

        switch (instr.ld_c.type.Value()) {
        case Tegra::Shader::UniformType::Single:
            SetRegister(bb, instr.gpr0, op_a);
            break;

        case Tegra::Shader::UniformType::Double: {
            const Node op_b =
                GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index);

            // Both words go through temporals before being written to gpr0 and gpr0 + 1.
            SetTemporal(bb, 0, op_a);
            SetTemporal(bb, 1, op_b);
            SetRegister(bb, instr.gpr0, GetTemporal(0));
            SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1));
            break;
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value()));
        }
        break;
    }
    case OpCode::Id::LD_L: {
        UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
                             static_cast<unsigned>(instr.ld_l.unknown.Value()));

        // Local memory address = gpr8 + signed immediate
        const Node index = Operation(OperationCode::IAdd, GetRegister(instr.gpr8),
                                     Immediate(static_cast<s32>(instr.smem_imm)));
        const Node lmem = GetLocalMemory(index);

        switch (instr.ldst_sl.type.Value()) {
        case Tegra::Shader::StoreType::Bytes32:
            SetRegister(bb, instr.gpr0, lmem);
            break;
        default:
            UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
                              static_cast<unsigned>(instr.ldst_sl.type.Value()));
        }
        break;
    }
    case OpCode::Id::ST_A: {
        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
                             "Indirect attribute stores are not supported");
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute stores are not supported");

        u64 next_element = instr.attribute.fmt20.element;
        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());

        const auto StoreNextElement = [&](u32 reg_offset) {
            const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index),
                                                 next_element, GetRegister(instr.gpr39));
            const auto src = GetRegister(instr.gpr0.Value() + reg_offset);

            bb.push_back(Operation(OperationCode::Assign, dest, src));

            // Store the following register into the next attribute element. If the element
            // to store goes beyond the vec4 size, continue with the first element of the
            // next attribute.
            next_element = (next_element + 1) % 4;
            next_index = next_index + (next_element == 0 ? 1 : 0);
        };

        // The size field encodes the number of 32-bit words minus one.
        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
            StoreNextElement(reg_offset);
        }

        break;
    }
    case OpCode::Id::ST_L: {
        UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
                             static_cast<u32>(instr.st_l.unknown.Value()));

        // Local memory address = gpr8 + signed immediate
        const Node index = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
                                     Immediate(static_cast<s32>(instr.smem_imm)));

        switch (instr.ldst_sl.type.Value()) {
        case Tegra::Shader::StoreType::Bytes32:
            SetLocalMemory(bb, index, GetRegister(instr.gpr0));
            break;
        default:
            UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
                              static_cast<u32>(instr.ldst_sl.type.Value()));
        }
        break;
    }
    case OpCode::Id::TEX: {
        UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");

        if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
        }

        const TextureType texture_type{instr.tex.texture_type};
        const bool is_array = instr.tex.array != 0;
        const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
        const auto process_mode = instr.tex.GetTextureProcessMode();
        WriteTexInstructionFloat(
            bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
        break;
    }
    case OpCode::Id::TEXS: {
        const TextureType texture_type{instr.texs.GetTextureType()};
        const bool is_array{instr.texs.IsArrayTexture()};
        const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
        const auto process_mode = instr.texs.GetTextureProcessMode();

        if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
        }

        const Node4 components =
            GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);

        // The fp32 flag selects between full float and packed half float destinations.
        if (instr.texs.fp32_flag) {
            WriteTexsInstructionFloat(bb, instr, components);
        } else {
            WriteTexsInstructionHalfFloat(bb, instr, components);
        }
        break;
    }
    case OpCode::Id::TLD4: {
        ASSERT(instr.tld4.array == 0);
        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
                             "NDV is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
                             "PTP is not implemented");

        if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
        }

        const auto texture_type = instr.tld4.texture_type.Value();
        const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
        const bool is_array = instr.tld4.array != 0;
        WriteTexInstructionFloat(bb, instr,
                                 GetTld4Code(instr, texture_type, depth_compare, is_array));
        break;
    }
    case OpCode::Id::TLD4S: {
        UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");

        if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
        }

        const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
        const Node op_a = GetRegister(instr.gpr8);
        const Node op_b = GetRegister(instr.gpr20);

        std::vector<Node> coords;

        // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
        if (depth_compare) {
            // Note: TLD4S coordinate encoding works just like TEXS's
            const Node op_y = GetRegister(instr.gpr8.Value() + 1);
            coords.push_back(op_a);
            coords.push_back(op_y);
            coords.push_back(op_b);
        } else {
            coords.push_back(op_a);
            coords.push_back(op_b);
        }
        // The gather component is appended after the coordinates and is not counted as one.
        const auto num_coords = static_cast<u32>(coords.size());
        coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));

        const auto& sampler =
            GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);

        Node4 values;
        for (u32 element = 0; element < values.size(); ++element) {
            auto params = coords;
            MetaTexture meta{sampler, element, num_coords};
            values[element] =
                Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
        }

        WriteTexsInstructionFloat(bb, instr, values);
        break;
    }
    case OpCode::Id::TXQ: {
        if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
        }

        // TODO: The new commits on the texture refactor, change the way samplers work.
        // Sadly, not all texture instructions specify the type of texture their sampler
        // uses. This must be fixed at a later instance.
        const auto& sampler =
            GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);

        switch (instr.txq.query_type) {
        case Tegra::Shader::TextureQueryType::Dimension: {
            // Query all four elements into temporals first, then copy them to gpr0..gpr0 + 3.
            for (u32 element = 0; element < 4; ++element) {
                MetaTexture meta{sampler, element};
                const Node value = Operation(OperationCode::F4TextureQueryDimensions,
                                             std::move(meta), GetRegister(instr.gpr8));
                SetTemporal(bb, element, value);
            }
            for (u32 i = 0; i < 4; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
            }
            break;
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
                              static_cast<u32>(instr.txq.query_type.Value()));
        }
        break;
    }
    case OpCode::Id::TMML: {
        UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
                             "NDV is not implemented");

        if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
        }

        auto texture_type = instr.tmml.texture_type.Value();
        const bool is_array = instr.tmml.array != 0;
        const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);

        std::vector<Node> coords;

        // TODO: Add coordinates for different samplers once other texture types are implemented.
        switch (texture_type) {
        case TextureType::Texture1D:
            coords.push_back(GetRegister(instr.gpr8));
            break;
        case TextureType::Texture2D:
            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
            break;
        default:
            UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));

            // Fallback to interpreting as a 2D texture for now
            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
            texture_type = TextureType::Texture2D;
        }

        // Query two elements into temporals first, then copy them to gpr0 and gpr0 + 1.
        for (u32 element = 0; element < 2; ++element) {
            auto params = coords;
            MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())};
            const Node value =
                Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params));
            SetTemporal(bb, element, value);
        }
        for (u32 element = 0; element < 2; ++element) {
            SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
        }

        break;
    }
    case OpCode::Id::TLDS: {
        const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
        const bool is_array{instr.tlds.IsArrayTexture()};

        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");

        if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
        }

        WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
    }

    return pc;
}
||||
|
|
||||
|
/// Returns the sampler entry registered for the given sampler's index, creating a new entry
/// when that index has not been seen yet.
/// @param sampler   Instruction sampler field; its index is used as the lookup offset.
/// @param type      Texture type the sampler is used with.
/// @param is_array  Whether the sampler is used as an array texture.
/// @param is_shadow Whether the sampler is used with depth comparison.
/// @return Reference to the entry stored in used_samplers.
const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
                                    bool is_array, bool is_shadow) {
    const auto offset = static_cast<std::size_t>(sampler.index.Value());

    // Reuse the mapping if this offset was already registered. A sampler is expected to be
    // used with the same type, array and shadow properties every time.
    for (auto it = used_samplers.begin(); it != used_samplers.end(); ++it) {
        if (it->GetOffset() != offset) {
            continue;
        }
        ASSERT(it->GetType() == type && it->IsArray() == is_array &&
               it->IsShadow() == is_shadow);
        return *it;
    }

    // First use: the new entry's index is the number of samplers registered so far.
    const std::size_t next_index = used_samplers.size();
    const Sampler new_entry{offset, next_index, type, is_array, is_shadow};
    const auto insertion = used_samplers.emplace(new_entry);
    return *insertion.first;
}
||||
|
|
||||
|
/// Writes the enabled components of a texture result into consecutive registers starting at
/// gpr0. Disabled components are skipped, so the enabled ones end up tightly packed.
/// @param bb         Basic block that receives the generated nodes.
/// @param instr      Instruction providing the component mask and destination register.
/// @param components The four result components.
void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr,
                                        const Node4& components) {
    // Stage every enabled component in a temporal first.
    u32 written = 0;
    for (u32 channel = 0; channel < 4; ++channel) {
        if (instr.tex.IsComponentEnabled(channel)) {
            SetTemporal(bb, written, components[channel]);
            ++written;
        }
    }

    // Then move the staged values into the real destination registers.
    for (u32 slot = 0; slot < written; ++slot) {
        SetRegister(bb, instr.gpr0.Value() + slot, GetTemporal(slot));
    }
}
||||
|
|
||||
|
/// Writes the enabled components of a TEXS-style result as full floats. The first two enabled
/// components go to gpr0 and gpr0 + 1, the remaining ones to gpr28 and gpr28 + 1.
/// @param bb         Basic block that receives the generated nodes.
/// @param instr      Instruction providing the component mask and destination registers.
/// @param components The four result components.
void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr,
                                         const Node4& components) {
    // Stage every enabled component in a temporal first.
    u32 written = 0;
    for (u32 channel = 0; channel < 4; ++channel) {
        if (instr.texs.IsComponentEnabled(channel)) {
            SetTemporal(bb, written, components[channel]);
            ++written;
        }
    }

    for (u32 slot = 0; slot < written; ++slot) {
        const bool first_pair = slot < 2;
        if (!first_pair) {
            // A third or fourth component needs the second destination register pair.
            ASSERT(instr.texs.HasTwoDestinations());
        }
        const u64 base_register = first_pair ? instr.gpr0.Value() : instr.gpr28.Value();
        SetRegister(bb, base_register + slot % 2, GetTemporal(slot));
    }
}
||||
|
|
||||
|
/// Writes the enabled components of a TEXS-style result as packed half floats. Components are
/// packed in pairs: the first pair goes to gpr0 and, when more than two components are
/// enabled, the second pair goes to gpr28. Nothing is written when no component is enabled.
/// @param bb         Basic block that receives the generated nodes.
/// @param instr      Instruction providing the component mask and destination registers.
/// @param components The four result components.
void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr,
                                             const Node4& components) {
    // Gather the enabled components at the front of the array.
    Node4 packed_inputs;
    u32 enabled_count = 0;
    for (u32 channel = 0; channel < 4; ++channel) {
        if (instr.texs.IsComponentEnabled(channel)) {
            packed_inputs[enabled_count] = components[channel];
            ++enabled_count;
        }
    }
    if (enabled_count == 0) {
        return;
    }

    // Pad the unused tail with zero immediates so every pack operation has two operands.
    for (std::size_t pad = enabled_count; pad < packed_inputs.size(); ++pad) {
        packed_inputs[pad] = Immediate(0);
    }

    const Node low_pair = Operation(OperationCode::HPack2, packed_inputs[0], packed_inputs[1]);
    if (enabled_count > 2) {
        // Two destinations: stage both packed values before touching the real registers.
        SetTemporal(bb, 0, low_pair);
        SetTemporal(bb, 1, Operation(OperationCode::HPack2, packed_inputs[2], packed_inputs[3]));

        SetRegister(bb, instr.gpr0, GetTemporal(0));
        SetRegister(bb, instr.gpr28, GetTemporal(1));
        return;
    }

    SetRegister(bb, instr.gpr0, low_pair);
}
||||
|
|
||||
|
/// Builds the four per-element texture sampling operations for an already assembled
/// coordinate list, appending a lod/bias parameter when the process mode asks for one.
/// @param instr         Instruction providing the sampler and the gpr20 register field.
/// @param texture_type  Texture type being sampled.
/// @param process_mode  Texture process mode of the instruction.
/// @param depth_compare Whether depth comparison is used.
/// @param is_array      Whether the texture is an array texture.
/// @param array_offset  Position of the array index inside coords (only read when is_array).
/// @param bias_offset   Register offset from gpr20 at which lod/bias is stored.
/// @param coords        Coordinate nodes; consumed by this call.
/// @return One operation node per result element.
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, bool depth_compare, bool is_array,
                               std::size_t array_offset, std::size_t bias_offset,
                               std::vector<Node>&& coords) {
    const bool is_array_shadow = is_array && depth_compare;

    UNIMPLEMENTED_IF_MSG(
        (texture_type == TextureType::Texture3D && (is_array || depth_compare)) ||
            (texture_type == TextureType::TextureCube && is_array_shadow),
        "This method is not supported.");

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);

    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                            process_mode == TextureProcessMode::LL ||
                            process_mode == TextureProcessMode::LLA;

    // LOD selection (either via bias or explicit textureLod) not supported in GL for
    // sampler2DArrayShadow and samplerCubeArrayShadow.
    const bool is_2d_or_cube = texture_type == Tegra::Shader::TextureType::Texture2D ||
                               texture_type == Tegra::Shader::TextureType::TextureCube;
    const bool gl_lod_supported = !(is_2d_or_cube && is_array_shadow);

    OperationCode read_method = OperationCode::F4Texture;
    if (lod_needed && gl_lod_supported) {
        read_method = OperationCode::F4TextureLod;
    }

    const bool has_process_mode = process_mode != TextureProcessMode::None;
    UNIMPLEMENTED_IF(has_process_mode && !gl_lod_supported);

    std::optional<u32> array_offset_value;
    if (is_array) {
        array_offset_value = static_cast<u32>(array_offset);
    }

    // Captured before the lod/bias parameter is appended, so it counts coordinates only.
    const auto coords_count = static_cast<u32>(coords.size());

    if (has_process_mode && gl_lod_supported) {
        if (process_mode != TextureProcessMode::LZ) {
            // If present, lod or bias are always stored in the register indexed by the gpr20
            // field with an offset depending on the usage of the other registers
            coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
        } else {
            coords.push_back(Immediate(0.0f));
        }
    }

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        std::vector<Node> params = coords;
        MetaTexture meta{sampler, element, coords_count, array_offset_value};
        values[element] = Operation(read_method, std::move(meta), std::move(params));
    }

    return values;
}
||||
|
|
||||
|
/// Assembles the coordinate list of a TEX instruction from its source registers and forwards
/// it to GetTextureCode.
/// @param instr         Instruction providing the gpr8 and gpr20 register fields.
/// @param texture_type  Texture type being sampled.
/// @param process_mode  Texture process mode of the instruction.
/// @param depth_compare Whether depth comparison is used.
/// @param is_array      Whether the texture is an array texture.
/// @return One operation node per result element.
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
                           TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    const bool lod_bias_enabled = !(process_mode == TextureProcessMode::None ||
                                    process_mode == TextureProcessMode::LZ);

    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);

    // The array index, when present, lives in gpr8 and pushes the coordinates one register up.
    const u64 array_register = instr.gpr8.Value();
    const u64 coord_register = is_array ? array_register + 1 : array_register;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }

    // 1D.DC in opengl the 2nd component is ignored.
    const bool is_1d_shadow =
        texture_type == TextureType::Texture1D && depth_compare && !is_array;
    if (is_1d_shadow) {
        coords.push_back(Immediate(0.0f));
    }

    std::size_t array_offset{};
    if (is_array) {
        array_offset = coords.size();
        coords.push_back(GetRegister(array_register));
    }

    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20
        // or in the next register if lod or bias are used
        u64 depth_register = instr.gpr20.Value();
        if (lod_bias_enabled) {
            ++depth_register;
        }
        coords.push_back(GetRegister(depth_register));
    }

    // Pad the coordinates that are ignored with zero immediates.
    for (std::size_t filled = coords.size(); filled < total_coord_count; ++filled) {
        coords.push_back(Immediate(0));
    }

    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
                          0, std::move(coords));
}
||||
|
|
||||
|
/// Assembles the coordinate list of a TEXS instruction from its source registers and forwards
/// it to GetTextureCode.
/// @param instr         Instruction providing the gpr8 and gpr20 register fields.
/// @param texture_type  Texture type being sampled.
/// @param process_mode  Texture process mode of the instruction.
/// @param depth_compare Whether depth comparison is used.
/// @param is_array      Whether the texture is an array texture.
/// @return One operation node per result element.
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    const bool lod_bias_enabled = !(process_mode == TextureProcessMode::None ||
                                    process_mode == TextureProcessMode::LZ);

    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);

    // The array index, when present, lives in gpr8 and pushes the coordinates one register up.
    const u64 array_register = instr.gpr8.Value();
    const u64 coord_register = is_array ? array_register + 1 : array_register;

    // The last coordinate is read from gpr20 unless this is a non-array lookup with at most
    // two coordinates that also uses lod/bias or depth compare; then it follows the first one.
    const bool last_coord_in_gpr20 =
        is_array || coord_count > 2 || (!lod_bias_enabled && !depth_compare);
    const u64 last_coord_register =
        last_coord_in_gpr20 ? static_cast<u64>(instr.gpr20.Value()) : coord_register + 1;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        const bool is_last = coord_count > 1 && i + 1 == coord_count;
        const u64 source_register = is_last ? last_coord_register : coord_register + i;
        coords.push_back(GetRegister(source_register));
    }

    std::size_t array_offset{};
    if (is_array) {
        array_offset = coords.size();
        coords.push_back(GetRegister(array_register));
    }

    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20
        // or in the next register if lod or bias are used
        u64 depth_register = instr.gpr20.Value();
        if (lod_bias_enabled) {
            ++depth_register;
        }
        coords.push_back(GetRegister(depth_register));
    }

    // Pad the coordinates that are ignored with zero immediates.
    for (std::size_t filled = coords.size(); filled < total_coord_count; ++filled) {
        coords.push_back(Immediate(0));
    }

    const std::size_t bias_offset = coord_count > 2 ? 1 : 0;
    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
                          bias_offset, std::move(coords));
}
||||
|
|
||||
|
/// Builds the four per-element texture gather operations of a TLD4 instruction.
/// @param instr         Instruction providing the sampler and the gpr8 register field.
/// @param texture_type  Texture type being gathered from.
/// @param depth_compare Whether depth comparison is used; here it only selects the sampler.
/// @param is_array      Whether the texture is an array texture.
/// @return One operation node per result element.
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
                            bool is_array) {
    const std::size_t coord_count = GetCoordCount(texture_type);

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
    coords.reserve(coord_count + (is_array ? 1 : 0));

    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }

    // The array index is appended after the coordinates; its position is recorded in the meta.
    std::optional<u32> array_offset;
    if (is_array) {
        array_offset = static_cast<u32>(coords.size());
        coords.push_back(GetRegister(array_register));
    }

    // NOTE(review): no depth reference value is appended when depth_compare is set — confirm
    // whether the gather operation is expected to receive one.
    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto params = coords;
        MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset};
        values[element] =
            Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
    }

    return values;
}
||||
|
|
||||
|
/// Builds the four per-component texel fetch operations for a TLDS instruction.
/// @param instr        Instruction being decoded (gpr8, gpr20, sampler and tlds fields are read).
/// @param texture_type Texture type used to determine the number of coordinates.
/// @param is_array     When true, gpr8 holds the array index and coordinates start at gpr20.
/// @return One F4TexelFetch operation per element of the returned Node4.
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
    const std::size_t type_coord_count = GetCoordCount(texture_type);
    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // if is array gpr20 is used
    const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();

    // The last coordinate comes from gpr20 for non-array fetches with more than two coordinates,
    // or with exactly two coordinates and no explicit lod; otherwise it follows the first one.
    const u64 last_coord_register =
        ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;

    std::vector<Node> coords;

    for (std::size_t i = 0; i < type_coord_count; ++i) {
        const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }

    std::optional<u32> array_offset;
    if (is_array) {
        array_offset = static_cast<u32>(coords.size());
        coords.push_back(GetRegister(array_register));
    }
    // Captured before the lod operand is appended, so the lod is not counted as a coordinate.
    const auto coords_count = static_cast<u32>(coords.size());

    if (lod_enabled) {
        // When lod is used it is always in gpr20
        coords.push_back(GetRegister(instr.gpr20));
    } else {
        coords.push_back(Immediate(0));
    }

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        // Each element gets its own copy of the operand list.
        auto params = coords;
        MetaTexture meta{sampler, element, coords_count, array_offset};
        values[element] =
            Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params));
    }
    return values;
}
||||
|
|
||||
|
/// Computes the coordinate counts of a texture operation and reports unsupported combinations.
/// @return {coordinates of the texture type, total coordinates including array index and depth
///          reference, clamped to max_coords when the operation is reported as unsupported}.
std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
    TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
    std::size_t max_coords, std::size_t max_inputs) {
    const std::size_t base_coords = GetCoordCount(texture_type);

    // Array index and depth reference each take one extra coordinate slot.
    std::size_t operand_coords = base_coords;
    if (is_array) {
        ++operand_coords;
    }
    if (depth_compare) {
        ++operand_coords;
    }

    // Lod or bias takes one more input register on top of the coordinates.
    const std::size_t register_total = lod_bias_enabled ? operand_coords + 1 : operand_coords;

    const bool fits = operand_coords <= max_coords && register_total <= max_inputs;
    if (!fits) {
        UNIMPLEMENTED_MSG("Unsupported Texture operation");
        operand_coords = std::min(operand_coords, max_coords);
    }

    // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
        ++operand_coords;
    }

    return {base_coords, operand_coords};
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,178 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::ConditionCode; |
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
using Tegra::Shader::Register; |
||||
|
|
||||
|
/// Decodes flow control and miscellaneous instructions (EXIT, KIL, MOV_SYS, BRA, SSY, PBK, SYNC,
/// BRK, IPA, OUT_R, ISBERD, DEPBAR) located at pc and appends the resulting nodes to bb.
/// @return The program counter to continue from; only an unpredicated EXIT changes it.
u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::EXIT: {
        const ConditionCode exit_cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(exit_cc != ConditionCode::T, "EXIT condition code used: {}",
                             static_cast<u32>(exit_cc));

        if (instr.flow.cond == Tegra::Shader::FlowCondition::Always) {
            bb.push_back(Operation(OperationCode::Exit));

            const bool is_unpredicated =
                instr.pred.pred_index == static_cast<u64>(Tegra::Shader::Pred::UnusedIndex);
            if (is_unpredicated) {
                // If this is an unconditional exit then just end processing here,
                // otherwise we have to account for the possibility of the condition
                // not being met, so continue processing the next instruction.
                pc = MAX_PROGRAM_LENGTH - 1;
            }
        } else if (instr.flow.cond == Tegra::Shader::FlowCondition::Fcsm_Tr) {
            // TODO(bunnei): What is this used for? If we assume this condition is not
            // satisfied, dual vertex shaders in Farming Simulator make more sense
            UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
        } else {
            UNIMPLEMENTED_MSG("Unhandled flow condition: {}",
                              static_cast<u32>(instr.flow.cond.Value()));
        }
        break;
    }
    case OpCode::Id::KIL: {
        UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);

        const ConditionCode kil_cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(kil_cc != ConditionCode::T, "KIL condition code used: {}",
                             static_cast<u32>(kil_cc));

        bb.push_back(Operation(OperationCode::Discard));
        break;
    }
    case OpCode::Id::MOV_SYS: {
        switch (instr.sys20) {
        case Tegra::Shader::SystemVariable::InvocationInfo:
            LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
            SetRegister(bb, instr.gpr0, Immediate(0u));
            break;
        case Tegra::Shader::SystemVariable::Ydirection:
            // Config pack's third value is Y_NEGATE's state.
            SetRegister(bb, instr.gpr0, Operation(OperationCode::YNegate));
            break;
        default:
            UNIMPLEMENTED_MSG("Unhandled system move: {}", static_cast<u32>(instr.sys20.Value()));
        }
        break;
    }
    case OpCode::Id::BRA: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "BRA with constant buffers are not implemented");

        const u32 branch_target = pc + instr.bra.GetBranchTarget();
        const Node jump = Operation(OperationCode::Branch, Immediate(branch_target));

        // A condition code other than T wraps the jump in a conditional node.
        const ConditionCode bra_cc = instr.flow_condition_code;
        if (bra_cc == ConditionCode::T) {
            bb.push_back(jump);
        } else {
            bb.push_back(Conditional(GetConditionCode(bra_cc), {jump}));
        }
        break;
    }
    case OpCode::Id::SSY: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "Constant buffer flow is not supported");

        // The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets
        // the target of the jump that the SYNC instruction will make. The SSY opcode has a
        // similar structure to the BRA opcode.
        const u32 sync_target = pc + instr.bra.GetBranchTarget();
        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(sync_target)));
        break;
    }
    case OpCode::Id::PBK: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "Constant buffer PBK is not supported");

        // PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY
        // but using SYNC on a PBK address will kill the shader execution. We don't emulate this
        // because it's very unlikely a driver will emit such invalid shader.
        const u32 break_target = pc + instr.bra.GetBranchTarget();
        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(break_target)));
        break;
    }
    case OpCode::Id::SYNC: {
        const ConditionCode sync_cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(sync_cc != ConditionCode::T, "SYNC condition code used: {}",
                             static_cast<u32>(sync_cc));

        // The SYNC opcode jumps to the address previously set by the SSY opcode
        bb.push_back(Operation(OperationCode::PopFlowStack));
        break;
    }
    case OpCode::Id::BRK: {
        const ConditionCode brk_cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(brk_cc != ConditionCode::T, "BRK condition code used: {}",
                             static_cast<u32>(brk_cc));

        // The BRK opcode jumps to the address previously set by the PBK opcode
        bb.push_back(Operation(OperationCode::PopFlowStack));
        break;
    }
    case OpCode::Id::IPA: {
        const auto& attribute = instr.attribute.fmt28;
        const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
                                                instr.ipa.sample_mode.Value()};

        // Read the input attribute and apply the instruction's saturate flag before storing it.
        const Node raw_input = GetInputAttribute(attribute.index, attribute.element, input_mode);
        SetRegister(bb, instr.gpr0, GetSaturatedFloat(raw_input, instr.ipa.saturate));
        break;
    }
    case OpCode::Id::OUT_R: {
        UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
                             "Stream buffer is not supported");

        if (instr.out.emit) {
            // gpr0 is used to store the next address and gpr8 contains the address to emit.
            // Hardware uses pointers here but we just ignore it
            bb.push_back(Operation(OperationCode::EmitVertex));
            SetRegister(bb, instr.gpr0, Immediate(0));
        }
        if (instr.out.cut) {
            bb.push_back(Operation(OperationCode::EndPrimitive));
        }
        break;
    }
    case OpCode::Id::ISBERD: {
        UNIMPLEMENTED_IF(instr.isberd.o != 0);
        UNIMPLEMENTED_IF(instr.isberd.skew != 0);
        UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
        UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
        LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
        // Incomplete implementation: the source register is copied to the destination as-is.
        SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
        break;
    }
    case OpCode::Id::DEPBAR:
        LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
        break;
    default:
        UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
    }

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,67 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
using Tegra::Shader::Pred; |
||||
|
|
||||
|
/// Decodes PSETP and CSETP, which write predicates computed from other predicates or from a
/// condition code, and appends the resulting nodes to bb.
/// @return pc, unchanged.
u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    constexpr auto unused_pred = static_cast<u64>(Pred::UnusedIndex);

    switch (opcode->get().GetId()) {
    case OpCode::Id::PSETP: {
        const auto& psetp = instr.psetp;
        const Node lhs = GetPredicate(psetp.pred12, psetp.neg_pred12 != 0);
        const Node rhs = GetPredicate(psetp.pred29, psetp.neg_pred29 != 0);

        // We can't use the constant predicate as destination.
        ASSERT(psetp.pred3 != unused_pred);

        const Node extra_pred = GetPredicate(psetp.pred39, psetp.neg_pred39 != 0);

        // The same combiner joins the first two predicates and then the third one.
        const OperationCode combine_op = GetPredicateCombiner(psetp.op);
        const Node combined = Operation(combine_op, lhs, rhs);

        // Primary destination: (pred12 OP pred29) OP pred39
        SetPredicate(bb, psetp.pred3, Operation(combine_op, combined, extra_pred));

        if (psetp.pred0 != unused_pred) {
            // Secondary destination, if enabled: !(pred12 OP pred29) OP pred39
            const Node negated = Operation(OperationCode::LogicalNegate, combined);
            SetPredicate(bb, psetp.pred0, Operation(combine_op, negated, extra_pred));
        }
        break;
    }
    case OpCode::Id::CSETP: {
        const auto& csetp = instr.csetp;
        const Node extra_pred = GetPredicate(csetp.pred39, csetp.neg_pred39 != 0);
        const Node cc_value = GetConditionCode(csetp.cc);

        const OperationCode combine_op = GetPredicateCombiner(csetp.op);

        // Both destinations are optional for CSETP.
        if (csetp.pred3 != unused_pred) {
            SetPredicate(bb, csetp.pred3, Operation(combine_op, cc_value, extra_pred));
        }
        if (csetp.pred0 != unused_pred) {
            const Node negated_cc = Operation(OperationCode::LogicalNegate, cc_value);
            SetPredicate(bb, csetp.pred0, Operation(combine_op, negated_cc, extra_pred));
        }
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
    }

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,46 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
|
||||
|
/// Decodes PSET: combines three predicates and writes the boolean result to a register.
/// The result is 1.0f / 0.0f when the instruction's bf bit is set, otherwise 0xffffffff / 0.
/// @param bb   Basic block receiving the generated nodes.
/// @param code Unused by this decoder.
/// @param pc   Program counter of the instruction to decode.
/// @return pc, unchanged.
u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};

    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                         "Condition codes generation in PSET is not implemented");

    // first_pred = pred12 COND pred29
    const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0);
    const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0);
    const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b);

    const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0);

    // predicate = first_pred OP pred39
    const OperationCode combiner = GetPredicateCombiner(instr.pset.op);
    const Node predicate = Operation(combiner, first_pred, second_pred);

    const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff);
    const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0);
    const Node value =
        Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);

    if (instr.pset.bf) {
        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
    } else {
        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
    }
    SetRegister(bb, instr.gpr0, value);

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,51 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
|
||||
|
/// Decodes R2P: copies selected bits of a register into the programmable predicates.
/// For each of the seven programmable predicates, the predicate is assigned from the matching
/// bit of gpr8 (offset by the selected byte) only when its bit in the apply mask is set.
/// @param bb   Basic block receiving the generated nodes.
/// @param code Unused by this decoder.
/// @param pc   Program counter of the instruction to decode.
/// @return pc, unchanged.
u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr);

    const Node apply_mask = [&]() {
        switch (opcode->get().GetId()) {
        case OpCode::Id::R2P_IMM:
            return Immediate(static_cast<u32>(instr.r2p.immediate_mask));
        default:
            UNREACHABLE();
            return Immediate(static_cast<u32>(instr.r2p.immediate_mask));
        }
    }();
    const Node mask = GetRegister(instr.gpr8);
    // Bit offset of the selected byte inside the source register.
    const auto offset = static_cast<u32>(instr.r2p.byte) * 8;

    constexpr u32 programmable_preds = 7;
    // The index stays u64 so that GetPredicate(pred) resolves to the predicate-index overload.
    for (u64 pred = 0; pred < programmable_preds; ++pred) {
        const auto shift = static_cast<u32>(pred);

        // Only write the predicate when its bit in the apply mask is set.
        const Node apply_compare = BitfieldExtract(apply_mask, shift, 1);
        const Node condition =
            Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0));

        const Node value_compare = BitfieldExtract(mask, offset + shift, 1);
        const Node value = Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0));

        // Named 'assignment' rather than 'code' to avoid shadowing the function parameter.
        const Node assignment =
            Operation(OperationCode::LogicalAssign, GetPredicate(pred), value);
        bb.push_back(Conditional(condition, {assignment}));
    }

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,55 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
|
||||
|
/// Decodes the SHR and SHL families: shifts gpr8 by an immediate, register or constant buffer
/// operand and writes the result to gpr0.
/// @return pc, unchanged.
u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    const Node shifted_value = GetRegister(instr.gpr8);

    // The shift amount comes from an immediate, a register or a constant buffer entry.
    const Node shift_amount = [&]() {
        if (instr.is_b_imm) {
            return Immediate(instr.alu.GetSignedImm20_20());
        }
        if (instr.is_b_gpr) {
            return GetRegister(instr.gpr20);
        }
        return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
    }();

    // Both shift directions finish the same way: update flags, then write the destination.
    const auto write_result = [&](Node result) {
        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
        SetRegister(bb, instr.gpr0, result);
    };

    switch (opcode->get().GetId()) {
    case OpCode::Id::SHR_C:
    case OpCode::Id::SHR_R:
    case OpCode::Id::SHR_IMM:
        write_result(SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
                                     PRECISE, shifted_value, shift_amount));
        break;
    case OpCode::Id::SHL_C:
    case OpCode::Id::SHL_R:
    case OpCode::Id::SHL_IMM:
        write_result(
            Operation(OperationCode::ILogicalShiftLeft, PRECISE, shifted_value, shift_amount));
        break;
    default:
        UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
    }

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,111 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
using Tegra::Shader::Pred; |
||||
|
using Tegra::Shader::VideoType; |
||||
|
using Tegra::Shader::VmadShr; |
||||
|
|
||||
|
/// Decodes the video instructions VMAD (multiply-add with optional right shift) and VSETP
/// (integer comparison written to predicates) and appends the resulting nodes to bb.
/// @return pc, unchanged.
u32 ShaderIR::DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    const Node op_a =
        GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
                        instr.video.type_a, instr.video.byte_height_a);

    // Operand B is either a register selection or a 16-bit immediate, sign-extended when signed.
    const Node op_b = [&]() {
        if (instr.video.use_register_b) {
            return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
                                   instr.video.signed_b, instr.video.type_b,
                                   instr.video.byte_height_b);
        }
        if (!instr.video.signed_b) {
            return Immediate(instr.alu.GetImm20_16());
        }
        const auto signed_imm = static_cast<s16>(instr.alu.GetImm20_16());
        return Immediate(static_cast<u32>(signed_imm));
    }();

    switch (opcode->get().GetId()) {
    case OpCode::Id::VMAD: {
        // The result is treated as signed when either operand is signed.
        const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        const Node op_c = GetRegister(instr.gpr39);

        const Node product =
            SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b);
        Node result =
            SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, product, op_c);

        const bool has_shift =
            instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15;
        if (has_shift) {
            const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15);
            result = SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, result,
                                     shift);
        }

        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
        SetRegister(bb, instr.gpr0, result);
        break;
    }
    case OpCode::Id::VSETP: {
        // We can't use the constant predicate as destination.
        ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

        const bool is_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        const Node comparison =
            GetPredicateComparisonInteger(instr.vsetp.cond, is_signed, op_a, op_b);
        const Node extra_pred = GetPredicate(instr.vsetp.pred39, false);

        const OperationCode combine_op = GetPredicateCombiner(instr.vsetp.op);

        // Primary destination: comparison OP pred39
        SetPredicate(bb, instr.vsetp.pred3, Operation(combine_op, comparison, extra_pred));

        if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
            // Secondary destination, if enabled: !comparison OP pred39
            const Node negated = Operation(OperationCode::LogicalNegate, comparison);
            SetPredicate(bb, instr.vsetp.pred0, Operation(combine_op, negated, extra_pred));
        }
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
    }

    return pc;
}
||||
|
|
||||
|
/// Extracts the part of a video operand selected by the instruction encoding.
/// @param op          Node holding the full source value.
/// @param is_chunk    When false a single byte is selected by byte_height; otherwise type selects
///                    the chunk.
/// @param is_signed   Not used by this function.
/// @param type        Chunk selection used when is_chunk is true.
/// @param byte_height Index of the byte to extract when is_chunk is false.
/// @return The extracted bitfield, or an immediate zero for unsupported or invalid types.
Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
                               Tegra::Shader::VideoType type, u64 byte_height) {
    if (!is_chunk) {
        const auto bit_offset = static_cast<u32>(byte_height * 8);
        return BitfieldExtract(op, bit_offset, 8);
    }

    // Fallback result for every type that cannot be extracted.
    const Node fallback = Immediate(0);

    switch (type) {
    case Tegra::Shader::VideoType::Size16_Low:
        return BitfieldExtract(op, 0, 16);
    case Tegra::Shader::VideoType::Size16_High:
        return BitfieldExtract(op, 16, 16);
    case Tegra::Shader::VideoType::Size32:
        // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
        // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
        UNIMPLEMENTED();
        break;
    case Tegra::Shader::VideoType::Invalid:
        UNREACHABLE_MSG("Invalid instruction encoding");
        break;
    default:
        UNREACHABLE();
        break;
    }
    return fallback;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,97 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::OpCode; |
||||
|
|
||||
|
/// Decodes the XMAD family: multiplies 16-bit halves of two operands, adds a third operand
/// selected by the XMAD mode, optionally merges in operand B, and writes the result to gpr0.
/// @return pc, unchanged.
u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.xmad.sign_a);
    UNIMPLEMENTED_IF(instr.xmad.sign_b);
    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                         "Condition codes generation in XMAD is not implemented");

    Node op_a = GetRegister(instr.gpr8);

    // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
    UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
    const bool is_signed_a = instr.xmad.sign_a == 1;
    const bool is_signed_b = instr.xmad.sign_b == 1;
    const bool is_signed_c = is_signed_a;

    // Each encoding takes operands B and C, and the merge flag, from different fields.
    auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, Node> {
        switch (opcode->get().GetId()) {
        case OpCode::Id::XMAD_CR:
            return {instr.xmad.merge_56, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
                    GetRegister(instr.gpr39)};
        case OpCode::Id::XMAD_RR:
            return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
        case OpCode::Id::XMAD_RC:
            return {false, GetRegister(instr.gpr39),
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
        case OpCode::Id::XMAD_IMM:
            return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)),
                    GetRegister(instr.gpr39)};
        default:
            break;
        }
        UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
        return {false, Immediate(0), Immediate(0)};
    }();

    // Only the selected 16-bit half of each multiplicand takes part in the product.
    op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);

    const Node full_b = op_b;
    op_b = BitfieldExtract(op_b, instr.xmad.high_b ? 16 : 0, 16);

    // TODO(Rodrigo): Use an appropriate sign for this operation
    Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b);
    if (instr.xmad.product_shift_left) {
        product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
    }

    // The XMAD mode selects which form of operand C is added to the product.
    const Node full_c = op_c;
    switch (instr.xmad.mode) {
    case Tegra::Shader::XmadMode::None:
        op_c = full_c;
        break;
    case Tegra::Shader::XmadMode::CLo:
        op_c = BitfieldExtract(full_c, 0, 16);
        break;
    case Tegra::Shader::XmadMode::CHi:
        op_c = BitfieldExtract(full_c, 16, 16);
        break;
    case Tegra::Shader::XmadMode::CBcc: {
        // C + (B << 16), using the unextracted operand B.
        const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
                                               NO_PRECISE, full_b, Immediate(16));
        op_c = SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, full_c, shifted_b);
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value()));
        op_c = Immediate(0);
        break;
    }

    // TODO(Rodrigo): Use an appropriate sign for this operation
    Node sum = Operation(OperationCode::IAdd, product, op_c);
    if (is_merge) {
        // Merge: low 16 bits of the sum combined with operand B shifted into the high half.
        const Node low_half = BitfieldExtract(sum, 0, 16);
        const Node high_half =
            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, full_b, Immediate(16));
        sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, low_half, high_half);
    }

    SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
    SetRegister(bb, instr.gpr0, sum);

    return pc;
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,444 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include <cmath>
|
||||
|
#include <unordered_map>
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/common_types.h"
|
||||
|
#include "common/logging/log.h"
|
||||
|
#include "video_core/engines/shader_bytecode.h"
|
||||
|
#include "video_core/shader/shader_ir.h"
|
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
using Tegra::Shader::Attribute; |
||||
|
using Tegra::Shader::Instruction; |
||||
|
using Tegra::Shader::IpaMode; |
||||
|
using Tegra::Shader::Pred; |
||||
|
using Tegra::Shader::PredCondition; |
||||
|
using Tegra::Shader::PredOperation; |
||||
|
using Tegra::Shader::Register; |
||||
|
|
||||
|
/// Takes ownership of node_data by storing it in stored_nodes and returns a handle to it.
/// @param node_data Node payload to store; it is moved from.
/// @return Pointer to the stored node, owned by stored_nodes.
Node ShaderIR::StoreNode(NodeData&& node_data) {
    // node_data is a named rvalue reference and therefore an lvalue here; without std::move the
    // payload would be copied instead of moved.
    auto store = std::make_unique<NodeData>(std::move(node_data));
    // Grab the raw pointer before ownership is transferred to the container.
    const Node node = store.get();
    stored_nodes.push_back(std::move(store));
    return node;
}
||||
|
|
||||
|
/// Creates and stores a conditional node wrapping the given code under the given condition.
Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) {
    ConditionalNode conditional(condition, std::move(code));
    return StoreNode(std::move(conditional));
}
||||
|
|
||||
|
/// Creates and stores a comment node holding the given text.
Node ShaderIR::Comment(const std::string& text) {
    CommentNode comment(text);
    return StoreNode(std::move(comment));
}
||||
|
|
||||
|
/// Creates and stores an immediate node holding the given 32-bit value.
Node ShaderIR::Immediate(u32 value) {
    ImmediateNode immediate(value);
    return StoreNode(std::move(immediate));
}
||||
|
|
||||
|
/// Creates and stores a node for a general purpose register, recording the register as used.
Node ShaderIR::GetRegister(Register reg) {
    // The register at Register::ZeroIndex is not recorded in used_registers.
    const bool is_tracked = reg != Register::ZeroIndex;
    if (is_tracked) {
        used_registers.insert(static_cast<u32>(reg));
    }
    return StoreNode(GprNode(reg));
}
||||
|
|
||||
|
/// Returns an immediate node built from the instruction's GetImm20_19() value.
Node ShaderIR::GetImmediate19(Instruction instr) {
    const auto imm = instr.alu.GetImm20_19();
    return Immediate(imm);
}
||||
|
|
||||
|
/// Returns an immediate node built from the instruction's GetImm20_32() value.
Node ShaderIR::GetImmediate32(Instruction instr) {
    const auto imm = instr.alu.GetImm20_32();
    return Immediate(imm);
}
||||
|
|
||||
|
/// Creates a node reading a constant buffer at a fixed offset and records the access.
/// @param index_  Constant buffer index (truncated to 32 bits).
/// @param offset_ Offset inside the buffer (truncated to 32 bits).
Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) {
    const auto index = static_cast<u32>(index_);
    const auto offset = static_cast<u32>(offset_);

    // Create the usage entry for this buffer on first access, then record the offset.
    auto& usage = used_cbufs.try_emplace(index).first->second;
    usage.MarkAsUsed(offset);

    const Node offset_node = Immediate(offset);
    return StoreNode(CbufNode(index, offset_node));
}
||||
|
|
||||
|
/// Creates a stored constant buffer node whose offset is computed at runtime.
/// The buffer is registered in used_cbufs (created on first use) and flagged as indirectly
/// accessed. The final offset is the unsigned sum of node and the immediate offset.
/// @param index_ Constant buffer index; truncated to 32 bits.
/// @param offset_ Immediate part of the offset; truncated to 32 bits.
/// @param node Dynamic part of the offset.
Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
    const u32 index = static_cast<u32>(index_);
    const u32 offset = static_cast<u32>(offset_);

    // try_emplace leaves an already registered buffer untouched.
    auto& cbuf = used_cbufs.try_emplace(index).first->second;
    cbuf.MarkAsUsedIndirect();

    const Node immediate_offset = Immediate(offset);
    const Node final_offset =
        Operation(OperationCode::UAdd, NO_PRECISE, node, immediate_offset);
    return StoreNode(CbufNode(index, final_offset));
}
||||
|
|
||||
|
/// Creates a stored predicate node and records the predicate as used.
/// Pred::UnusedIndex and Pred::NeverExecute are never added to used_predicates.
/// @param pred_ Predicate index, converted to Pred.
/// @param negated Stored in the node as its negation flag.
Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
    const Pred pred = static_cast<Pred>(pred_);

    const bool is_untracked = pred == Pred::UnusedIndex || pred == Pred::NeverExecute;
    if (!is_untracked) {
        used_predicates.insert(pred);
    }

    PredicateNode predicate(pred, negated);
    return StoreNode(std::move(predicate));
}
||||
|
|
||||
|
/// Creates a predicate node for a constant boolean: Pred::UnusedIndex is used for true and
/// Pred::NeverExecute for false.
Node ShaderIR::GetPredicate(bool immediate) {
    const Pred constant_pred = immediate ? Pred::UnusedIndex : Pred::NeverExecute;
    return GetPredicate(static_cast<u64>(constant_pred));
}
||||
|
|
||||
|
/// Creates a stored input attribute node and records the attribute together with the
/// mode it was read with in used_input_attributes.
/// @param index Attribute being read.
/// @param element Element of the attribute; truncated to 32 bits.
/// @param input_mode Mode recorded for this read and stored in the node.
/// @param buffer Forwarded unchanged to the attribute node.
Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element,
                                 const Tegra::Shader::IpaMode& input_mode, Node buffer) {
    // emplace keeps the existing set of modes when the attribute is already registered.
    auto& recorded_modes =
        used_input_attributes.emplace(std::make_pair(index, std::set<Tegra::Shader::IpaMode>{}))
            .first->second;
    recorded_modes.insert(input_mode);

    const u32 element_index = static_cast<u32>(element);
    return StoreNode(AbufNode(index, element_index, input_mode, buffer));
}
||||
|
|
||||
|
/// Creates a stored output attribute node and records the attribute as written.
/// Writes to the clip distance attributes additionally flag the touched clip distance in
/// used_clip_distances.
/// @param index Attribute being written.
/// @param element Element of the attribute; truncated to 32 bits.
/// @param buffer Forwarded unchanged to the attribute node.
Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
    if (index == Attribute::Index::ClipDistances0123 ||
        index == Attribute::Index::ClipDistances4567) {
        // ClipDistances4567 holds clip distances 4..7, so its elements start at 4. The previous
        // base of 1 made e.g. element 0 of ClipDistances4567 collide with element 1 of
        // ClipDistances0123.
        // NOTE(review): assumes used_clip_distances has at least 8 entries - confirm; at()
        // throws otherwise.
        const u32 base = index == Attribute::Index::ClipDistances4567 ? 4 : 0;
        const auto clip_index = static_cast<u32>(base + element);
        used_clip_distances.at(clip_index) = true;
    }
    used_output_attributes.insert(index);

    return StoreNode(AbufNode(index, static_cast<u32>(element), buffer));
}
||||
|
|
||||
|
/// Creates a stored internal flag node, wrapped in a LogicalNegate operation when negated.
Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
    const Node flag_node = StoreNode(InternalFlagNode(flag));
    return negated ? Operation(OperationCode::LogicalNegate, flag_node) : flag_node;
}
||||
|
|
||||
|
/// Creates a stored local memory node for the given address node.
Node ShaderIR::GetLocalMemory(Node address) {
    LmemNode local_memory(address);
    return StoreNode(std::move(local_memory));
}
||||
|
|
||||
|
/// Returns the register node used as temporal storage number id.
/// Temporals are the registers at indices Register::ZeroIndex + 1 + id.
Node ShaderIR::GetTemporal(u32 id) {
    const auto temporal_index = Register::ZeroIndex + 1 + id;
    return GetRegister(temporal_index);
}
||||
|
|
||||
|
/// Applies the optional absolute and negate modifiers to a float operand.
/// The absolute value is taken first, so both flags together produce -|value|.
Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
    Node result = value;
    if (absolute) {
        result = Operation(OperationCode::FAbsolute, NO_PRECISE, result);
    }
    if (negate) {
        result = Operation(OperationCode::FNegate, NO_PRECISE, result);
    }
    return result;
}
||||
|
|
||||
|
/// Clamps a float value to [0, 1] when saturate is set; returns value untouched otherwise.
Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
    if (saturate) {
        // copysignf yields a zero with an explicitly positive sign for the lower bound.
        const Node lower_bound = Immediate(std::copysignf(0, 1));
        const Node upper_bound = Immediate(1.0f);
        return Operation(OperationCode::FClamp, NO_PRECISE, value, lower_bound, upper_bound);
    }
    return value;
}
||||
|
|
||||
|
/// Narrows a 32-bit integer node to the requested register size.
/// Byte and Short values are shifted left and then arithmetically shifted right by the number
/// of discarded bits (24 and 16); Word values are returned unchanged.
/// @param value Node to convert.
/// @param size Target size.
/// @param is_signed Forwarded to SignedOperation to select the operation variant.
/// @return The converted node, or value itself for Word and unknown sizes.
Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) {
    switch (size) {
    case Register::Size::Byte:
        value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value,
                                Immediate(24));
        value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value,
                                Immediate(24));
        return value;
    case Register::Size::Short:
        value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value,
                                Immediate(16));
        value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value,
                                Immediate(16));
        // Explicit return: this case used to fall through into Word implicitly.
        return value;
    case Register::Size::Word:
        // Default - do nothing
        return value;
    default:
        UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
        return value;
    }
}
||||
|
|
||||
|
/// Applies the optional absolute and negate modifiers to an integer operand.
/// Unsigned operands are returned untouched; for signed operands the absolute value is taken
/// before the negation.
Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) {
    Node result = value;
    if (is_signed) {
        if (absolute) {
            result = Operation(OperationCode::IAbsolute, NO_PRECISE, result);
        }
        if (negate) {
            result = Operation(OperationCode::INegate, NO_PRECISE, result);
        }
    }
    // Absolute or negate on an unsigned is pointless, so result is still value in that case.
    return result;
}
||||
|
|
||||
|
/// Builds a node for the packed half-float immediates of an instruction.
/// @param instr Instruction whose half_imm fields are read.
/// @param has_negation When set, the result is wrapped in an HNegate operation driven by the
///                     instruction's first_negate and second_negate bits.
Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
    const Node packed = Immediate(instr.half_imm.PackImmediates());
    if (has_negation) {
        const Node negate_first = GetPredicate(instr.half_imm.first_negate != 0);
        const Node negate_second = GetPredicate(instr.half_imm.second_negate != 0);
        return Operation(OperationCode::HNegate, HALF_NO_PRECISE, packed, negate_first,
                         negate_second);
    }
    return packed;
}
||||
|
|
||||
|
/// Combines a half-float result with its destination according to the merge mode.
/// H0_H1 returns src as is, F32 wraps src in HMergeF32, and Mrg_H0 / Mrg_H1 build an
/// HMergeH0 / HMergeH1 operation from dest and src.
Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
    using MergeMode = Tegra::Shader::HalfMerge;

    if (merge == MergeMode::H0_H1) {
        return src;
    }
    if (merge == MergeMode::F32) {
        return Operation(OperationCode::HMergeF32, src);
    }
    if (merge == MergeMode::Mrg_H0) {
        return Operation(OperationCode::HMergeH0, dest, src);
    }
    if (merge == MergeMode::Mrg_H1) {
        return Operation(OperationCode::HMergeH1, dest, src);
    }
    UNREACHABLE();
    return src;
}
||||
|
|
||||
|
/// Applies the optional absolute and negate modifiers to a half-float pair operand.
/// The absolute value is taken first; negation passes a true predicate for both HNegate
/// selector operands.
Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
    Node result = value;
    if (absolute) {
        result = Operation(OperationCode::HAbsolute, HALF_NO_PRECISE, result);
    }
    if (negate) {
        result = Operation(OperationCode::HNegate, HALF_NO_PRECISE, result, GetPredicate(true),
                           GetPredicate(true));
    }
    return result;
}
||||
|
|
||||
|
/// Builds the boolean node for a float comparison.
/// The *WithNan conditions use the same comparison as their plain counterpart and are
/// additionally true when either operand is NaN.
Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
        {PredCondition::LessThan, OperationCode::LogicalFLessThan},
        {PredCondition::Equal, OperationCode::LogicalFEqual},
        {PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
        {PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
        {PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
        {PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
        {PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
        {PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
        {PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
        {PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
        {PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}};

    const auto comparison = PredicateComparisonTable.find(condition);
    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
                         "Unknown predicate comparison operation");

    Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);

    bool accepts_nan = false;
    switch (condition) {
    case PredCondition::LessThanWithNan:
    case PredCondition::NotEqualWithNan:
    case PredCondition::LessEqualWithNan:
    case PredCondition::GreaterThanWithNan:
    case PredCondition::GreaterEqualWithNan:
        accepts_nan = true;
        break;
    default:
        break;
    }

    if (accepts_nan) {
        // predicate || isnan(op_a) || isnan(op_b)
        const Node a_is_nan = Operation(OperationCode::LogicalFIsNan, op_a);
        predicate = Operation(OperationCode::LogicalOr, predicate, a_is_nan);
        const Node b_is_nan = Operation(OperationCode::LogicalFIsNan, op_b);
        predicate = Operation(OperationCode::LogicalOr, predicate, b_is_nan);
    }

    return predicate;
}
||||
|
|
||||
|
/// Builds the boolean node for an integer comparison.
/// @param condition Comparison to perform; the *WithNan variants map to the same operation as
///                  their plain counterpart and are reported as unimplemented.
/// @param is_signed Forwarded to SignedOperation to select the operation variant.
/// @param op_a Left operand.
/// @param op_b Right operand.
Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
                                             Node op_b) {
    // Signed operation codes; SignedOperation receives is_signed alongside them.
    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
        {PredCondition::LessThan, OperationCode::LogicalILessThan},
        {PredCondition::Equal, OperationCode::LogicalIEqual},
        {PredCondition::LessEqual, OperationCode::LogicalILessEqual},
        {PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
        {PredCondition::NotEqual, OperationCode::LogicalINotEqual},
        {PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
        {PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
        {PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
        {PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
        {PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
        {PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}};

    const auto comparison{PredicateComparisonTable.find(condition)};
    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
                         "Unknown predicate comparison operation");

    // NOTE(review): comparison is dereferenced even when the lookup failed; this relies on the
    // assertion macro above not returning in that case - confirm.
    Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b);

    UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
                             condition == PredCondition::NotEqualWithNan ||
                             condition == PredCondition::LessEqualWithNan ||
                             condition == PredCondition::GreaterThanWithNan ||
                             condition == PredCondition::GreaterEqualWithNan,
                         "NaN comparisons for integers are not implemented");
    return predicate;
}
||||
|
|
||||
|
/// Builds the node for a comparison between two half-float pairs.
/// @param condition Comparison to perform; the *WithNan variants are reported as unimplemented
///                  and otherwise map to the same operation as their plain counterpart.
/// @param meta Metadata attached to the resulting operation.
/// @param op_a Left operand.
/// @param op_b Right operand.
Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition,
                                          const MetaHalfArithmetic& meta, Node op_a, Node op_b) {

    UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
                             condition == PredCondition::NotEqualWithNan ||
                             condition == PredCondition::LessEqualWithNan ||
                             condition == PredCondition::GreaterThanWithNan ||
                             condition == PredCondition::GreaterEqualWithNan,
                         "Unimplemented NaN comparison for half floats");

    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
        {PredCondition::LessThan, OperationCode::Logical2HLessThan},
        {PredCondition::Equal, OperationCode::Logical2HEqual},
        {PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
        {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
        {PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
        {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
        {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThan},
        {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqual},
        {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqual},
        {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThan},
        {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqual}};

    const auto comparison{PredicateComparisonTable.find(condition)};
    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
                         "Unknown predicate comparison operation");

    // NOTE(review): comparison is dereferenced even when the lookup failed; this relies on the
    // assertion macro above not returning in that case - confirm.
    const Node predicate = Operation(comparison->second, meta, op_a, op_b);

    return predicate;
}
||||
|
|
||||
|
/// Translates a predicate combining operation (And, Or, Xor) into the matching logical
/// operation code.
OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
    static const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = {
        {PredOperation::And, OperationCode::LogicalAnd},
        {PredOperation::Or, OperationCode::LogicalOr},
        {PredOperation::Xor, OperationCode::LogicalXor},
    };

    const auto op = PredicateOperationTable.find(operation);
    UNIMPLEMENTED_IF_MSG(op == PredicateOperationTable.end(), "Unknown predicate operation");
    // NOTE(review): op is dereferenced even when the lookup failed; this relies on the
    // assertion macro above not returning in that case - confirm.
    return op->second;
}
||||
|
|
||||
|
/// Builds the boolean node for a condition code.
/// Only NEU is handled (the negated Zero internal flag); any other code is reported as
/// unimplemented and yields the NeverExecute predicate.
Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
    if (cc == Tegra::Shader::ConditionCode::NEU) {
        return GetInternalFlag(InternalFlag::Zero, true);
    }
    UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
    return GetPredicate(static_cast<u64>(Pred::NeverExecute));
}
||||
|
|
||||
|
/// Appends an Assign operation to bb that writes src into the register dest.
void ShaderIR::SetRegister(BasicBlock& bb, Register dest, Node src) {
    const Node target = GetRegister(dest);
    bb.push_back(Operation(OperationCode::Assign, target, src));
}
||||
|
|
||||
|
/// Appends a LogicalAssign operation to bb that writes src into the predicate dest.
void ShaderIR::SetPredicate(BasicBlock& bb, u64 dest, Node src) {
    const Node target = GetPredicate(dest);
    bb.push_back(Operation(OperationCode::LogicalAssign, target, src));
}
||||
|
|
||||
|
/// Appends a LogicalAssign operation to bb that writes value into the given internal flag.
void ShaderIR::SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value) {
    const Node target = GetInternalFlag(flag);
    bb.push_back(Operation(OperationCode::LogicalAssign, target, value));
}
||||
|
|
||||
|
/// Appends an Assign operation to bb that writes value to local memory at address.
void ShaderIR::SetLocalMemory(BasicBlock& bb, Node address, Node value) {
    const Node target = GetLocalMemory(address);
    bb.push_back(Operation(OperationCode::Assign, target, value));
}
||||
|
|
||||
|
/// Appends an assignment of value to temporal number id, i.e. to the register at index
/// Register::ZeroIndex + 1 + id.
void ShaderIR::SetTemporal(BasicBlock& bb, u32 id, Node value) {
    const auto temporal_index = Register::ZeroIndex + 1 + id;
    SetRegister(bb, temporal_index, value);
}
||||
|
|
||||
|
/// Updates the internal flags from a float result when sets_cc is set.
/// Only the Zero flag is written (value == 0.0f); a warning is logged on every update because
/// the remaining flags are not handled.
void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc) {
    if (sets_cc) {
        const Node float_zero = Immediate(0.0f);
        const Node is_zero = Operation(OperationCode::LogicalFEqual, value, float_zero);
        SetInternalFlag(bb, InternalFlag::Zero, is_zero);
        LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
    }
}
||||
|
|
||||
|
/// Updates the internal flags from an integer result when sets_cc is set.
/// Only the Zero flag is written (value == 0); a warning is logged on every update because
/// the remaining flags are not handled.
void ShaderIR::SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc) {
    if (sets_cc) {
        const Node integer_zero = Immediate(0);
        const Node is_zero = Operation(OperationCode::LogicalIEqual, value, integer_zero);
        SetInternalFlag(bb, InternalFlag::Zero, is_zero);
        LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
    }
}
||||
|
|
||||
|
/// Builds an unsigned bitfield extraction of bits bits starting at bit offset of value.
Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
    const Node offset_node = Immediate(offset);
    const Node bits_node = Immediate(bits);
    return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, offset_node, bits_node);
}
||||
|
|
||||
|
/// Maps a signed operation code to its unsigned counterpart.
/// @param operation_code Signed operation code.
/// @param is_signed When true, operation_code is returned unchanged.
/// @return The unsigned counterpart, or a value-initialized OperationCode after reporting an
///         error when there is none.
/*static*/ OperationCode ShaderIR::SignedToUnsignedCode(OperationCode operation_code,
                                                        bool is_signed) {
    if (is_signed) {
        return operation_code;
    }
    switch (operation_code) {
    case OperationCode::FCastInteger:
        return OperationCode::FCastUInteger;
    case OperationCode::IAdd:
        return OperationCode::UAdd;
    case OperationCode::IMul:
        return OperationCode::UMul;
    case OperationCode::IDiv:
        return OperationCode::UDiv;
    case OperationCode::IMin:
        return OperationCode::UMin;
    case OperationCode::IMax:
        return OperationCode::UMax;
    case OperationCode::ICastFloat:
        return OperationCode::UCastFloat;
    case OperationCode::ICastUnsigned:
        return OperationCode::UCastSigned;
    case OperationCode::ILogicalShiftLeft:
        return OperationCode::ULogicalShiftLeft;
    case OperationCode::ILogicalShiftRight:
        return OperationCode::ULogicalShiftRight;
    case OperationCode::IArithmeticShiftRight:
        return OperationCode::UArithmeticShiftRight;
    case OperationCode::IBitwiseAnd:
        return OperationCode::UBitwiseAnd;
    case OperationCode::IBitwiseOr:
        return OperationCode::UBitwiseOr;
    case OperationCode::IBitwiseXor:
        return OperationCode::UBitwiseXor;
    case OperationCode::IBitwiseNot:
        return OperationCode::UBitwiseNot;
    case OperationCode::IBitfieldInsert:
        return OperationCode::UBitfieldInsert;
    case OperationCode::IBitfieldExtract:
        // Previously missing: an unsigned bitfield extract ended in the "unknown" path below.
        return OperationCode::UBitfieldExtract;
    case OperationCode::IBitCount:
        return OperationCode::UBitCount;
    case OperationCode::LogicalILessThan:
        return OperationCode::LogicalULessThan;
    case OperationCode::LogicalIEqual:
        return OperationCode::LogicalUEqual;
    case OperationCode::LogicalILessEqual:
        return OperationCode::LogicalULessEqual;
    case OperationCode::LogicalIGreaterThan:
        return OperationCode::LogicalUGreaterThan;
    case OperationCode::LogicalINotEqual:
        return OperationCode::LogicalUNotEqual;
    case OperationCode::LogicalIGreaterEqual:
        return OperationCode::LogicalUGreaterEqual;
    case OperationCode::INegate:
        // Return right away so the message of the next case is not emitted as well.
        UNREACHABLE_MSG("Can't negate an unsigned integer");
        return {};
    case OperationCode::IAbsolute:
        UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
        return {};
    default:
        // Every other code has no signed/unsigned pair; reported below.
        break;
    }
    UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
    return {};
}
||||
|
|
||||
|
} // namespace VideoCommon::Shader
|
||||
@ -0,0 +1,793 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project |
||||
|
// Licensed under GPLv2 or any later version |
||||
|
// Refer to the license.txt file included. |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include <array> |
||||
|
#include <cstring> |
||||
|
#include <map> |
||||
|
#include <set> |
||||
|
#include <string> |
||||
|
#include <tuple> |
||||
|
#include <variant> |
||||
|
#include <vector> |
||||
|
|
||||
|
#include "common/common_types.h" |
||||
|
#include "video_core/engines/maxwell_3d.h" |
||||
|
#include "video_core/engines/shader_bytecode.h" |
||||
|
#include "video_core/engines/shader_header.h" |
||||
|
|
||||
|
namespace VideoCommon::Shader { |
||||
|
|
||||
|
class OperationNode; |
||||
|
class ConditionalNode; |
||||
|
class GprNode; |
||||
|
class ImmediateNode; |
||||
|
class InternalFlagNode; |
||||
|
class PredicateNode; |
||||
|
class AbufNode; ///< Attribute buffer |
||||
|
class CbufNode; ///< Constant buffer |
||||
|
class LmemNode; ///< Local memory |
||||
|
class GmemNode; ///< Global memory |
||||
|
class CommentNode; |
||||
|
|
||||
|
/// Shader program as a sequence of 64-bit words.
using ProgramCode = std::vector<u64>;

/// Payload of an IR node: exactly one of the node classes declared above.
using NodeData =
    std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
                 PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>;
/// Handle to an IR node; a pointer to an immutable payload that does not own it.
using Node = const NodeData*;
/// Fixed group of four node handles.
using Node4 = std::array<Node, 4>;
/// Ordered list of node handles.
using BasicBlock = std::vector<Node>;

// NOTE(review): presumably the maximum program length in instructions - confirm the unit.
constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
||||
|
|
||||
|
/// Every operation an OperationNode can hold. The trailing comment of each entry gives the
/// operation's signature as (optional meta, operands...) -> result.
enum class OperationCode {
    Assign, /// (float& dest, float src) -> void

    Select, /// (MetaArithmetic, bool pred, float a, float b) -> float

    FAdd,          /// (MetaArithmetic, float a, float b) -> float
    FMul,          /// (MetaArithmetic, float a, float b) -> float
    FDiv,          /// (MetaArithmetic, float a, float b) -> float
    FFma,          /// (MetaArithmetic, float a, float b, float c) -> float
    FNegate,       /// (MetaArithmetic, float a) -> float
    FAbsolute,     /// (MetaArithmetic, float a) -> float
    FClamp,        /// (MetaArithmetic, float value, float min, float max) -> float
    FMin,          /// (MetaArithmetic, float a, float b) -> float
    FMax,          /// (MetaArithmetic, float a, float b) -> float
    FCos,          /// (MetaArithmetic, float a) -> float
    FSin,          /// (MetaArithmetic, float a) -> float
    FExp2,         /// (MetaArithmetic, float a) -> float
    FLog2,         /// (MetaArithmetic, float a) -> float
    FInverseSqrt,  /// (MetaArithmetic, float a) -> float
    FSqrt,         /// (MetaArithmetic, float a) -> float
    FRoundEven,    /// (MetaArithmetic, float a) -> float
    FFloor,        /// (MetaArithmetic, float a) -> float
    FCeil,         /// (MetaArithmetic, float a) -> float
    FTrunc,        /// (MetaArithmetic, float a) -> float
    FCastInteger,  /// (MetaArithmetic, int a) -> float
    FCastUInteger, /// (MetaArithmetic, uint a) -> float

    IAdd,                  /// (MetaArithmetic, int a, int b) -> int
    IMul,                  /// (MetaArithmetic, int a, int b) -> int
    IDiv,                  /// (MetaArithmetic, int a, int b) -> int
    INegate,               /// (MetaArithmetic, int a) -> int
    IAbsolute,             /// (MetaArithmetic, int a) -> int
    IMin,                  /// (MetaArithmetic, int a, int b) -> int
    IMax,                  /// (MetaArithmetic, int a, int b) -> int
    ICastFloat,            /// (MetaArithmetic, float a) -> int
    ICastUnsigned,         /// (MetaArithmetic, uint a) -> int
    ILogicalShiftLeft,     /// (MetaArithmetic, int a, uint b) -> int
    ILogicalShiftRight,    /// (MetaArithmetic, int a, uint b) -> int
    IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int
    IBitwiseAnd,           /// (MetaArithmetic, int a, int b) -> int
    IBitwiseOr,            /// (MetaArithmetic, int a, int b) -> int
    IBitwiseXor,           /// (MetaArithmetic, int a, int b) -> int
    IBitwiseNot,           /// (MetaArithmetic, int a) -> int
    IBitfieldInsert,       /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
    IBitfieldExtract,      /// (MetaArithmetic, int value, int offset, int bits) -> int
    IBitCount,             /// (MetaArithmetic, int) -> int

    UAdd,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMul,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UDiv,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMin,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMax,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UCastFloat,            /// (MetaArithmetic, float a) -> uint
    UCastSigned,           /// (MetaArithmetic, int a) -> uint
    ULogicalShiftLeft,     /// (MetaArithmetic, uint a, uint b) -> uint
    ULogicalShiftRight,    /// (MetaArithmetic, uint a, uint b) -> uint
    UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseAnd,           /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseOr,            /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseXor,           /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseNot,           /// (MetaArithmetic, uint a) -> uint
    UBitfieldInsert,  /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
    UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int bits) -> uint
    UBitCount,        /// (MetaArithmetic, uint) -> uint

    HAdd,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HMul,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HFma,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
    HAbsolute, /// (f16vec2 a) -> f16vec2
    HNegate,   /// (f16vec2 a, bool first, bool second) -> f16vec2
    HMergeF32, /// (f16vec2 src) -> float
    HMergeH0,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
    HMergeH1,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
    HPack2,    /// (float a, float b) -> f16vec2

    LogicalAssign, /// (bool& dst, bool src) -> void
    LogicalAnd,    /// (bool a, bool b) -> bool
    LogicalOr,     /// (bool a, bool b) -> bool
    LogicalXor,    /// (bool a, bool b) -> bool
    LogicalNegate, /// (bool a) -> bool
    LogicalPick2,  /// (bool2 pair, uint index) -> bool
    LogicalAll2,   /// (bool2 a) -> bool
    LogicalAny2,   /// (bool2 a) -> bool

    LogicalFLessThan,     /// (float a, float b) -> bool
    LogicalFEqual,        /// (float a, float b) -> bool
    LogicalFLessEqual,    /// (float a, float b) -> bool
    LogicalFGreaterThan,  /// (float a, float b) -> bool
    LogicalFNotEqual,     /// (float a, float b) -> bool
    LogicalFGreaterEqual, /// (float a, float b) -> bool
    LogicalFIsNan,        /// (float a) -> bool

    LogicalILessThan,     /// (int a, int b) -> bool
    LogicalIEqual,        /// (int a, int b) -> bool
    LogicalILessEqual,    /// (int a, int b) -> bool
    LogicalIGreaterThan,  /// (int a, int b) -> bool
    LogicalINotEqual,     /// (int a, int b) -> bool
    LogicalIGreaterEqual, /// (int a, int b) -> bool

    LogicalULessThan,     /// (uint a, uint b) -> bool
    LogicalUEqual,        /// (uint a, uint b) -> bool
    LogicalULessEqual,    /// (uint a, uint b) -> bool
    LogicalUGreaterThan,  /// (uint a, uint b) -> bool
    LogicalUNotEqual,     /// (uint a, uint b) -> bool
    LogicalUGreaterEqual, /// (uint a, uint b) -> bool

    Logical2HLessThan,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
    Logical2HEqual,        /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
    Logical2HLessEqual,    /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
    Logical2HGreaterThan,  /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
    Logical2HNotEqual,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
    Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2

    F4Texture,                /// (MetaTexture, float[N] coords, float[M] params) -> float4
    F4TextureLod,             /// (MetaTexture, float[N] coords, float[M] params) -> float4
    F4TextureGather,          /// (MetaTexture, float[N] coords, float[M] params) -> float4
    F4TextureQueryDimensions, /// (MetaTexture, float a) -> float4
    F4TextureQueryLod,        /// (MetaTexture, float[N] coords) -> float4
    F4TexelFetch,             /// (MetaTexture, int[N], int) -> float4

    Branch,        /// (uint branch_target) -> void
    PushFlowStack, /// (uint branch_target) -> void
    PopFlowStack,  /// () -> void
    Exit,          /// () -> void
    Discard,       /// () -> void

    EmitVertex,   /// () -> void
    EndPrimitive, /// () -> void

    YNegate, /// () -> float

    Amount, ///< Number of operation codes; not an operation itself.
};
||||
|
|
||||
|
/// Internal flags an InternalFlagNode can refer to.
enum class InternalFlag {
    Zero = 0,
    Sign = 1,
    Carry = 2,
    Overflow = 3,
    Amount = 4, ///< Number of flags; not a flag itself.
};
||||
|
|
||||
|
/// Describes the behaviour of the code path between a given entry point and a return point.
enum class ExitMethod {
    Undetermined, ///< Internal value. Only occurs when analyzing a JMP loop.
    AlwaysReturn, ///< All code paths reach the return point.
    Conditional,  ///< Code path reaches the return point or an END instruction conditionally.
    AlwaysEnd,    ///< All code paths reach an END instruction.
};
||||
|
|
||||
|
class Sampler { |
||||
|
public: |
||||
|
explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, |
||||
|
bool is_array, bool is_shadow) |
||||
|
: offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {} |
||||
|
|
||||
|
std::size_t GetOffset() const { |
||||
|
return offset; |
||||
|
} |
||||
|
|
||||
|
std::size_t GetIndex() const { |
||||
|
return index; |
||||
|
} |
||||
|
|
||||
|
Tegra::Shader::TextureType GetType() const { |
||||
|
return type; |
||||
|
} |
||||
|
|
||||
|
bool IsArray() const { |
||||
|
return is_array; |
||||
|
} |
||||
|
|
||||
|
bool IsShadow() const { |
||||
|
return is_shadow; |
||||
|
} |
||||
|
|
||||
|
bool operator<(const Sampler& rhs) const { |
||||
|
return std::tie(offset, index, type, is_array, is_shadow) < |
||||
|
std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow); |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
/// Offset in TSC memory from which to read the sampler object, as specified by the sampling |
||||
|
/// instruction. |
||||
|
std::size_t offset{}; |
||||
|
std::size_t index{}; ///< Value used to index into the generated GLSL sampler array. |
||||
|
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) |
||||
|
bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. |
||||
|
bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. |
||||
|
}; |
||||
|
|
||||
|
class ConstBuffer { |
||||
|
public: |
||||
|
void MarkAsUsed(u64 offset) { |
||||
|
max_offset = std::max(max_offset, static_cast<u32>(offset)); |
||||
|
} |
||||
|
|
||||
|
void MarkAsUsedIndirect() { |
||||
|
is_indirect = true; |
||||
|
} |
||||
|
|
||||
|
bool IsIndirect() const { |
||||
|
return is_indirect; |
||||
|
} |
||||
|
|
||||
|
u32 GetSize() const { |
||||
|
return max_offset + 1; |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
u32 max_offset{}; |
||||
|
bool is_indirect{}; |
||||
|
}; |
||||
|
|
||||
|
/// Metadata attached to arithmetic operations.
struct MetaArithmetic {
    bool precise{}; ///< Precision flag; set by PRECISE, cleared by NO_PRECISE.
};
||||
|
|
||||
|
/// Metadata attached to half-float operations.
struct MetaHalfArithmetic {
    bool precise{}; ///< Precision flag.
    /// One HalfType per operand slot (up to three); every slot defaults to H0_H1.
    std::array<Tegra::Shader::HalfType, 3> types = {Tegra::Shader::HalfType::H0_H1,
                                                    Tegra::Shader::HalfType::H0_H1,
                                                    Tegra::Shader::HalfType::H0_H1};
};
||||
|
|
||||
|
/// Metadata attached to texture operations.
/// NOTE(review): std::optional needs <optional>, which is not among this header's visible
/// standard includes - confirm it is pulled in transitively or add it.
struct MetaTexture {
    const Sampler& sampler; ///< Sampler being used; held by reference, not owned.
    u32 element{};
    u32 coords_count{};
    std::optional<u32> array_index; ///< Empty when no array index applies.
};
||||
|
|
||||
|
/// Ready-made metadata values: arithmetic with and without the precise flag, and half-float
/// arithmetic without it (operand types left at their defaults).
constexpr MetaArithmetic PRECISE = {true};
constexpr MetaArithmetic NO_PRECISE = {false};
constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false};
||||
|
|
||||
|
using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture>; |
||||
|
|
||||
|
/// Holds any kind of operation that can be done in the IR |
||||
|
class OperationNode final { |
||||
|
public: |
||||
|
template <typename... T> |
||||
|
explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {} |
||||
|
|
||||
|
template <typename... T> |
||||
|
explicit constexpr OperationNode(OperationCode code, Meta&& meta) |
||||
|
: code{code}, meta{std::move(meta)} {} |
||||
|
|
||||
|
template <typename... T> |
||||
|
explicit constexpr OperationNode(OperationCode code, const T*... operands) |
||||
|
: OperationNode(code, {}, operands...) {} |
||||
|
|
||||
|
template <typename... T> |
||||
|
explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_) |
||||
|
: code{code}, meta{std::move(meta)} { |
||||
|
|
||||
|
auto operands_list = {operands_...}; |
||||
|
for (auto& operand : operands_list) { |
||||
|
operands.push_back(operand); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands) |
||||
|
: code{code}, meta{meta}, operands{std::move(operands)} {} |
||||
|
|
||||
|
explicit OperationNode(OperationCode code, std::vector<Node>&& operands) |
||||
|
: code{code}, meta{}, operands{std::move(operands)} {} |
||||
|
|
||||
|
OperationCode GetCode() const { |
||||
|
return code; |
||||
|
} |
||||
|
|
||||
|
const Meta& GetMeta() const { |
||||
|
return meta; |
||||
|
} |
||||
|
|
||||
|
std::size_t GetOperandsCount() const { |
||||
|
return operands.size(); |
||||
|
} |
||||
|
|
||||
|
Node operator[](std::size_t operand_index) const { |
||||
|
return operands.at(operand_index); |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
const OperationCode code; |
||||
|
const Meta meta; |
||||
|
std::vector<Node> operands; |
||||
|
}; |
||||
|
|
||||
|
/// Encloses inside any kind of node that returns a boolean conditionally-executed code |
||||
|
class ConditionalNode final { |
||||
|
public: |
||||
|
explicit ConditionalNode(Node condition, std::vector<Node>&& code) |
||||
|
: condition{condition}, code{std::move(code)} {} |
||||
|
|
||||
|
Node GetCondition() const { |
||||
|
return condition; |
||||
|
} |
||||
|
|
||||
|
const std::vector<Node>& GetCode() const { |
||||
|
return code; |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
const Node condition; ///< Condition to be satisfied |
||||
|
std::vector<Node> code; ///< Code to execute |
||||
|
}; |
||||
|
|
||||
|
/// A general purpose register |
||||
|
class GprNode final { |
||||
|
public: |
||||
|
explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {} |
||||
|
|
||||
|
u32 GetIndex() const { |
||||
|
return static_cast<u32>(index); |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
const Tegra::Shader::Register index; |
||||
|
}; |
||||
|
|
||||
|
/// A 32-bits value that represents an immediate value |
||||
|
class ImmediateNode final { |
||||
|
public: |
||||
|
explicit constexpr ImmediateNode(u32 value) : value{value} {} |
||||
|
|
||||
|
u32 GetValue() const { |
||||
|
return value; |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
const u32 value; |
||||
|
}; |
||||
|
|
||||
|
/// One of Maxwell's internal flags |
||||
|
class InternalFlagNode final { |
||||
|
public: |
||||
|
explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {} |
||||
|
|
||||
|
InternalFlag GetFlag() const { |
||||
|
return flag; |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
const InternalFlag flag; |
||||
|
}; |
||||
|
|
||||
|
/// A predicate register, it can be negated without aditional nodes |
||||
|
class PredicateNode final { |
||||
|
public: |
||||
|
explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated) |
||||
|
: index{index}, negated{negated} {} |
||||
|
|
||||
|
Tegra::Shader::Pred GetIndex() const { |
||||
|
return index; |
||||
|
} |
||||
|
|
||||
|
bool IsNegated() const { |
||||
|
return negated; |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
const Tegra::Shader::Pred index; |
||||
|
const bool negated; |
||||
|
}; |
||||
|
|
||||
|
/// Attribute buffer memory (known as attributes or varyings in GLSL terms) |
||||
|
class AbufNode final { |
||||
|
public: |
||||
|
explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element, |
||||
|
const Tegra::Shader::IpaMode& input_mode, Node buffer = {}) |
||||
|
: input_mode{input_mode}, index{index}, element{element}, buffer{buffer} {} |
||||
|
|
||||
|
explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element, |
||||
|
Node buffer = {}) |
||||
|
: input_mode{}, index{index}, element{element}, buffer{buffer} {} |
||||
|
|
||||
|
Tegra::Shader::IpaMode GetInputMode() const { |
||||
|
return input_mode; |
||||
|
} |
||||
|
|
||||
|
Tegra::Shader::Attribute::Index GetIndex() const { |
||||
|
return index; |
||||
|
} |
||||
|
|
||||
|
u32 GetElement() const { |
||||
|
return element; |
||||
|
} |
||||
|
|
||||
|
Node GetBuffer() const { |
||||
|
return buffer; |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
const Tegra::Shader::IpaMode input_mode; |
||||
|
const Node buffer; |
||||
|
const Tegra::Shader::Attribute::Index index; |
||||
|
const u32 element; |
||||
|
}; |
||||
|
|
||||
|
/// Constant buffer node, usually mapped to uniform buffers in GLSL |
||||
|
class CbufNode final { |
||||
|
public: |
||||
|
explicit constexpr CbufNode(u32 index, Node offset) : index{index}, offset{offset} {} |
||||
|
|
||||
|
u32 GetIndex() const { |
||||
|
return index; |
||||
|
} |
||||
|
|
||||
|
Node GetOffset() const { |
||||
|
return offset; |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
const u32 index; |
||||
|
const Node offset; |
||||
|
}; |
||||
|
|
||||
|
/// Local memory node |
||||
|
class LmemNode final { |
||||
|
public: |
||||
|
explicit constexpr LmemNode(Node address) : address{address} {} |
||||
|
|
||||
|
Node GetAddress() const { |
||||
|
return address; |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
const Node address; |
||||
|
}; |
||||
|
|
||||
|
/// Global memory node |
||||
|
class GmemNode final { |
||||
|
public: |
||||
|
explicit constexpr GmemNode(Node address) : address{address} {} |
||||
|
|
||||
|
Node GetAddress() const { |
||||
|
return address; |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
const Node address; |
||||
|
}; |
||||
|
|
||||
|
/// Node carrying a textual commentary; it can be dropped
class CommentNode final {
public:
    /// Takes the text by value and moves it into the node.
    explicit CommentNode(std::string text) : text(std::move(text)) {}

    /// Returns the stored commentary text.
    const std::string& GetText() const { return text; }

private:
    std::string text; ///< Commentary text
};
||||
|
|
||||
|
class ShaderIR final { |
||||
|
public: |
||||
|
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset) |
||||
|
: program_code{program_code}, main_offset{main_offset} { |
||||
|
|
||||
|
Decode(); |
||||
|
} |
||||
|
|
||||
|
const std::map<u32, BasicBlock>& GetBasicBlocks() const { |
||||
|
return basic_blocks; |
||||
|
} |
||||
|
|
||||
|
const std::set<u32>& GetRegisters() const { |
||||
|
return used_registers; |
||||
|
} |
||||
|
|
||||
|
const std::set<Tegra::Shader::Pred>& GetPredicates() const { |
||||
|
return used_predicates; |
||||
|
} |
||||
|
|
||||
|
const std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>& |
||||
|
GetInputAttributes() const { |
||||
|
return used_input_attributes; |
||||
|
} |
||||
|
|
||||
|
const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const { |
||||
|
return used_output_attributes; |
||||
|
} |
||||
|
|
||||
|
const std::map<u32, ConstBuffer>& GetConstantBuffers() const { |
||||
|
return used_cbufs; |
||||
|
} |
||||
|
|
||||
|
const std::set<Sampler>& GetSamplers() const { |
||||
|
return used_samplers; |
||||
|
} |
||||
|
|
||||
|
const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances() |
||||
|
const { |
||||
|
return used_clip_distances; |
||||
|
} |
||||
|
|
||||
|
std::size_t GetLength() const { |
||||
|
return static_cast<std::size_t>(coverage_end * sizeof(u64)); |
||||
|
} |
||||
|
|
||||
|
const Tegra::Shader::Header& GetHeader() const { |
||||
|
return header; |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
void Decode(); |
||||
|
|
||||
|
ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels); |
||||
|
|
||||
|
BasicBlock DecodeRange(u32 begin, u32 end); |
||||
|
|
||||
|
/** |
||||
|
* Decodes a single instruction from Tegra to IR. |
||||
|
* @param bb Basic block where the nodes will be written to. |
||||
|
* @param pc Program counter. Offset to decode. |
||||
|
* @return Next address to decode. |
||||
|
*/ |
||||
|
u32 DecodeInstr(BasicBlock& bb, u32 pc); |
||||
|
|
||||
|
u32 DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
u32 DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc); |
||||
|
|
||||
|
/// Internalizes node's data and returns a managed pointer to a clone of that node |
||||
|
Node StoreNode(NodeData&& node_data); |
||||
|
|
||||
|
/// Creates a conditional node |
||||
|
Node Conditional(Node condition, std::vector<Node>&& code); |
||||
|
/// Creates a commentary |
||||
|
Node Comment(const std::string& text); |
||||
|
/// Creates an u32 immediate |
||||
|
Node Immediate(u32 value); |
||||
|
/// Creates a s32 immediate |
||||
|
Node Immediate(s32 value) { |
||||
|
return Immediate(static_cast<u32>(value)); |
||||
|
} |
||||
|
/// Creates a f32 immediate |
||||
|
Node Immediate(f32 value) { |
||||
|
u32 integral; |
||||
|
std::memcpy(&integral, &value, sizeof(u32)); |
||||
|
return Immediate(integral); |
||||
|
} |
||||
|
|
||||
|
/// Generates a node for a passed register. |
||||
|
Node GetRegister(Tegra::Shader::Register reg); |
||||
|
/// Generates a node representing a 19-bit immediate value |
||||
|
Node GetImmediate19(Tegra::Shader::Instruction instr); |
||||
|
/// Generates a node representing a 32-bit immediate value |
||||
|
Node GetImmediate32(Tegra::Shader::Instruction instr); |
||||
|
/// Generates a node representing a constant buffer |
||||
|
Node GetConstBuffer(u64 index, u64 offset); |
||||
|
/// Generates a node representing a constant buffer with a variadic offset |
||||
|
Node GetConstBufferIndirect(u64 index, u64 offset, Node node); |
||||
|
/// Generates a node for a passed predicate. It can be optionally negated |
||||
|
Node GetPredicate(u64 pred, bool negated = false); |
||||
|
/// Generates a predicate node for an immediate true or false value |
||||
|
Node GetPredicate(bool immediate); |
||||
|
/// Generates a node representing an input atttribute. Keeps track of used attributes. |
||||
|
Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, |
||||
|
const Tegra::Shader::IpaMode& input_mode, Node buffer = {}); |
||||
|
/// Generates a node representing an output atttribute. Keeps track of used attributes. |
||||
|
Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); |
||||
|
/// Generates a node representing an internal flag |
||||
|
Node GetInternalFlag(InternalFlag flag, bool negated = false); |
||||
|
/// Generates a node representing a local memory address |
||||
|
Node GetLocalMemory(Node address); |
||||
|
/// Generates a temporal, internally it uses a post-RZ register |
||||
|
Node GetTemporal(u32 id); |
||||
|
|
||||
|
/// Sets a register. src value must be a number-evaluated node. |
||||
|
void SetRegister(BasicBlock& bb, Tegra::Shader::Register dest, Node src); |
||||
|
/// Sets a predicate. src value must be a bool-evaluated node |
||||
|
void SetPredicate(BasicBlock& bb, u64 dest, Node src); |
||||
|
/// Sets an internal flag. src value must be a bool-evaluated node |
||||
|
void SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value); |
||||
|
/// Sets a local memory address. address and value must be a number-evaluated node |
||||
|
void SetLocalMemory(BasicBlock& bb, Node address, Node value); |
||||
|
/// Sets a temporal. Internally it uses a post-RZ register |
||||
|
void SetTemporal(BasicBlock& bb, u32 id, Node value); |
||||
|
|
||||
|
/// Sets internal flags from a float |
||||
|
void SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc = true); |
||||
|
/// Sets internal flags from an integer |
||||
|
void SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc = true); |
||||
|
|
||||
|
/// Conditionally absolute/negated float. Absolute is applied first |
||||
|
Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); |
||||
|
/// Conditionally saturates a float |
||||
|
Node GetSaturatedFloat(Node value, bool saturate = true); |
||||
|
|
||||
|
/// Converts an integer to different sizes. |
||||
|
Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed); |
||||
|
/// Conditionally absolute/negated integer. Absolute is applied first |
||||
|
Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed); |
||||
|
|
||||
|
/// Unpacks a half immediate from an instruction |
||||
|
Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); |
||||
|
/// Merges a half pair into another value |
||||
|
Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); |
||||
|
/// Conditionally absolute/negated half float pair. Absolute is applied first |
||||
|
Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); |
||||
|
|
||||
|
/// Returns a predicate comparing two floats |
||||
|
Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); |
||||
|
/// Returns a predicate comparing two integers |
||||
|
Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, |
||||
|
Node op_a, Node op_b); |
||||
|
/// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared |
||||
|
Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, |
||||
|
const MetaHalfArithmetic& meta, Node op_a, Node op_b); |
||||
|
|
||||
|
/// Returns a predicate combiner operation |
||||
|
OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); |
||||
|
|
||||
|
/// Returns a condition code evaluated from internal flags |
||||
|
Node GetConditionCode(Tegra::Shader::ConditionCode cc); |
||||
|
|
||||
|
/// Accesses a texture sampler |
||||
|
const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, |
||||
|
Tegra::Shader::TextureType type, bool is_array, bool is_shadow); |
||||
|
|
||||
|
/// Extracts a sequence of bits from a node |
||||
|
Node BitfieldExtract(Node value, u32 offset, u32 bits); |
||||
|
|
||||
|
void WriteTexInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, |
||||
|
const Node4& components); |
||||
|
|
||||
|
void WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, |
||||
|
const Node4& components); |
||||
|
void WriteTexsInstructionHalfFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, |
||||
|
const Node4& components); |
||||
|
|
||||
|
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
||||
|
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |
||||
|
bool is_array); |
||||
|
|
||||
|
Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
||||
|
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |
||||
|
bool is_array); |
||||
|
|
||||
|
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
||||
|
bool depth_compare, bool is_array); |
||||
|
|
||||
|
Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
||||
|
bool is_array); |
||||
|
|
||||
|
std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement( |
||||
|
Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, |
||||
|
bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); |
||||
|
|
||||
|
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
||||
|
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |
||||
|
bool is_array, std::size_t array_offset, std::size_t bias_offset, |
||||
|
std::vector<Node>&& coords); |
||||
|
|
||||
|
Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, |
||||
|
u64 byte_height); |
||||
|
|
||||
|
void WriteLogicOperation(BasicBlock& bb, Tegra::Shader::Register dest, |
||||
|
Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, |
||||
|
Tegra::Shader::PredicateResultMode predicate_mode, |
||||
|
Tegra::Shader::Pred predicate, bool sets_cc); |
||||
|
void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, |
||||
|
Node op_c, Node imm_lut, bool sets_cc); |
||||
|
|
||||
|
template <typename... T> |
||||
|
Node Operation(OperationCode code, const T*... operands) { |
||||
|
return StoreNode(OperationNode(code, operands...)); |
||||
|
} |
||||
|
|
||||
|
template <typename... T> |
||||
|
Node Operation(OperationCode code, Meta&& meta, const T*... operands) { |
||||
|
return StoreNode(OperationNode(code, std::move(meta), operands...)); |
||||
|
} |
||||
|
|
||||
|
template <typename... T> |
||||
|
Node Operation(OperationCode code, std::vector<Node>&& operands) { |
||||
|
return StoreNode(OperationNode(code, std::move(operands))); |
||||
|
} |
||||
|
|
||||
|
template <typename... T> |
||||
|
Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) { |
||||
|
return StoreNode(OperationNode(code, std::move(meta), std::move(operands))); |
||||
|
} |
||||
|
|
||||
|
template <typename... T> |
||||
|
Node SignedOperation(OperationCode code, bool is_signed, const T*... operands) { |
||||
|
return StoreNode(OperationNode(SignedToUnsignedCode(code, is_signed), operands...)); |
||||
|
} |
||||
|
|
||||
|
template <typename... T> |
||||
|
Node SignedOperation(OperationCode code, bool is_signed, Meta&& meta, const T*... operands) { |
||||
|
return StoreNode( |
||||
|
OperationNode(SignedToUnsignedCode(code, is_signed), std::move(meta), operands...)); |
||||
|
} |
||||
|
|
||||
|
static OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed); |
||||
|
|
||||
|
const ProgramCode& program_code; |
||||
|
const u32 main_offset; |
||||
|
|
||||
|
u32 coverage_begin{}; |
||||
|
u32 coverage_end{}; |
||||
|
std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; |
||||
|
|
||||
|
std::map<u32, BasicBlock> basic_blocks; |
||||
|
|
||||
|
std::vector<std::unique_ptr<NodeData>> stored_nodes; |
||||
|
|
||||
|
std::set<u32> used_registers; |
||||
|
std::set<Tegra::Shader::Pred> used_predicates; |
||||
|
std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>> |
||||
|
used_input_attributes; |
||||
|
std::set<Tegra::Shader::Attribute::Index> used_output_attributes; |
||||
|
std::map<u32, ConstBuffer> used_cbufs; |
||||
|
std::set<Sampler> used_samplers; |
||||
|
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; |
||||
|
|
||||
|
Tegra::Shader::Header header; |
||||
|
}; |
||||
|
|
||||
|
} // namespace VideoCommon::Shader |
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue