Browse Source
Merge pull request #1927 from ReinUsesLisp/shader-ir
Merge pull request #1927 from ReinUsesLisp/shader-ir
video_core: Replace gl_shader_decompiler with an IR based decompiler
committed by
GitHub
39 changed files with 5549 additions and 3860 deletions
-
28src/video_core/CMakeLists.txt
-
10src/video_core/engines/shader_bytecode.h
-
2src/video_core/engines/shader_header.h
-
4src/video_core/renderer_opengl/gl_rasterizer.cpp
-
8src/video_core/renderer_opengl/gl_shader_cache.cpp
-
1src/video_core/renderer_opengl/gl_shader_cache.h
-
4846src/video_core/renderer_opengl/gl_shader_decompiler.cpp
-
81src/video_core/renderer_opengl/gl_shader_decompiler.h
-
104src/video_core/renderer_opengl/gl_shader_gen.cpp
-
158src/video_core/renderer_opengl/gl_shader_gen.h
-
206src/video_core/shader/decode.cpp
-
155src/video_core/shader/decode/arithmetic.cpp
-
70src/video_core/shader/decode/arithmetic_half.cpp
-
51src/video_core/shader/decode/arithmetic_half_immediate.cpp
-
52src/video_core/shader/decode/arithmetic_immediate.cpp
-
287src/video_core/shader/decode/arithmetic_integer.cpp
-
96src/video_core/shader/decode/arithmetic_integer_immediate.cpp
-
49src/video_core/shader/decode/bfe.cpp
-
41src/video_core/shader/decode/bfi.cpp
-
149src/video_core/shader/decode/conversion.cpp
-
0src/video_core/shader/decode/decode_integer_set.cpp
-
59src/video_core/shader/decode/ffma.cpp
-
58src/video_core/shader/decode/float_set.cpp
-
56src/video_core/shader/decode/float_set_predicate.cpp
-
67src/video_core/shader/decode/half_set.cpp
-
62src/video_core/shader/decode/half_set_predicate.cpp
-
76src/video_core/shader/decode/hfma2.cpp
-
50src/video_core/shader/decode/integer_set.cpp
-
53src/video_core/shader/decode/integer_set_predicate.cpp
-
688src/video_core/shader/decode/memory.cpp
-
178src/video_core/shader/decode/other.cpp
-
67src/video_core/shader/decode/predicate_set_predicate.cpp
-
46src/video_core/shader/decode/predicate_set_register.cpp
-
51src/video_core/shader/decode/register_set_predicate.cpp
-
55src/video_core/shader/decode/shift.cpp
-
111src/video_core/shader/decode/video.cpp
-
97src/video_core/shader/decode/xmad.cpp
-
444src/video_core/shader/shader_ir.cpp
-
793src/video_core/shader/shader_ir.h
4846
src/video_core/renderer_opengl/gl_shader_decompiler.cpp
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -0,0 +1,206 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include <cstring>
|
|||
#include <set>
|
|||
|
|||
#include <fmt/format.h>
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/engines/shader_header.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
|
|||
namespace { |
|||
|
|||
/// Merges exit method of two parallel branches.
|
|||
constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) { |
|||
if (a == ExitMethod::Undetermined) { |
|||
return b; |
|||
} |
|||
if (b == ExitMethod::Undetermined) { |
|||
return a; |
|||
} |
|||
if (a == b) { |
|||
return a; |
|||
} |
|||
return ExitMethod::Conditional; |
|||
} |
|||
|
|||
/**
|
|||
* Returns whether the instruction at the specified offset is a 'sched' instruction. |
|||
* Sched instructions always appear before a sequence of 3 instructions. |
|||
*/ |
|||
constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { |
|||
constexpr u32 SchedPeriod = 4; |
|||
u32 absolute_offset = offset - main_offset; |
|||
|
|||
return (absolute_offset % SchedPeriod) == 0; |
|||
} |
|||
|
|||
} // namespace
|
|||
|
|||
void ShaderIR::Decode() { |
|||
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |
|||
|
|||
std::set<u32> labels; |
|||
const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); |
|||
if (exit_method != ExitMethod::AlwaysEnd) { |
|||
UNREACHABLE_MSG("Program does not always end"); |
|||
} |
|||
|
|||
if (labels.empty()) { |
|||
basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); |
|||
return; |
|||
} |
|||
|
|||
labels.insert(main_offset); |
|||
|
|||
for (const u32 label : labels) { |
|||
const auto next_it = labels.lower_bound(label + 1); |
|||
const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; |
|||
|
|||
basic_blocks.insert({label, DecodeRange(label, next_label)}); |
|||
} |
|||
} |
|||
|
|||
/// Walks the instructions in [begin, end), following branches, to find out how that range
/// exits. Every branch target found on the way is added to `labels`. The result is stored in
/// exit_method_map, keyed by the (begin, end) pair.
ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
    const auto [entry, is_new] =
        exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
    ExitMethod& result = entry->second;
    if (!is_new) {
        // This range already has an entry (finished or still in progress): reuse it.
        return result;
    }

    for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) {
        // Keep track of the span of code that has been visited.
        coverage_begin = std::min(coverage_begin, offset);
        coverage_end = std::max(coverage_end, offset + 1);

        const Instruction instr = {program_code[offset]};
        const auto opcode = OpCode::Decode(instr);
        if (!opcode) {
            continue;
        }

        switch (opcode->get().GetId()) {
        case OpCode::Id::EXIT: {
            // EXIT can be predicated, in which case the shader only ends here when the
            // condition is met. The code following it decides what happens otherwise.
            using Tegra::Shader::Pred;
            if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
                const ExitMethod fallthrough = Scan(offset + 1, end, labels);
                return result = ParallelExit(ExitMethod::AlwaysEnd, fallthrough);
            }
            return result = ExitMethod::AlwaysEnd;
        }
        case OpCode::Id::BRA: {
            const u32 target = offset + instr.bra.GetBranchTarget();
            labels.insert(target);

            // Both the fall-through path and the branch target have to be considered.
            const ExitMethod not_taken = Scan(offset + 1, end, labels);
            const ExitMethod taken = Scan(target, end, labels);
            return result = ParallelExit(not_taken, taken);
        }
        case OpCode::Id::SSY:
        case OpCode::Id::PBK: {
            // SSY and PBK are encoded similarly to BRA.
            UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                                 "Constant buffer branching is not supported");
            const u32 target = offset + instr.bra.GetBranchTarget();
            labels.insert(target);
            // Only the label is recorded; scanning goes on with the next instruction.
            break;
        }
        default:
            break;
        }
    }

    // The end of the range was reached without finding an exit.
    return result = ExitMethod::AlwaysReturn;
}
|||
|
|||
/// Decodes the instructions in [begin, end) into a single basic block.
/// When `begin` is greater than `end`, decoding runs up to MAX_PROGRAM_LENGTH instead.
BasicBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
    const u32 limit = begin > end ? MAX_PROGRAM_LENGTH : end;

    BasicBlock basic_block;
    // DecodeInstr returns the program counter of the next instruction to decode.
    for (u32 pc = begin; pc < limit;) {
        pc = DecodeInstr(basic_block, pc);
    }
    // Return the local by name: wrapping it in std::move would inhibit copy elision (NRVO).
    return basic_block;
}
|||
|
|||
/// Decodes the instruction at `pc` and appends the generated nodes to `bb`.
/// Returns the program counter of the next instruction to decode.
u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
    // Sched instructions generate no code, skip over them.
    if (IsSchedInstruction(pc, main_offset)) {
        return pc + 1;
    }

    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    if (!opcode) {
        // The instruction could not be decoded: report it and carry on with the next one.
        UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
        return pc + 1;
    }
    const auto& matcher = opcode->get();

    // Leave a comment node naming the instruction that is about to be decoded.
    bb.push_back(Comment(fmt::format("{}: {} (0x{:016x})", pc, matcher.GetName(), instr.value)));

    using Tegra::Shader::Pred;
    UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
                         "NeverExecute predicate not implemented");

    // Maps each instruction type to the member function that decodes it.
    using DecoderFunction = u32 (ShaderIR::*)(BasicBlock&, const BasicBlock&, u32);
    static const std::map<OpCode::Type, DecoderFunction> decoder_table = {
        {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
        {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
        {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
        {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
        {OpCode::Type::Shift, &ShaderIR::DecodeShift},
        {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
        {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
        {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
        {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
        {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
        {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
        {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
        {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
        {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
        {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
        {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
        {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
        {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
        {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
        {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
        {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
        {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
        {OpCode::Type::Video, &ShaderIR::DecodeVideo},
        {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
    };

    // The instruction is decoded into a temporary block so that it can be wrapped in a
    // conditional afterwards. Types without an entry in the table go to DecodeOther.
    std::vector<Node> tmp_block;
    const auto decoder = decoder_table.find(matcher.GetType());
    if (decoder == decoder_table.end()) {
        pc = DecodeOther(tmp_block, bb, pc);
    } else {
        pc = (this->*decoder->second)(tmp_block, bb, pc);
    }

    // Some instructions (like SSY) don't have a predicate field, they are always executed
    // unconditionally.
    const bool can_be_predicated = OpCode::IsPredicatedInstruction(matcher.GetId());
    const auto pred_index = static_cast<u32>(instr.pred.pred_index);
    const bool is_predicated =
        can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex);

    if (!is_predicated) {
        for (auto& node : tmp_block) {
            bb.push_back(std::move(node));
        }
        return pc + 1;
    }

    bb.push_back(
        Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)));
    return pc + 1;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,155 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
using Tegra::Shader::SubOp; |
|||
|
|||
/// Decodes the arithmetic instructions MOV, FMUL, FADD, MUFU, FMNMX and RRO.
/// The first operand is register gpr8; the second one is an immediate, register gpr20 or a
/// constant buffer entry depending on the instruction encoding. The generated nodes are appended
/// to `bb`; `code` is not used by this decoder. Returns `pc` unchanged.
u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    Node op_a = GetRegister(instr.gpr8);

    Node op_b = [&]() -> Node {
        if (instr.is_b_imm) {
            return GetImmediate19(instr);
        } else if (instr.is_b_gpr) {
            return GetRegister(instr.gpr20);
        } else {
            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        }
    }();

    switch (opcode->get().GetId()) {
    case OpCode::Id::MOV_C:
    case OpCode::Id::MOV_R: {
        // MOV has neither 'abs' nor 'neg' bits.
        SetRegister(bb, instr.gpr0, op_b);
        break;
    }
    case OpCode::Id::FMUL_C:
    case OpCode::Id::FMUL_R:
    case OpCode::Id::FMUL_IMM: {
        // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
        UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented",
                             instr.fmul.tab5cb8_2.Value());
        // The message names the field that is actually checked (tab5c68_0).
        UNIMPLEMENTED_IF_MSG(
            instr.fmul.tab5c68_0 != 1, "FMUL tab5c68_0({}) is not implemented",
            instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default

        op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);

        // TODO(Rodrigo): Should precise be used when there's a postfactor?
        Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);

        if (instr.fmul.postfactor != 0) {
            auto postfactor = static_cast<s32>(instr.fmul.postfactor);

            // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below
            // logic.
            if (postfactor >= 4) {
                postfactor = 7 - postfactor;
            } else {
                postfactor = 0 - postfactor;
            }

            // A positive postfactor scales the product up by a power of two, otherwise the
            // product is divided by it.
            if (postfactor > 0) {
                value = Operation(OperationCode::FMul, NO_PRECISE, value,
                                  Immediate(static_cast<f32>(1 << postfactor)));
            } else {
                value = Operation(OperationCode::FDiv, NO_PRECISE, value,
                                  Immediate(static_cast<f32>(1 << -postfactor)));
            }
        }

        value = GetSaturatedFloat(value, instr.alu.saturate_d);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::FADD_C:
    case OpCode::Id::FADD_R:
    case OpCode::Id::FADD_IMM: {
        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);

        Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
        value = GetSaturatedFloat(value, instr.alu.saturate_d);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::MUFU: {
        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);

        // The sub operation selects which function is applied to the first operand.
        Node value = [&]() {
            switch (instr.sub_op) {
            case SubOp::Cos:
                return Operation(OperationCode::FCos, PRECISE, op_a);
            case SubOp::Sin:
                return Operation(OperationCode::FSin, PRECISE, op_a);
            case SubOp::Ex2:
                return Operation(OperationCode::FExp2, PRECISE, op_a);
            case SubOp::Lg2:
                return Operation(OperationCode::FLog2, PRECISE, op_a);
            case SubOp::Rcp:
                return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
            case SubOp::Rsq:
                return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
            case SubOp::Sqrt:
                return Operation(OperationCode::FSqrt, PRECISE, op_a);
            default:
                UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}",
                                  static_cast<unsigned>(instr.sub_op.Value()));
                return Immediate(0);
            }
        }();
        value = GetSaturatedFloat(value, instr.alu.saturate_d);

        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::FMNMX_C:
    case OpCode::Id::FMNMX_R:
    case OpCode::Id::FMNMX_IMM: {
        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);

        // The predicate picks between the minimum and the maximum of both operands.
        const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);

        const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
        const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
        const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::RRO_C:
    case OpCode::Id::RRO_R:
    case OpCode::Id::RRO_IMM: {
        // Currently RRO is only implemented as a register move.
        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
        SetRegister(bb, instr.gpr0, op_b);
        LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
    }

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,70 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
|
|||
/// Decodes the half float instructions HADD2 and HMUL2 whose second operand is a register or a
/// constant buffer entry. The result is merged with the current value of the destination
/// register through HalfMerge and written back to gpr0. The generated nodes are appended to
/// `bb`; `code` is not used by this decoder. Returns `pc` unchanged.
u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto id = opcode->get().GetId();

    if (id == OpCode::Id::HADD2_C || id == OpCode::Id::HADD2_R) {
        UNIMPLEMENTED_IF(instr.alu_half.ftz != 0);
    }
    UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");

    // The negate bit of the first operand is ignored for HMUL2_R and the one of the second
    // operand is ignored for HMUL2_C.
    const bool negate_a = id != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
    const bool negate_b = id != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;

    const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a);

    Node op_b = [&]() -> Node {
        switch (id) {
        case OpCode::Id::HADD2_C:
        case OpCode::Id::HMUL2_C:
            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        case OpCode::Id::HADD2_R:
        case OpCode::Id::HMUL2_R:
            return GetRegister(instr.gpr20);
        default:
            UNREACHABLE();
            return Immediate(0);
        }
    }();
    op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);

    Node value = [&]() -> Node {
        // Both operand types are read through the alu_half view, like every other field of
        // this non-immediate encoding (the immediate view belongs to HADD2_IMM/HMUL2_IMM).
        MetaHalfArithmetic meta{true, {instr.alu_half.type_a, instr.alu_half.type_b}};
        switch (id) {
        case OpCode::Id::HADD2_C:
        case OpCode::Id::HADD2_R:
            return Operation(OperationCode::HAdd, meta, op_a, op_b);
        case OpCode::Id::HMUL2_C:
        case OpCode::Id::HMUL2_R:
            return Operation(OperationCode::HMul, meta, op_a, op_b);
        default:
            UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
            return Immediate(0);
        }
    }();
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);

    SetRegister(bb, instr.gpr0, value);

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,51 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
|
|||
/// Decodes the half float instructions HADD2_IMM and HMUL2_IMM, which take their second operand
/// from an immediate packed in the instruction. The result is merged with the current value of
/// the destination register through HalfMerge and written back to gpr0. The generated nodes are
/// appended to `bb`; `code` is not used by this decoder. Returns `pc` unchanged.
u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto id = opcode->get().GetId();

    // HADD2_IMM is checked for ftz, anything else is checked for a precision mode.
    if (id != OpCode::Id::HADD2_IMM) {
        UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None);
    } else {
        UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0);
    }
    UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0,
                         "Half float immediate saturation not implemented");

    Node lhs = GetRegister(instr.gpr8);
    lhs = GetOperandAbsNegHalf(lhs, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);

    const Node rhs = UnpackHalfImmediate(instr, true);

    MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}};
    Node result;
    switch (id) {
    case OpCode::Id::HADD2_IMM:
        result = Operation(OperationCode::HAdd, meta, lhs, rhs);
        break;
    case OpCode::Id::HMUL2_IMM:
        result = Operation(OperationCode::HMul, meta, lhs, rhs);
        break;
    default:
        UNREACHABLE();
        result = Immediate(0);
        break;
    }
    result = HalfMerge(GetRegister(instr.gpr0), result, instr.alu_half_imm.merge);

    SetRegister(bb, instr.gpr0, result);

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,52 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
|
|||
/// Decodes the instructions that carry a 32-bit immediate: MOV32_IMM, FMUL32_IMM and FADD32I.
/// The generated nodes are appended to `bb`; `code` is not used by this decoder.
/// Returns `pc` unchanged.
u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto id = opcode->get().GetId();

    if (id == OpCode::Id::MOV32_IMM) {
        SetRegister(bb, instr.gpr0, GetImmediate32(instr));
    } else if (id == OpCode::Id::FMUL32_IMM) {
        const Node product =
            Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
        const Node result = GetSaturatedFloat(product, instr.fmul32.saturate);

        SetInternalFlagsFromFloat(bb, result, instr.op_32.generates_cc);
        SetRegister(bb, instr.gpr0, result);
    } else if (id == OpCode::Id::FADD32I) {
        // Each operand has its own 'abs' and 'neg' bits.
        const Node lhs = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
                                               instr.fadd32i.negate_a);
        const Node rhs = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
                                               instr.fadd32i.negate_b);

        const Node sum = Operation(OperationCode::FAdd, PRECISE, lhs, rhs);
        SetInternalFlagsFromFloat(bb, sum, instr.op_32.generates_cc);
        SetRegister(bb, instr.gpr0, sum);
    } else {
        UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
                          opcode->get().GetName());
    }

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,287 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::IAdd3Height; |
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
using Tegra::Shader::Pred; |
|||
using Tegra::Shader::Register; |
|||
|
|||
/// Decodes the integer arithmetic instructions IADD, IADD3, ISCADD, POPC, SEL, LOP, LOP3, IMNMX
/// and LEA. The first operand is register gpr8; the second one is a signed immediate, register
/// gpr20 or a constant buffer entry depending on the instruction encoding (LEA picks its own
/// operands). The generated nodes are appended to `bb`; `code` is not used by this decoder.
/// Returns `pc` unchanged.
u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    Node op_a = GetRegister(instr.gpr8);
    Node op_b = [&]() {
        if (instr.is_b_imm) {
            return Immediate(instr.alu.GetSignedImm20_20());
        } else if (instr.is_b_gpr) {
            return GetRegister(instr.gpr20);
        } else {
            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        }
    }();

    switch (opcode->get().GetId()) {
    case OpCode::Id::IADD_C:
    case OpCode::Id::IADD_R:
    case OpCode::Id::IADD_IMM: {
        UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented");

        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);

        const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);

        // Use the regular condition code flag, like every other case of this decoder; the
        // op_32 one is the flag read by the 32-bit immediate instructions.
        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::IADD3_C:
    case OpCode::Id::IADD3_R:
    case OpCode::Id::IADD3_IMM: {
        Node op_c = GetRegister(instr.gpr39);

        // Selects the whole word or one of its 16-bit halves.
        const auto ApplyHeight = [&](IAdd3Height height, Node value) {
            switch (height) {
            case IAdd3Height::None:
                return value;
            case IAdd3Height::LowerHalfWord:
                return BitfieldExtract(value, 0, 16);
            case IAdd3Height::UpperHalfWord:
                return BitfieldExtract(value, 16, 16);
            default:
                UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast<u32>(height));
                return Immediate(0);
            }
        };

        // Heights are only applied for the register variant.
        if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
            op_a = ApplyHeight(instr.iadd3.height_a, op_a);
            op_b = ApplyHeight(instr.iadd3.height_b, op_b);
            op_c = ApplyHeight(instr.iadd3.height_c, op_c);
        }

        op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true);
        op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
        op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);

        const Node value = [&]() {
            const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
            if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
                return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
            }
            // The register variant can shift the partial sum before adding the third operand.
            const Node shifted = [&]() {
                switch (instr.iadd3.mode) {
                case Tegra::Shader::IAdd3Mode::RightShift:
                    // TODO(tech4me): According to
                    // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
                    // The addition between op_a and op_b should be done in uint33, more
                    // investigation required
                    return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab,
                                     Immediate(16));
                case Tegra::Shader::IAdd3Mode::LeftShift:
                    return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab,
                                     Immediate(16));
                default:
                    return add_ab;
                }
            }();
            return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
        }();

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::ISCADD_C:
    case OpCode::Id::ISCADD_R:
    case OpCode::Id::ISCADD_IMM: {
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in ISCADD is not implemented");

        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);

        // value = (op_a << shift_amount) + op_b
        const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
        const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
        const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::POPC_C:
    case OpCode::Id::POPC_R:
    case OpCode::Id::POPC_IMM: {
        if (instr.popc.invert) {
            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
        }
        const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::SEL_C:
    case OpCode::Id::SEL_R:
    case OpCode::Id::SEL_IMM: {
        const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0);
        const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::LOP_C:
    case OpCode::Id::LOP_R:
    case OpCode::Id::LOP_IMM: {
        if (instr.alu.lop.invert_a)
            op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
        if (instr.alu.lop.invert_b)
            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);

        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
                            instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
                            instr.generates_cc);
        break;
    }
    case OpCode::Id::LOP3_C:
    case OpCode::Id::LOP3_R:
    case OpCode::Id::LOP3_IMM: {
        const Node op_c = GetRegister(instr.gpr39);
        // The register variant reads its lookup table from a different field.
        const Node lut = [&]() {
            if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
                return Immediate(instr.alu.lop3.GetImmLut28());
            } else {
                return Immediate(instr.alu.lop3.GetImmLut48());
            }
        }();

        WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
        break;
    }
    case OpCode::Id::IMNMX_C:
    case OpCode::Id::IMNMX_R:
    case OpCode::Id::IMNMX_IMM: {
        UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);

        const bool is_signed = instr.imnmx.is_signed;

        // The predicate picks between the minimum and the maximum of both operands.
        const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
        const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
        const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
        const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::LEA_R2:
    case OpCode::Id::LEA_R1:
    case OpCode::Id::LEA_IMM:
    case OpCode::Id::LEA_RZ:
    case OpCode::Id::LEA_HI: {
        // LEA gathers its own three operands; they are named apart from the outer op_a and
        // op_b to avoid shadowing them.
        const auto [lea_a, lea_b, lea_c] = [&]() -> std::tuple<Node, Node, Node> {
            switch (opcode->get().GetId()) {
            case OpCode::Id::LEA_R2: {
                return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
                        Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
            }

            case OpCode::Id::LEA_R1: {
                const bool neg = instr.lea.r1.neg != 0;
                return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                        GetRegister(instr.gpr20),
                        Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
            }

            case OpCode::Id::LEA_IMM: {
                const bool neg = instr.lea.imm.neg != 0;
                return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
                        GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                        Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
            }

            case OpCode::Id::LEA_RZ: {
                const bool neg = instr.lea.rz.neg != 0;
                return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
                        GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                        Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
            }

            case OpCode::Id::LEA_HI:
            default:
                UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());

                return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8),
                        Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
            }
        }();

        UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
                             "Unhandled LEA Predicate");

        // value = lea_a + lea_b * (1 << lea_c)
        const Node shifted_c =
            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), lea_c);
        const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, lea_b, shifted_c);
        const Node value = Operation(OperationCode::IAdd, NO_PRECISE, lea_a, mul_bc);

        SetRegister(bb, instr.gpr0, value);

        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
    }

    return pc;
}
|||
|
|||
// Emits IR that evaluates a three-input lookup-table logic operation one bit at a time.
//
// For each of the 32 bit positions, the matching bit of op_c, op_b and op_a is extracted and packed
// into a 3-bit index (op_c -> bit 0, op_b -> bit 1, op_a -> bit 2). That index selects one bit of
// imm_lut, which is then moved back to the current bit position and ORed into the result.
//
// bb       Basic block that receives the generated flag and register writes.
// dest     Destination register for the combined result.
// op_a     Operand contributing bit 2 of the lookup index.
// op_b     Operand contributing bit 1 of the lookup index.
// op_c     Operand contributing bit 0 of the lookup index.
// imm_lut  Node holding the lookup table that is indexed by the packed bits.
// sets_cc  Forwarded to SetInternalFlagsFromInteger together with the result.
void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
                                    Node imm_lut, bool sets_cc) {
    constexpr u32 lop_iterations = 32;
    const Node one = Immediate(1);
    const Node two = Immediate(2);

    Node value{};
    for (u32 i = 0; i < lop_iterations; ++i) {
        const Node shift_amount = Immediate(i);

        // Bit i of op_c lands in bit 0 of the index.
        const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount);
        const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one);

        // Bit i of op_b lands in bit 1 of the index.
        const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount);
        const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one);
        const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one);

        // Bit i of op_a lands in bit 2 of the index.
        const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount);
        const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one);
        const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two);

        // The three packs occupy disjoint bit positions, so they must be merged with OR. ANDing
        // them would always produce zero and make every result bit equal to bit 0 of the table.
        const Node pack_01 = Operation(OperationCode::IBitwiseOr, NO_PRECISE, pack_0, pack_1);
        const Node pack_012 = Operation(OperationCode::IBitwiseOr, NO_PRECISE, pack_01, pack_2);

        // Select the table bit addressed by the packed index.
        const Node shifted_bit =
            Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012);
        const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one);

        // Move the selected bit back to position i.
        const Node right =
            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount);

        if (i > 0) {
            value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right);
        } else {
            // First iteration: there is nothing to accumulate into yet.
            value = right;
        }
    }

    SetInternalFlagsFromInteger(bb, value, sets_cc);
    SetRegister(bb, dest, value);
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,96 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::LogicOperation; |
|||
using Tegra::Shader::OpCode; |
|||
using Tegra::Shader::Pred; |
|||
using Tegra::Shader::PredicateResultMode; |
|||
using Tegra::Shader::Register; |
|||
|
|||
// Decodes the integer arithmetic instructions whose second operand is the immediate stored in
// instr.alu.imm20_32 (IADD32I and LOP32I). Any other opcode is reported as unimplemented.
// Returns pc unchanged.
u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    Node op_a = GetRegister(instr.gpr8);
    Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32));

    const OpCode::Id id = opcode->get().GetId();

    if (id == OpCode::Id::IADD32I) {
        UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");

        // Only negation of the register operand is applied; abs is never requested here.
        op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true);

        const Node sum = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);

        SetInternalFlagsFromInteger(bb, sum, instr.op_32.generates_cc);
        SetRegister(bb, instr.gpr0, sum);
        return pc;
    }

    if (id == OpCode::Id::LOP32I) {
        // Optional bitwise inversion of each operand before the logic operation.
        if (instr.alu.lop32i.invert_a) {
            op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
        }
        if (instr.alu.lop32i.invert_b) {
            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
        }

        // LOP32I never writes a predicate, hence the None mode and the unused predicate index.
        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
                            PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc);
        return pc;
    }

    UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
                      opcode->get().GetName());
    return pc;
}
|||
|
|||
// Emits a two-operand logic operation (And, Or, Xor or PassB), writes the result to dest and to the
// internal flags, and optionally writes a predicate derived from the result.
//
// predicate_mode selects the predicate write: None writes nothing, NotZero sets the predicate to
// "result != 0"; any other mode is reported as unimplemented.
void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op,
                                   Node op_a, Node op_b, PredicateResultMode predicate_mode,
                                   Pred predicate, bool sets_cc) {
    Node result{};
    switch (logic_op) {
    case LogicOperation::And:
        result = Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b);
        break;
    case LogicOperation::Or:
        result = Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b);
        break;
    case LogicOperation::Xor:
        result = Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b);
        break;
    case LogicOperation::PassB:
        // PassB forwards the second operand untouched.
        result = op_b;
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op));
        result = Immediate(0);
        break;
    }

    SetInternalFlagsFromInteger(bb, result, sets_cc);
    SetRegister(bb, dest, result);

    // Predicate write, depending on the requested mode.
    if (predicate_mode == PredicateResultMode::None) {
        return;
    }
    if (predicate_mode == PredicateResultMode::NotZero) {
        // The predicate becomes true when the result is not zero.
        const Node is_not_zero = Operation(OperationCode::LogicalINotEqual, result, Immediate(0));
        SetPredicate(bb, static_cast<u64>(predicate), is_not_zero);
        return;
    }

    UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}",
                      static_cast<u32>(predicate_mode));
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,49 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
|
|||
// Decodes BFE (bitfield extract). Only the BFE_IMM form is handled: the source register is shifted
// left by GetLeftShiftValue() and then logically shifted right by that amount plus shift_position.
// Returns pc unchanged.
u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.bfe.negate_b);

    const Node source = GetRegister(instr.gpr8);
    const Node op_a = GetOperandAbsNegInteger(source, false, instr.bfe.negate_a, false);

    if (opcode->get().GetId() != OpCode::Id::BFE_IMM) {
        UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName());
        return pc;
    }

    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                         "Condition codes generation in BFE is not implemented");

    // Shift amounts: first discard the upper bits, then move the field down to bit 0.
    const Node left_amount = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue()));
    const Node right_amount =
        Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position));

    const Node shifted_left =
        Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, left_amount);
    const Node extracted =
        Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, shifted_left, right_amount);

    SetInternalFlagsFromInteger(bb, extracted, instr.generates_cc);
    SetRegister(bb, instr.gpr0, extracted);

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,41 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
|
|||
// Decodes BFI (bitfield insert). Only BFI_IMM_R is handled: the base comes from gpr39 and the
// packed immediate supplies the offset (bits 0-7) and the bit count (bits 8-15) for inserting gpr8.
// Returns pc unchanged.
u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    Node base{};
    Node packed_shift{};
    switch (opcode->get().GetId()) {
    case OpCode::Id::BFI_IMM_R:
        base = GetRegister(instr.gpr39);
        packed_shift = Immediate(instr.alu.GetSignedImm20_20());
        break;
    default:
        // No other BFI encoding is expected to reach this decoder.
        UNREACHABLE();
        base = Immediate(0);
        packed_shift = Immediate(0);
        break;
    }

    const Node insert = GetRegister(instr.gpr8);
    const Node offset = BitfieldExtract(packed_shift, 0, 8);
    const Node bits = BitfieldExtract(packed_shift, 8, 8);

    const Node inserted =
        Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);

    SetInternalFlagsFromInteger(bb, inserted, instr.generates_cc);
    SetRegister(bb, instr.gpr0, inserted);

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,149 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
using Tegra::Shader::Register; |
|||
|
|||
// Decodes the conversion instructions: I2I_R, I2F_R/I2F_C, F2F_R/F2F_C and F2I_R/F2I_C.
// Each case reads its source operand, applies size conversion, abs/neg modifiers, rounding and
// casting as required, then writes the result to gpr0. Returns pc unchanged.
u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Source operand shared by the *_R / *_C pairs: register gpr20 when is_b_gpr is set,
    // otherwise the constant buffer entry described by cbuf34.
    const auto read_source = [&]() -> Node {
        if (instr.is_b_gpr) {
            return GetRegister(instr.gpr20);
        }
        return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
    };

    switch (opcode->get().GetId()) {
    case OpCode::Id::I2I_R: {
        UNIMPLEMENTED_IF(instr.conversion.selector);

        const bool input_signed = instr.conversion.is_input_signed;
        const bool output_signed = instr.conversion.is_output_signed;

        Node result = GetRegister(instr.gpr20);
        result = ConvertIntegerSize(result, instr.conversion.src_size, input_signed);
        result = GetOperandAbsNegInteger(result, instr.conversion.abs_a, instr.conversion.negate_a,
                                         input_signed);

        // A cast is only emitted when the signedness actually changes.
        if (input_signed != output_signed) {
            result =
                SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, result);
        }

        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
        SetRegister(bb, instr.gpr0, result);
        break;
    }
    case OpCode::Id::I2F_R:
    case OpCode::Id::I2F_C: {
        UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
        UNIMPLEMENTED_IF(instr.conversion.selector);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in I2F is not implemented");

        Node result = read_source();
        const bool input_signed = instr.conversion.is_input_signed;

        result = ConvertIntegerSize(result, instr.conversion.src_size, input_signed);
        // Abs is applied on the integer side, negation on the float side after the cast.
        result = GetOperandAbsNegInteger(result, instr.conversion.abs_a, false, input_signed);
        result = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, result);
        result = GetOperandAbsNegFloat(result, false, instr.conversion.negate_a);

        SetInternalFlagsFromFloat(bb, result, instr.generates_cc);
        SetRegister(bb, instr.gpr0, result);
        break;
    }
    case OpCode::Id::F2F_R:
    case OpCode::Id::F2F_C: {
        UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
        UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in F2F is not implemented");

        Node result = read_source();
        result = GetOperandAbsNegFloat(result, instr.conversion.abs_a, instr.conversion.negate_a);

        // Apply the requested rounding mode; None leaves the value as it is.
        switch (instr.conversion.f2f.rounding) {
        case Tegra::Shader::F2fRoundingOp::None:
            break;
        case Tegra::Shader::F2fRoundingOp::Round:
            result = Operation(OperationCode::FRoundEven, PRECISE, result);
            break;
        case Tegra::Shader::F2fRoundingOp::Floor:
            result = Operation(OperationCode::FFloor, PRECISE, result);
            break;
        case Tegra::Shader::F2fRoundingOp::Ceil:
            result = Operation(OperationCode::FCeil, PRECISE, result);
            break;
        case Tegra::Shader::F2fRoundingOp::Trunc:
            result = Operation(OperationCode::FTrunc, PRECISE, result);
            break;
        default:
            UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
                              static_cast<u32>(instr.conversion.f2f.rounding.Value()));
            result = Immediate(0);
            break;
        }
        result = GetSaturatedFloat(result, instr.alu.saturate_d);

        SetInternalFlagsFromFloat(bb, result, instr.generates_cc);
        SetRegister(bb, instr.gpr0, result);
        break;
    }
    case OpCode::Id::F2I_R:
    case OpCode::Id::F2I_C: {
        UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in F2I is not implemented");

        Node result = read_source();
        result = GetOperandAbsNegFloat(result, instr.conversion.abs_a, instr.conversion.negate_a);

        // Round in floating point first; the integer cast happens afterwards.
        switch (instr.conversion.f2i.rounding) {
        case Tegra::Shader::F2iRoundingOp::None:
            break;
        case Tegra::Shader::F2iRoundingOp::Floor:
            result = Operation(OperationCode::FFloor, PRECISE, result);
            break;
        case Tegra::Shader::F2iRoundingOp::Ceil:
            result = Operation(OperationCode::FCeil, PRECISE, result);
            break;
        case Tegra::Shader::F2iRoundingOp::Trunc:
            result = Operation(OperationCode::FTrunc, PRECISE, result);
            break;
        default:
            UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
                              static_cast<u32>(instr.conversion.f2i.rounding.Value()));
            result = Immediate(0);
            break;
        }

        const bool is_signed = instr.conversion.is_output_signed;
        result = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, result);
        result = ConvertIntegerSize(result, instr.conversion.dest_size, is_signed);

        SetRegister(bb, instr.gpr0, result);
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
    }

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,59 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
|
|||
// Decodes FFMA (fused multiply-add) in its CR, RR, RC and IMM forms. Operand A is always gpr8;
// operands B and C are picked according to the opcode, optionally negated, and fed to an FFma
// operation whose result is optionally saturated and written to gpr0. Returns pc unchanged.
u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
    UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented",
                         instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
    UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented",
                         instr.ffma.tab5980_1.Value());

    const Node op_a = GetRegister(instr.gpr8);

    // Select the B and C operands for the decoded addressing form.
    Node op_b{};
    Node op_c{};
    switch (opcode->get().GetId()) {
    case OpCode::Id::FFMA_CR:
        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        op_c = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::FFMA_RR:
        op_b = GetRegister(instr.gpr20);
        op_c = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::FFMA_RC:
        op_b = GetRegister(instr.gpr39);
        op_c = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        break;
    case OpCode::Id::FFMA_IMM:
        op_b = GetImmediate19(instr);
        op_c = GetRegister(instr.gpr39);
        break;
    default:
        UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
        op_b = Immediate(0);
        op_c = Immediate(0);
        break;
    }

    op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b);
    op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c);

    const Node fma = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c);
    const Node result = GetSaturatedFloat(fma, instr.alu.saturate_d);

    SetInternalFlagsFromFloat(bb, result, instr.generates_cc);
    SetRegister(bb, instr.gpr0, result);

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,58 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
|
|||
// Decodes FSET: compares two float operands, combines the comparison with a second predicate and
// writes a "true" or "false" constant to gpr0. Returns pc unchanged.
u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
                                            instr.fset.neg_a != 0);

    // Operand B is an immediate, a register or a constant buffer entry.
    Node raw_b{};
    if (instr.is_b_imm) {
        raw_b = GetImmediate19(instr);
    } else if (instr.is_b_gpr) {
        raw_b = GetRegister(instr.gpr20);
    } else {
        raw_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
    }
    const Node op_b = GetOperandAbsNegFloat(raw_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0);

    const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0);

    const OperationCode combiner = GetPredicateCombiner(instr.fset.op);
    const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b);

    const Node predicate = Operation(combiner, first_pred, second_pred);

    // With the bf bit set the result is the float 1.0 / 0.0; otherwise it is the integer -1 / 0.
    Node true_value{};
    Node false_value{};
    if (instr.fset.bf) {
        true_value = Immediate(1.0f);
        false_value = Immediate(0.0f);
    } else {
        true_value = Immediate(-1);
        false_value = Immediate(0);
    }
    const Node result =
        Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);

    // Flags are derived with the helper matching the type of the written value.
    if (instr.fset.bf) {
        SetInternalFlagsFromFloat(bb, result, instr.generates_cc);
    } else {
        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
    }
    SetRegister(bb, instr.gpr0, result);

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,56 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
using Tegra::Shader::Pred; |
|||
|
|||
// Decodes FSETP: compares two float operands and writes the combined result to predicate pred3,
// plus the result built from the negated comparison to pred0 when pred0 is in use.
// Returns pc unchanged.
u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
                                            instr.fsetp.neg_a != 0);

    // Operand B is an immediate, a register or a constant buffer entry.
    Node raw_b{};
    if (instr.is_b_imm) {
        raw_b = GetImmediate19(instr);
    } else if (instr.is_b_gpr) {
        raw_b = GetRegister(instr.gpr20);
    } else {
        raw_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
    }
    // Only the abs modifier is applied to operand B; negation is never requested here.
    const Node op_b = GetOperandAbsNegFloat(raw_b, instr.fsetp.abs_b, false);

    // We can't use the constant predicate as destination.
    ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

    const Node comparison = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b);
    const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);

    const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);

    // Primary predicate: Comparison OP SecondPredicate.
    const Node primary = Operation(combiner, comparison, second_pred);
    SetPredicate(bb, instr.fsetp.pred3, primary);

    const bool writes_secondary = instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex);
    if (writes_secondary) {
        // Secondary predicate: !Comparison OP SecondPredicate.
        const Node inverted = Operation(OperationCode::LogicalNegate, comparison);
        const Node secondary = Operation(combiner, inverted, second_pred);
        SetPredicate(bb, instr.fsetp.pred0, secondary);
    }

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,67 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include <array>
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
|
|||
// Decodes HSET2: compares two packed half-float operands and builds a 32-bit result in which each
// 16-bit half holds a "true" pattern (0x3c00 when bf is set, 0xffff otherwise) or zero.
// Only the HSET2_R form is handled. Returns pc unchanged.
u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.hset2.ftz != 0);

    // instr.hset2.type_a
    // instr.hset2.type_b
    Node op_a = GetRegister(instr.gpr8);

    Node op_b{};
    switch (opcode->get().GetId()) {
    case OpCode::Id::HSET2_R:
        op_b = GetRegister(instr.gpr20);
        break;
    default:
        UNREACHABLE();
        op_b = Immediate(0);
        break;
    }

    op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
    op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);

    const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);

    MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}};
    const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, meta, op_a, op_b);

    const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);

    // Builds the value for one half of the pack: the true pattern is shifted into the half
    // selected by index, and the matching element of the comparison pair decides the outcome.
    const auto build_half = [&](u32 index) -> Node {
        const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff;
        const Node true_value = Immediate(raw_value << (index * 16));
        const Node false_value = Immediate(0);

        const Node comparison =
            Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(index));
        const Node predicate = Operation(combiner, comparison, second_pred);

        return Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value);
    };

    // HSET2 operates on each half float in the pack.
    std::array<Node, 2> halves;
    for (u32 index = 0; index < 2; ++index) {
        halves[index] = build_half(index);
    }

    const Node packed = Operation(OperationCode::UBitwiseOr, NO_PRECISE, halves[0], halves[1]);
    SetRegister(bb, instr.gpr0, packed);

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,62 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
using Tegra::Shader::Pred; |
|||
|
|||
// Decodes HSETP2: compares two packed half-float operands, reduces the pair of comparison results
// with All2 or Any2 (depending on h_and), combines that with a second predicate and writes pred3.
// The result built from the negated reduction is written to pred0 when pred0 is in use.
// Only the HSETP2_R form is handled. Returns pc unchanged.
u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);

    const Node op_a =
        GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.hsetp2.abs_a, instr.hsetp2.negate_a);

    Node op_b{};
    switch (opcode->get().GetId()) {
    case OpCode::Id::HSETP2_R:
        // NOTE(review): operand B reuses abs_a for its abs modifier while taking negate_b for
        // negation. Confirm whether a dedicated abs flag for operand B was intended.
        op_b = GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a,
                                    instr.hsetp2.negate_b);
        break;
    default:
        UNREACHABLE();
        op_b = Immediate(0);
        break;
    }

    // We can't use the constant predicate as destination.
    ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));

    const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);

    const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);

    // h_and requires both halves to pass; otherwise one passing half is enough.
    OperationCode pair_combiner = OperationCode::LogicalAny2;
    if (instr.hsetp2.h_and) {
        pair_combiner = OperationCode::LogicalAll2;
    }

    MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}};
    const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b);
    const Node first_pred = Operation(pair_combiner, comparison);

    // Primary predicate: Predicate OP SecondPredicate.
    const Node primary = Operation(combiner, first_pred, second_pred);
    SetPredicate(bb, instr.hsetp2.pred3, primary);

    const bool writes_secondary = instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex);
    if (writes_secondary) {
        // Secondary predicate: !Predicate OP SecondPredicate.
        const Node inverted = Operation(OperationCode::LogicalNegate, first_pred);
        SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, inverted, second_pred));
    }

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,76 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include <tuple>
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::HalfPrecision; |
|||
using Tegra::Shader::HalfType; |
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
|
|||
// Decodes HFMA2 (packed half-float fused multiply-add) in its CR, RC, RR and IMM_R forms.
// Operand A is always gpr8; operands B and C, their half types, negation flags and the saturate
// flag depend on the opcode. The result is merged into gpr0 with HalfMerge. Returns pc unchanged.
u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // The RR encoding keeps its precision field in a different place from the other forms.
    if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
        UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None);
    } else {
        UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None);
    }

    constexpr auto identity = HalfType::H0_H1;

    const HalfType type_a = instr.hfma2.type_a;
    const Node op_a = GetRegister(instr.gpr8);

    // Defaults match the fallback used for unrecognised opcodes.
    bool neg_b{};
    bool neg_c{};
    bool saturate = false;
    HalfType type_b = identity;
    HalfType type_c = identity;
    Node op_b{};
    Node op_c{};

    switch (opcode->get().GetId()) {
    case OpCode::Id::HFMA2_CR:
        neg_b = instr.hfma2.negate_b;
        neg_c = instr.hfma2.negate_c;
        saturate = instr.hfma2.saturate;
        type_b = instr.hfma2.type_b;
        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        type_c = instr.hfma2.type_reg39;
        op_c = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::HFMA2_RC:
        neg_b = instr.hfma2.negate_b;
        neg_c = instr.hfma2.negate_c;
        saturate = instr.hfma2.saturate;
        type_b = instr.hfma2.type_reg39;
        op_b = GetRegister(instr.gpr39);
        type_c = instr.hfma2.type_b;
        op_c = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        break;
    case OpCode::Id::HFMA2_RR:
        neg_b = instr.hfma2.rr.negate_b;
        neg_c = instr.hfma2.rr.negate_c;
        saturate = instr.hfma2.rr.saturate;
        type_b = instr.hfma2.type_b;
        op_b = GetRegister(instr.gpr20);
        type_c = instr.hfma2.rr.type_c;
        op_c = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::HFMA2_IMM_R:
        // The immediate form has no negate flag for operand B; neg_b stays false.
        neg_c = instr.hfma2.negate_c;
        saturate = instr.hfma2.saturate;
        type_b = identity;
        op_b = UnpackHalfImmediate(instr, true);
        type_c = instr.hfma2.type_reg39;
        op_c = GetRegister(instr.gpr39);
        break;
    default:
        op_b = Immediate(0);
        op_c = Immediate(0);
        break;
    }
    UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");

    op_b = GetOperandAbsNegHalf(op_b, false, neg_b);
    op_c = GetOperandAbsNegHalf(op_c, false, neg_c);

    MetaHalfArithmetic meta{true, {type_a, type_b, type_c}};
    const Node fma = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
    const Node merged = HalfMerge(GetRegister(instr.gpr0), fma, instr.hfma2.merge);

    SetRegister(bb, instr.gpr0, merged);

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,50 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
|
|||
// Decodes ISET: compares two integer operands, combines the comparison with a second predicate and
// writes a "true" or "false" constant to gpr0. Returns pc unchanged.
u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    const Node op_a = GetRegister(instr.gpr8);

    // Operand B is an immediate, a register or a constant buffer entry.
    Node op_b{};
    if (instr.is_b_imm) {
        op_b = Immediate(instr.alu.GetSignedImm20_20());
    } else if (instr.is_b_gpr) {
        op_b = GetRegister(instr.gpr20);
    } else {
        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
    }

    const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0);
    const Node first_pred =
        GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b);

    const OperationCode combiner = GetPredicateCombiner(instr.iset.op);

    const Node predicate = Operation(combiner, first_pred, second_pred);

    // With the bf bit set the result is the float 1.0 / 0.0; otherwise it is the integer -1 / 0.
    Node true_value{};
    Node false_value{};
    if (instr.iset.bf) {
        true_value = Immediate(1.0f);
        false_value = Immediate(0.0f);
    } else {
        true_value = Immediate(-1);
        false_value = Immediate(0);
    }
    const Node result =
        Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);

    SetRegister(bb, instr.gpr0, result);

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,53 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
using Tegra::Shader::Pred; |
|||
|
|||
// Decodes ISETP: compares two integer operands and writes the combined result to predicate pred3,
// plus the result built from the negated comparison to pred0 when pred0 is in use.
// Returns pc unchanged.
u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    const Node op_a = GetRegister(instr.gpr8);

    // Operand B is an immediate, a register or a constant buffer entry.
    Node op_b{};
    if (instr.is_b_imm) {
        op_b = Immediate(instr.alu.GetSignedImm20_20());
    } else if (instr.is_b_gpr) {
        op_b = GetRegister(instr.gpr20);
    } else {
        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
    }

    // We can't use the constant predicate as destination.
    ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

    const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0);
    const Node comparison =
        GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b);

    const OperationCode combiner = GetPredicateCombiner(instr.isetp.op);

    // Primary predicate: Comparison OP SecondPredicate.
    const Node primary = Operation(combiner, comparison, second_pred);
    SetPredicate(bb, instr.isetp.pred3, primary);

    const bool writes_secondary = instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex);
    if (writes_secondary) {
        // Secondary predicate: !Comparison OP SecondPredicate.
        const Node inverted = Operation(OperationCode::LogicalNegate, comparison);
        SetPredicate(bb, instr.isetp.pred0, Operation(combiner, inverted, second_pred));
    }

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,688 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include <algorithm>
|
|||
#include <vector>
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Attribute; |
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
using Tegra::Shader::Register; |
|||
using Tegra::Shader::TextureMiscMode; |
|||
using Tegra::Shader::TextureProcessMode; |
|||
using Tegra::Shader::TextureType; |
|||
|
|||
/// Returns the number of coordinate components used to address a texture of the given type:
/// 1 for 1D, 2 for 2D and 3 for 3D and cube textures. Any other type is reported through
/// UNIMPLEMENTED_MSG and yields 0.
static std::size_t GetCoordCount(TextureType texture_type) {
    if (texture_type == TextureType::Texture1D) {
        return 1;
    }
    if (texture_type == TextureType::Texture2D) {
        return 2;
    }
    if (texture_type == TextureType::Texture3D || texture_type == TextureType::TextureCube) {
        return 3;
    }

    UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
    return 0;
}
|||
|
|||
/// Decodes the memory or texture instruction stored at program_code[pc] and appends the
/// generated IR to bb. Handles attribute loads/stores (LD_A, ST_A), constant buffer loads
/// (LD_C), local memory accesses (LD_L, ST_L) and the texture instructions TEX, TEXS, TLD4,
/// TLD4S, TXQ, TMML and TLDS. Anything else is reported through UNIMPLEMENTED_MSG.
/// @param bb   Basic block that receives the generated nodes.
/// @param code Unused by this decoder.
/// @param pc   Index of the instruction inside program_code.
/// @return pc, unchanged.
u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::LD_A: {
        // Note: Shouldn't this be interp mode flat? As in no interpolation made.
        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
                             "Indirect attribute loads are not supported");
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute loads are not supported");

        Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
                                          Tegra::Shader::IpaSampleMode::Default};

        u64 next_element = instr.attribute.fmt20.element;
        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());

        const auto LoadNextElement = [&](u32 reg_offset) {
            const Node buffer = GetRegister(instr.gpr39);
            const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index),
                                                     next_element, input_mode, buffer);

            SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);

            // Load the next attribute element into the following register. If the element
            // to load goes beyond the vec4 size, load the first element of the next
            // attribute.
            next_element = (next_element + 1) % 4;
            next_index = next_index + (next_element == 0 ? 1 : 0);
        };

        // The size field encodes the number of 32-bit words minus one.
        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
            LoadNextElement(reg_offset);
        }
        break;
    }
    case OpCode::Id::LD_C: {
        UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);

        Node index = GetRegister(instr.gpr8);

        const Node op_a =
            GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, index);

        switch (instr.ld_c.type.Value()) {
        case Tegra::Shader::UniformType::Single:
            SetRegister(bb, instr.gpr0, op_a);
            break;

        case Tegra::Shader::UniformType::Double: {
            const Node op_b =
                GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index);

            // Both words go through temporals before being copied to gpr0 and gpr0 + 1.
            SetTemporal(bb, 0, op_a);
            SetTemporal(bb, 1, op_b);
            SetRegister(bb, instr.gpr0, GetTemporal(0));
            SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1));
            break;
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value()));
        }
        break;
    }
    case OpCode::Id::LD_L: {
        UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
                             static_cast<unsigned>(instr.ld_l.unknown.Value()));

        // Local memory address: gpr8 + signed immediate.
        const Node index = Operation(OperationCode::IAdd, GetRegister(instr.gpr8),
                                     Immediate(static_cast<s32>(instr.smem_imm)));
        const Node lmem = GetLocalMemory(index);

        switch (instr.ldst_sl.type.Value()) {
        case Tegra::Shader::StoreType::Bytes32:
            SetRegister(bb, instr.gpr0, lmem);
            break;
        default:
            UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
                              static_cast<unsigned>(instr.ldst_sl.type.Value()));
        }
        break;
    }
    case OpCode::Id::ST_A: {
        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
                             "Indirect attribute stores are not supported");
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute stores are not supported");

        u64 next_element = instr.attribute.fmt20.element;
        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());

        const auto StoreNextElement = [&](u32 reg_offset) {
            const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index),
                                                 next_element, GetRegister(instr.gpr39));
            const auto src = GetRegister(instr.gpr0.Value() + reg_offset);

            bb.push_back(Operation(OperationCode::Assign, dest, src));

            // Store the following register into the next attribute element. If the element
            // to store goes beyond the vec4 size, continue with the first element of the next
            // attribute.
            next_element = (next_element + 1) % 4;
            next_index = next_index + (next_element == 0 ? 1 : 0);
        };

        // The size field encodes the number of 32-bit words minus one.
        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
            StoreNextElement(reg_offset);
        }

        break;
    }
    case OpCode::Id::ST_L: {
        UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
                             static_cast<u32>(instr.st_l.unknown.Value()));

        // Local memory address: gpr8 + signed immediate.
        const Node index = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
                                     Immediate(static_cast<s32>(instr.smem_imm)));

        switch (instr.ldst_sl.type.Value()) {
        case Tegra::Shader::StoreType::Bytes32:
            SetLocalMemory(bb, index, GetRegister(instr.gpr0));
            break;
        default:
            UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
                              static_cast<u32>(instr.ldst_sl.type.Value()));
        }
        break;
    }
    case OpCode::Id::TEX: {
        UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");

        if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
        }

        const TextureType texture_type{instr.tex.texture_type};
        const bool is_array = instr.tex.array != 0;
        const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
        const auto process_mode = instr.tex.GetTextureProcessMode();
        WriteTexInstructionFloat(
            bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
        break;
    }
    case OpCode::Id::TEXS: {
        const TextureType texture_type{instr.texs.GetTextureType()};
        const bool is_array{instr.texs.IsArrayTexture()};
        const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
        const auto process_mode = instr.texs.GetTextureProcessMode();

        if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
        }

        const Node4 components =
            GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);

        // The fp32 flag selects between full float and packed half float destinations.
        if (instr.texs.fp32_flag) {
            WriteTexsInstructionFloat(bb, instr, components);
        } else {
            WriteTexsInstructionHalfFloat(bb, instr, components);
        }
        break;
    }
    case OpCode::Id::TLD4: {
        ASSERT(instr.tld4.array == 0);
        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
                             "NDV is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
                             "PTP is not implemented");

        if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
        }

        const auto texture_type = instr.tld4.texture_type.Value();
        const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
        const bool is_array = instr.tld4.array != 0;
        WriteTexInstructionFloat(bb, instr,
                                 GetTld4Code(instr, texture_type, depth_compare, is_array));
        break;
    }
    case OpCode::Id::TLD4S: {
        UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");

        if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
        }

        const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
        const Node op_a = GetRegister(instr.gpr8);
        const Node op_b = GetRegister(instr.gpr20);

        std::vector<Node> coords;

        // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
        if (depth_compare) {
            // Note: TLD4S coordinate encoding works just like TEXS's
            const Node op_y = GetRegister(instr.gpr8.Value() + 1);
            coords.push_back(op_a);
            coords.push_back(op_y);
            coords.push_back(op_b);
        } else {
            coords.push_back(op_a);
            coords.push_back(op_b);
        }
        // The gathered component is appended after the coordinates and is not counted in
        // num_coords.
        const auto num_coords = static_cast<u32>(coords.size());
        coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));

        const auto& sampler =
            GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);

        Node4 values;
        for (u32 element = 0; element < values.size(); ++element) {
            auto params = coords;
            MetaTexture meta{sampler, element, num_coords};
            values[element] =
                Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
        }

        WriteTexsInstructionFloat(bb, instr, values);
        break;
    }
    case OpCode::Id::TXQ: {
        if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
        }

        // TODO: The new commits on the texture refactor, change the way samplers work.
        // Sadly, not all texture instructions specify the type of texture their sampler
        // uses. This must be fixed at a later instance.
        const auto& sampler =
            GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);

        switch (instr.txq.query_type) {
        case Tegra::Shader::TextureQueryType::Dimension: {
            // Compute all four results into temporals first, then copy them to gpr0..gpr0 + 3.
            for (u32 element = 0; element < 4; ++element) {
                MetaTexture meta{sampler, element};
                const Node value = Operation(OperationCode::F4TextureQueryDimensions,
                                             std::move(meta), GetRegister(instr.gpr8));
                SetTemporal(bb, element, value);
            }
            for (u32 i = 0; i < 4; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
            }
            break;
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
                              static_cast<u32>(instr.txq.query_type.Value()));
        }
        break;
    }
    case OpCode::Id::TMML: {
        UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
                             "NDV is not implemented");

        if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
        }

        auto texture_type = instr.tmml.texture_type.Value();
        const bool is_array = instr.tmml.array != 0;
        const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);

        std::vector<Node> coords;

        // TODO: Add coordinates for different samplers once other texture types are implemented.
        switch (texture_type) {
        case TextureType::Texture1D:
            coords.push_back(GetRegister(instr.gpr8));
            break;
        case TextureType::Texture2D:
            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
            break;
        default:
            UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));

            // Fallback to interpreting as a 2D texture for now
            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
            texture_type = TextureType::Texture2D;
        }

        // Compute both results into temporals first, then copy them to gpr0 and gpr0 + 1.
        for (u32 element = 0; element < 2; ++element) {
            auto params = coords;
            MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())};
            const Node value =
                Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params));
            SetTemporal(bb, element, value);
        }
        for (u32 element = 0; element < 2; ++element) {
            SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
        }

        break;
    }
    case OpCode::Id::TLDS: {
        const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
        const bool is_array{instr.tlds.IsArrayTexture()};

        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");

        if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
        }

        WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
    }

    return pc;
}
|||
|
|||
/// Looks up the sampler entry registered for the given sampler's index, creating it on first
/// use.
/// @param sampler   Sampler field of the instruction; its index is used as the lookup offset.
/// @param type      Texture type the sampler is used with.
/// @param is_array  Whether the sampler addresses an array texture.
/// @param is_shadow Whether the sampler is used for depth comparison.
/// @return Reference to the entry stored in used_samplers. When an entry already exists, it is
///         asserted to match the requested type, array and shadow properties.
const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
                                    bool is_array, bool is_shadow) {
    const auto offset = static_cast<std::size_t>(sampler.index.Value());

    const auto has_same_offset = [&](const Sampler& entry) { return entry.GetOffset() == offset; };
    const auto itr = std::find_if(used_samplers.begin(), used_samplers.end(), has_same_offset);

    if (itr == used_samplers.end()) {
        // First use of this sampler: register a new mapping whose index is the number of
        // samplers seen so far.
        const std::size_t next_index = used_samplers.size();
        const Sampler entry{offset, next_index, type, is_array, is_shadow};
        return *used_samplers.emplace(entry).first;
    }

    // The sampler was already registered: reuse the existing mapping.
    ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
           itr->IsShadow() == is_shadow);
    return *itr;
}
|||
|
|||
/// Writes the enabled components of a texture result to consecutive registers starting at
/// gpr0. Values are staged in temporals first and copied to the registers afterwards.
/// @param bb         Basic block that receives the generated nodes.
/// @param instr      Instruction providing the component mask (tex) and destination (gpr0).
/// @param components The four result components.
void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr,
                                        const Node4& components) {
    u32 written = 0;
    for (u32 elem = 0; elem < 4; ++elem) {
        // Only enabled components take up a destination slot.
        if (instr.tex.IsComponentEnabled(elem)) {
            SetTemporal(bb, written, components[elem]);
            ++written;
        }
    }

    // Move the staged values from the temporals into the real registers.
    for (u32 slot = 0; slot < written; ++slot) {
        SetRegister(bb, instr.gpr0.Value() + slot, GetTemporal(slot));
    }
}
|||
|
|||
/// Writes the enabled components of a TEXS-style result to its destination registers.
/// TEXS has two destination registers and a swizzle: the first two enabled components go to
/// gpr0 and gpr0 + 1, the remaining ones to gpr28 and gpr28 + 1.
/// @param bb         Basic block that receives the generated nodes.
/// @param instr      Instruction providing the component mask (texs) and destinations.
/// @param components The four result components.
void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr,
                                         const Node4& components) {
    // Stage every enabled component in a temporal.
    u32 dest_elem = 0;
    for (u32 component = 0; component < 4; ++component) {
        if (instr.texs.IsComponentEnabled(component)) {
            SetTemporal(bb, dest_elem, components[component]);
            ++dest_elem;
        }
    }

    for (u32 i = 0; i < dest_elem; ++i) {
        // Position inside the destination register pair.
        const u32 pair_slot = i % 2;
        if (i >= 2) {
            // The third and fourth components need the second destination register.
            ASSERT(instr.texs.HasTwoDestinations());
            SetRegister(bb, instr.gpr28.Value() + pair_slot, GetTemporal(i));
            continue;
        }
        SetRegister(bb, instr.gpr0.Value() + pair_slot, GetTemporal(i));
    }
}
|||
|
|||
/// Writes the enabled components of a TEXS.F16 result. Destination values are packed in pairs
/// (HPack2) into gpr0 and, when more than two components are enabled, gpr28.
/// Nothing is written when no component is enabled.
/// @param bb         Basic block that receives the generated nodes.
/// @param instr      Instruction providing the component mask (texs) and destinations.
/// @param components The four result components.
void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr,
                                             const Node4& components) {
    // Compact the enabled components to the front of the array.
    Node4 values;
    u32 enabled_count = 0;
    for (u32 component = 0; component < 4; ++component) {
        if (instr.texs.IsComponentEnabled(component)) {
            values[enabled_count] = components[component];
            ++enabled_count;
        }
    }
    if (enabled_count == 0) {
        return;
    }

    // Pad the unused trailing slots with zero immediates so both pairs can be packed.
    for (u32 slot = enabled_count; slot < values.size(); ++slot) {
        values[slot] = Immediate(0);
    }

    const Node lower_pair = Operation(OperationCode::HPack2, values[0], values[1]);
    if (enabled_count > 2) {
        // Two destinations: stage both packed pairs in temporals before writing the registers.
        SetTemporal(bb, 0, lower_pair);
        SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));

        SetRegister(bb, instr.gpr0, GetTemporal(0));
        SetRegister(bb, instr.gpr28, GetTemporal(1));
        return;
    }

    SetRegister(bb, instr.gpr0, lower_pair);
}
|||
|
|||
/// Builds the four per-component texture sampling operations for a TEX/TEXS style access.
/// @param instr         Instruction being decoded (provides the sampler and gpr20).
/// @param texture_type  Type of the sampled texture.
/// @param process_mode  Texture process mode; selects whether a lod/bias operand is appended.
/// @param depth_compare Whether the access performs a depth comparison.
/// @param is_array      Whether the texture is an array texture.
/// @param array_offset  Position of the array index inside coords (used only when is_array).
/// @param bias_offset   Register offset from gpr20 where the lod/bias value is read.
/// @param coords        Coordinate operands; a lod/bias operand may be appended to them.
/// @return One operation node per result component.
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, bool depth_compare, bool is_array,
                               std::size_t array_offset, std::size_t bias_offset,
                               std::vector<Node>&& coords) {
    UNIMPLEMENTED_IF_MSG(
        (texture_type == TextureType::Texture3D && (is_array || depth_compare)) ||
            (texture_type == TextureType::TextureCube && is_array && depth_compare),
        "This method is not supported.");

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);

    bool lod_needed = false;
    if (process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL ||
        process_mode == TextureProcessMode::LLA) {
        lod_needed = true;
    }

    // LOD selection (either via bias or explicit textureLod) not supported in GL for
    // sampler2DArrayShadow and samplerCubeArrayShadow.
    const bool is_2d_or_cube = texture_type == Tegra::Shader::TextureType::Texture2D ||
                               texture_type == Tegra::Shader::TextureType::TextureCube;
    const bool gl_lod_supported = !(is_2d_or_cube && is_array && depth_compare);

    OperationCode read_method = OperationCode::F4Texture;
    if (lod_needed && gl_lod_supported) {
        read_method = OperationCode::F4TextureLod;
    }

    UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);

    const std::optional<u32> array_offset_value =
        is_array ? std::optional<u32>{static_cast<u32>(array_offset)} : std::nullopt;

    // The coordinate count is taken before the optional lod/bias operand is appended.
    const auto coords_count = static_cast<u32>(coords.size());

    const bool append_lod = process_mode != TextureProcessMode::None && gl_lod_supported;
    if (append_lod) {
        // LZ uses a constant zero lod. Otherwise, lod or bias are always stored in the register
        // indexed by the gpr20 field with an offset depending on the usage of the other
        // registers.
        const Node lod = process_mode == TextureProcessMode::LZ
                             ? Immediate(0.0f)
                             : GetRegister(instr.gpr20.Value() + bias_offset);
        coords.push_back(lod);
    }

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        // Every component operation gets its own copy of the operand list.
        auto operands = coords;
        MetaTexture meta{sampler, element, coords_count, array_offset_value};
        values[element] = Operation(read_method, std::move(meta), std::move(operands));
    }

    return values;
}
|||
|
|||
/// Collects the operands of a TEX instruction and builds its sampling operations.
/// @param instr         Instruction being decoded.
/// @param texture_type  Type of the sampled texture.
/// @param process_mode  Texture process mode.
/// @param depth_compare Whether the access performs a depth comparison.
/// @param is_array      Whether the texture is an array texture.
/// @return One operation node per result component, as produced by GetTextureCode.
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
                           TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    // Lod or bias is read from a register for every mode except None and LZ.
    const bool lod_bias_enabled =
        !(process_mode == TextureProcessMode::None || process_mode == TextureProcessMode::LZ);

    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
    const u64 coord_register = is_array ? array_register + 1 : array_register;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }

    // 1D.DC in opengl the 2nd component is ignored.
    const bool is_1d_shadow =
        depth_compare && !is_array && texture_type == TextureType::Texture1D;
    if (is_1d_shadow) {
        coords.push_back(Immediate(0.0f));
    }

    std::size_t array_offset{};
    if (is_array) {
        array_offset = coords.size();
        coords.push_back(GetRegister(array_register));
    }

    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20
        // or in the next register if lod or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        coords.push_back(GetRegister(depth_register));
    }

    // Fill ignored coordinates
    for (std::size_t filled = coords.size(); filled < total_coord_count; ++filled) {
        coords.push_back(Immediate(0));
    }

    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
                          0, std::move(coords));
}
|||
|
|||
/// Collects the operands of a TEXS instruction and builds its sampling operations.
/// @param instr         Instruction being decoded.
/// @param texture_type  Type of the sampled texture.
/// @param process_mode  Texture process mode.
/// @param depth_compare Whether the access performs a depth comparison.
/// @param is_array      Whether the texture is an array texture.
/// @return One operation node per result component, as produced by GetTextureCode.
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    // Lod or bias is read from a register for every mode except None and LZ.
    const bool lod_bias_enabled =
        !(process_mode == TextureProcessMode::None || process_mode == TextureProcessMode::LZ);

    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
    const u64 coord_register = is_array ? array_register + 1 : array_register;

    // The last coordinate is read from gpr20 unless a non-array access with at most two
    // coordinates also uses lod/bias or depth compare; then it follows the first coordinate.
    const bool last_coord_in_gpr20 =
        is_array || (!lod_bias_enabled && !depth_compare) || (coord_count > 2);
    const u64 last_coord_register =
        last_coord_in_gpr20 ? static_cast<u64>(instr.gpr20.Value()) : coord_register + 1;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        const bool is_last = (coord_count > 1) && (i == (coord_count - 1));
        if (is_last) {
            coords.push_back(GetRegister(last_coord_register));
        } else {
            coords.push_back(GetRegister(coord_register + i));
        }
    }

    std::size_t array_offset{};
    if (is_array) {
        array_offset = coords.size();
        coords.push_back(GetRegister(array_register));
    }

    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20
        // or in the next register if lod or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        coords.push_back(GetRegister(depth_register));
    }

    // Fill ignored coordinates
    for (std::size_t filled = coords.size(); filled < total_coord_count; ++filled) {
        coords.push_back(Immediate(0));
    }

    // With more than two coordinates the lod/bias register is one past gpr20.
    const std::size_t bias_offset = coord_count > 2 ? 1 : 0;
    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
                          bias_offset, std::move(coords));
}
|||
|
|||
/// Builds the four per-component texture gather operations for a TLD4 instruction.
/// @param instr         Instruction being decoded (provides the sampler and gpr8).
/// @param texture_type  Type of the gathered texture.
/// @param depth_compare Whether the sampler is used for depth comparison; it is only forwarded
///                      to GetSampler here.
/// @param is_array      Whether the texture is an array texture.
/// @return One F4TextureGather operation node per result component.
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
                            bool is_array) {
    const std::size_t coord_count = GetCoordCount(texture_type);

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;

    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }
    // The array index, when present, is appended after the coordinates.
    std::optional<u32> array_offset;
    if (is_array) {
        array_offset = static_cast<u32>(coords.size());
        coords.push_back(GetRegister(array_register));
    }

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        // Every component operation gets its own copy of the operand list.
        auto params = coords;
        MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset};
        values[element] =
            Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
    }

    return values;
}
|||
|
|||
/// Builds the four per-component texel fetch operations for a TLDS instruction.
/// @param instr        Instruction being decoded (provides the sampler, gpr8 and gpr20).
/// @param texture_type Type of the fetched texture.
/// @param is_array     Whether the texture is an array texture.
/// @return One F4TexelFetch operation node per result component.
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
    const std::size_t type_coord_count = GetCoordCount(texture_type);
    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // If the texture is an array, the coordinates start at gpr20 instead of gpr8
    const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();

    const u64 last_coord_register =
        ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;

    std::vector<Node> coords;

    for (std::size_t i = 0; i < type_coord_count; ++i) {
        const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }
    // The array index, when present, is appended after the coordinates.
    std::optional<u32> array_offset;
    if (is_array) {
        array_offset = static_cast<u32>(coords.size());
        coords.push_back(GetRegister(array_register));
    }
    // The coordinate count is taken before the lod operand is appended.
    const auto coords_count = static_cast<u32>(coords.size());

    if (lod_enabled) {
        // When lod is used, it is always stored in gpr20
        coords.push_back(GetRegister(instr.gpr20));
    } else {
        coords.push_back(Immediate(0));
    }

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        // Every component operation gets its own copy of the operand list.
        auto params = coords;
        MetaTexture meta{sampler, element, coords_count, array_offset};
        values[element] =
            Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params));
    }
    return values;
}
|||
|
|||
/// Computes how many coordinates a texture access uses and checks them against the given
/// limits.
/// @param texture_type     Type of the accessed texture.
/// @param depth_compare    Whether a depth reference is part of the coordinates.
/// @param is_array         Whether an array index is part of the coordinates.
/// @param lod_bias_enabled Whether an extra lod/bias input is consumed.
/// @param max_coords       Maximum number of coordinates supported by the caller.
/// @param max_inputs       Maximum number of inputs (coordinates plus lod/bias) supported.
/// @return {coordinate count of the texture type, total coordinate count}. When a limit is
///         exceeded the access is reported as unimplemented and the total is clamped to
///         max_coords. The total is then increased by one for non-array 1D depth-compare
///         accesses.
std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
    TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
    std::size_t max_coords, std::size_t max_inputs) {
    const std::size_t coord_count = GetCoordCount(texture_type);

    std::size_t total_coord_count = coord_count;
    if (is_array) {
        ++total_coord_count;
    }
    if (depth_compare) {
        ++total_coord_count;
    }

    const std::size_t total_reg_count =
        lod_bias_enabled ? total_coord_count + 1 : total_coord_count;

    const bool exceeds_limits = total_coord_count > max_coords || total_reg_count > max_inputs;
    if (exceeds_limits) {
        UNIMPLEMENTED_MSG("Unsupported Texture operation");
        total_coord_count = std::min(total_coord_count, max_coords);
    }

    // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
        ++total_coord_count;
    }

    return {coord_count, total_coord_count};
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,178 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::ConditionCode; |
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
using Tegra::Shader::Register; |
|||
|
|||
u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) { |
|||
const Instruction instr = {program_code[pc]}; |
|||
const auto opcode = OpCode::Decode(instr); |
|||
|
|||
switch (opcode->get().GetId()) { |
|||
case OpCode::Id::EXIT: { |
|||
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
|||
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}", |
|||
static_cast<u32>(cc)); |
|||
|
|||
switch (instr.flow.cond) { |
|||
case Tegra::Shader::FlowCondition::Always: |
|||
bb.push_back(Operation(OperationCode::Exit)); |
|||
if (instr.pred.pred_index == static_cast<u64>(Tegra::Shader::Pred::UnusedIndex)) { |
|||
// If this is an unconditional exit then just end processing here,
|
|||
// otherwise we have to account for the possibility of the condition
|
|||
// not being met, so continue processing the next instruction.
|
|||
pc = MAX_PROGRAM_LENGTH - 1; |
|||
} |
|||
break; |
|||
|
|||
case Tegra::Shader::FlowCondition::Fcsm_Tr: |
|||
// TODO(bunnei): What is this used for? If we assume this conditon is not
|
|||
// satisifed, dual vertex shaders in Farming Simulator make more sense
|
|||
UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr"); |
|||
break; |
|||
|
|||
default: |
|||
UNIMPLEMENTED_MSG("Unhandled flow condition: {}", |
|||
static_cast<u32>(instr.flow.cond.Value())); |
|||
} |
|||
break; |
|||
} |
|||
case OpCode::Id::KIL: { |
|||
UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); |
|||
|
|||
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
|||
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "KIL condition code used: {}", |
|||
static_cast<u32>(cc)); |
|||
|
|||
bb.push_back(Operation(OperationCode::Discard)); |
|||
break; |
|||
} |
|||
case OpCode::Id::MOV_SYS: { |
|||
switch (instr.sys20) { |
|||
case Tegra::Shader::SystemVariable::InvocationInfo: { |
|||
LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); |
|||
SetRegister(bb, instr.gpr0, Immediate(0u)); |
|||
break; |
|||
} |
|||
case Tegra::Shader::SystemVariable::Ydirection: { |
|||
// Config pack's third value is Y_NEGATE's state.
|
|||
SetRegister(bb, instr.gpr0, Operation(OperationCode::YNegate)); |
|||
break; |
|||
} |
|||
default: |
|||
UNIMPLEMENTED_MSG("Unhandled system move: {}", static_cast<u32>(instr.sys20.Value())); |
|||
} |
|||
break; |
|||
} |
|||
case OpCode::Id::BRA: { |
|||
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, |
|||
"BRA with constant buffers are not implemented"); |
|||
|
|||
const u32 target = pc + instr.bra.GetBranchTarget(); |
|||
const Node branch = Operation(OperationCode::Branch, Immediate(target)); |
|||
|
|||
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
|||
if (cc != Tegra::Shader::ConditionCode::T) { |
|||
bb.push_back(Conditional(GetConditionCode(cc), {branch})); |
|||
} else { |
|||
bb.push_back(branch); |
|||
} |
|||
break; |
|||
} |
|||
case OpCode::Id::SSY: { |
|||
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, |
|||
"Constant buffer flow is not supported"); |
|||
|
|||
// The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the
|
|||
// target of the jump that the SYNC instruction will make. The SSY opcode has a similar
|
|||
// structure to the BRA opcode.
|
|||
const u32 target = pc + instr.bra.GetBranchTarget(); |
|||
bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); |
|||
break; |
|||
} |
|||
case OpCode::Id::PBK: { |
|||
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, |
|||
"Constant buffer PBK is not supported"); |
|||
|
|||
// PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY but
|
|||
// using SYNC on a PBK address will kill the shader execution. We don't emulate this because
|
|||
// it's very unlikely a driver will emit such invalid shader.
|
|||
const u32 target = pc + instr.bra.GetBranchTarget(); |
|||
bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); |
|||
break; |
|||
} |
|||
case OpCode::Id::SYNC: { |
|||
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
|||
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", |
|||
static_cast<u32>(cc)); |
|||
|
|||
// The SYNC opcode jumps to the address previously set by the SSY opcode
|
|||
bb.push_back(Operation(OperationCode::PopFlowStack)); |
|||
break; |
|||
} |
|||
case OpCode::Id::BRK: { |
|||
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
|||
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", |
|||
static_cast<u32>(cc)); |
|||
|
|||
// The BRK opcode jumps to the address previously set by the PBK opcode
|
|||
bb.push_back(Operation(OperationCode::PopFlowStack)); |
|||
break; |
|||
} |
|||
case OpCode::Id::IPA: { |
|||
const auto& attribute = instr.attribute.fmt28; |
|||
const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), |
|||
instr.ipa.sample_mode.Value()}; |
|||
|
|||
const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode); |
|||
const Node value = GetSaturatedFloat(attr, instr.ipa.saturate); |
|||
|
|||
SetRegister(bb, instr.gpr0, value); |
|||
break; |
|||
} |
|||
case OpCode::Id::OUT_R: { |
|||
UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex, |
|||
"Stream buffer is not supported"); |
|||
|
|||
if (instr.out.emit) { |
|||
// gpr0 is used to store the next address and gpr8 contains the address to emit.
|
|||
// Hardware uses pointers here but we just ignore it
|
|||
bb.push_back(Operation(OperationCode::EmitVertex)); |
|||
SetRegister(bb, instr.gpr0, Immediate(0)); |
|||
} |
|||
if (instr.out.cut) { |
|||
bb.push_back(Operation(OperationCode::EndPrimitive)); |
|||
} |
|||
break; |
|||
} |
|||
case OpCode::Id::ISBERD: { |
|||
UNIMPLEMENTED_IF(instr.isberd.o != 0); |
|||
UNIMPLEMENTED_IF(instr.isberd.skew != 0); |
|||
UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None); |
|||
UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None); |
|||
LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); |
|||
SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); |
|||
break; |
|||
} |
|||
case OpCode::Id::DEPBAR: { |
|||
LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); |
|||
break; |
|||
} |
|||
default: |
|||
UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); |
|||
} |
|||
|
|||
return pc; |
|||
} |
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,67 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
using Tegra::Shader::Pred; |
|||
|
|||
/// Decodes the PSETP and CSETP instructions located at program_code[pc] and appends the
/// resulting predicate assignments to bb. The code parameter is not read by this function.
/// Any other opcode is reported through UNIMPLEMENTED_MSG.
/// @returns pc, unmodified.
u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto unused_index = static_cast<u64>(Pred::UnusedIndex);

    const auto id = opcode->get().GetId();

    if (id == OpCode::Id::PSETP) {
        const Node lhs = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
        const Node rhs = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);

        // The constant predicate is not a valid destination.
        ASSERT(instr.psetp.pred3 != unused_index);

        const Node extra = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);

        const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
        const Node combined = Operation(combiner, lhs, rhs);

        // Primary destination receives (lhs OP rhs) OP extra.
        SetPredicate(bb, instr.psetp.pred3, Operation(combiner, combined, extra));

        // Secondary destination, when enabled, receives !(lhs OP rhs) OP extra.
        if (instr.psetp.pred0 != unused_index) {
            const Node inverted = Operation(OperationCode::LogicalNegate, combined);
            SetPredicate(bb, instr.psetp.pred0, Operation(combiner, inverted, extra));
        }
        return pc;
    }

    if (id == OpCode::Id::CSETP) {
        const Node extra = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
        const Node condition_code = GetConditionCode(instr.csetp.cc);

        const OperationCode combiner = GetPredicateCombiner(instr.csetp.op);

        // Both destinations are optional for CSETP.
        if (instr.csetp.pred3 != unused_index) {
            SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, extra));
        }
        if (instr.csetp.pred0 != unused_index) {
            const Node inverted_cc = Operation(OperationCode::LogicalNegate, condition_code);
            SetPredicate(bb, instr.csetp.pred0, Operation(combiner, inverted_cc, extra));
        }
        return pc;
    }

    UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,46 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
|
|||
/// Decodes the PSET instruction at program_code[pc]: three predicates are combined and the
/// boolean result is written to gpr0 either as a float (1.0f / 0.0f) or as an integer mask
/// (0xffffffff / 0), depending on the instruction's bf bit. The code parameter is not read.
/// @returns pc, unmodified.
u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    [[maybe_unused]] const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                         "Condition codes generation in PSET is not implemented");

    // First stage: pred12 COND pred29
    const Node lhs = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0);
    const Node rhs = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0);
    const Node first_stage = Operation(GetPredicateCombiner(instr.pset.cond), lhs, rhs);

    // Second stage: (first stage) OP pred39
    const Node extra = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0);
    const OperationCode combiner = GetPredicateCombiner(instr.pset.op);
    const Node selector = Operation(combiner, first_stage, extra);

    const bool as_float = instr.pset.bf != 0;

    Node when_true{};
    Node when_false{};
    if (as_float) {
        when_true = Immediate(1.0f);
        when_false = Immediate(0.0f);
    } else {
        when_true = Immediate(0xffffffff);
        when_false = Immediate(0);
    }

    const Node result =
        Operation(OperationCode::Select, PRECISE, selector, when_true, when_false);

    if (as_float) {
        SetInternalFlagsFromFloat(bb, result, instr.generates_cc);
    } else {
        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
    }
    SetRegister(bb, instr.gpr0, result);

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,51 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
|
|||
/// Decodes R2P_IMM at program_code[pc]. For each of the seven programmable predicates a
/// conditional assignment is emitted: the predicate is written from the matching bit of gpr8
/// (starting at the byte selected by r2p.byte) only when the corresponding bit of the
/// immediate mask is set. The code parameter is not read.
/// @returns pc, unmodified.
u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr);

    // Only the immediate form is expected here; the mask is read the same way regardless.
    if (opcode->get().GetId() != OpCode::Id::R2P_IMM) {
        UNREACHABLE();
    }
    const Node write_mask = Immediate(static_cast<u32>(instr.r2p.immediate_mask));

    const Node source = GetRegister(instr.gpr8);
    const auto first_bit = static_cast<u32>(instr.r2p.byte) * 8;

    constexpr u32 programmable_preds = 7;
    // The index stays u64 so that GetPredicate(pred) resolves to the (u64, bool) overload.
    for (u64 pred = 0; pred < programmable_preds; ++pred) {
        const auto bit = static_cast<u32>(pred);

        const Node enable_bit = BitfieldExtract(write_mask, bit, 1);
        const Node is_enabled =
            Operation(OperationCode::LogicalUNotEqual, enable_bit, Immediate(0));

        const Node source_bit = BitfieldExtract(source, first_bit + bit, 1);
        const Node new_value =
            Operation(OperationCode::LogicalUNotEqual, source_bit, Immediate(0));

        const Node assignment =
            Operation(OperationCode::LogicalAssign, GetPredicate(pred), new_value);
        bb.push_back(Conditional(is_enabled, {assignment}));
    }

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,55 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
|
|||
/// Decodes the SHR_* and SHL_* instructions at program_code[pc]. The shifted value comes from
/// gpr8; the shift amount is an immediate, gpr20 or a constant buffer entry depending on the
/// instruction flags. The result updates the internal flags (when generates_cc is set) and is
/// written to gpr0. The code parameter is not read.
/// @returns pc, unmodified.
u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    const Node operand = GetRegister(instr.gpr8);

    Node amount{};
    if (instr.is_b_imm) {
        amount = Immediate(instr.alu.GetSignedImm20_20());
    } else if (instr.is_b_gpr) {
        amount = GetRegister(instr.gpr20);
    } else {
        amount = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
    }

    Node result{};
    switch (opcode->get().GetId()) {
    case OpCode::Id::SHR_C:
    case OpCode::Id::SHR_R:
    case OpCode::Id::SHR_IMM:
        result = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
                                 PRECISE, operand, amount);
        break;
    case OpCode::Id::SHL_C:
    case OpCode::Id::SHL_R:
    case OpCode::Id::SHL_IMM:
        result = Operation(OperationCode::ILogicalShiftLeft, PRECISE, operand, amount);
        break;
    default:
        // Nothing is written for opcodes this decoder does not know.
        UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
        return pc;
    }

    SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
    SetRegister(bb, instr.gpr0, result);

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,111 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
using Tegra::Shader::Pred; |
|||
using Tegra::Shader::VideoType; |
|||
using Tegra::Shader::VmadShr; |
|||
|
|||
/// Decodes the video instructions VMAD and VSETP at program_code[pc]. Operand A always comes
/// from gpr8; operand B comes from gpr20 or from a 16-bit immediate. Both register operands
/// are narrowed through GetVideoOperand. The code parameter is not read.
/// @returns pc, unmodified.
u32 ShaderIR::DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    const Node op_a =
        GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
                        instr.video.type_a, instr.video.byte_height_a);

    Node op_b{};
    if (instr.video.use_register_b) {
        op_b = GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
                               instr.video.signed_b, instr.video.type_b,
                               instr.video.byte_height_b);
    } else if (instr.video.signed_b) {
        // Go through s16 first so the cast to u32 is performed on a signed 16-bit value.
        const auto signed_imm = static_cast<s16>(instr.alu.GetImm20_16());
        op_b = Immediate(static_cast<u32>(signed_imm));
    } else {
        op_b = Immediate(instr.alu.GetImm20_16());
    }

    // The operation is treated as signed as soon as either operand is flagged signed.
    const bool any_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;

    switch (opcode->get().GetId()) {
    case OpCode::Id::VMAD: {
        const Node op_c = GetRegister(instr.gpr39);

        Node result = SignedOperation(OperationCode::IMul, any_signed, NO_PRECISE, op_a, op_b);
        result = SignedOperation(OperationCode::IAdd, any_signed, NO_PRECISE, result, op_c);

        const bool shift_by_7 = instr.vmad.shr == VmadShr::Shr7;
        const bool shift_by_15 = instr.vmad.shr == VmadShr::Shr15;
        if (shift_by_7 || shift_by_15) {
            const Node amount = Immediate(shift_by_7 ? 7 : 15);
            result =
                SignedOperation(OperationCode::IArithmeticShiftRight, any_signed, result, amount);
        }

        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
        SetRegister(bb, instr.gpr0, result);
        break;
    }
    case OpCode::Id::VSETP: {
        const auto unused_index = static_cast<u64>(Pred::UnusedIndex);

        // The constant predicate is not a valid destination.
        ASSERT(instr.vsetp.pred3 != unused_index);

        const Node comparison =
            GetPredicateComparisonInteger(instr.vsetp.cond, any_signed, op_a, op_b);
        const Node extra = GetPredicate(instr.vsetp.pred39, false);

        const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op);

        // Primary destination receives comparison OP extra.
        SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, comparison, extra));

        // Secondary destination, when enabled, receives !comparison OP extra.
        if (instr.vsetp.pred0 != unused_index) {
            const Node inverted = Operation(OperationCode::LogicalNegate, comparison);
            SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, inverted, extra));
        }
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
    }

    return pc;
}
|||
|
|||
/// Extracts the part of op selected by a video instruction operand descriptor.
/// When is_chunk is false, 8 bits starting at bit byte_height * 8 are extracted. Otherwise the
/// low or high 16 bits are extracted according to type; the remaining types are reported
/// through the assert macros and yield an immediate zero. is_signed is not read here.
Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
                               Tegra::Shader::VideoType type, u64 byte_height) {
    if (!is_chunk) {
        const auto first_bit = static_cast<u32>(byte_height * 8);
        return BitfieldExtract(op, first_bit, 8);
    }
    const Node fallback = Immediate(0);

    switch (type) {
    case Tegra::Shader::VideoType::Size16_Low:
        return BitfieldExtract(op, 0, 16);
    case Tegra::Shader::VideoType::Size16_High:
        return BitfieldExtract(op, 16, 16);
    case Tegra::Shader::VideoType::Size32:
        // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
        // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
        UNIMPLEMENTED();
        break;
    case Tegra::Shader::VideoType::Invalid:
        UNREACHABLE_MSG("Invalid instruction encoding");
        break;
    default:
        UNREACHABLE();
        break;
    }
    return fallback;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,97 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
|
|||
/// Decodes the XMAD_* instructions at program_code[pc]: 16-bit halves of operands A and B are
/// multiplied, optionally shifted left by 16, added to an addend derived from operand C
/// according to xmad.mode, optionally merged with operand B, and written to gpr0.
/// Signed operands and condition code generation are reported as unimplemented.
/// The code parameter is not read.
/// @returns pc, unmodified.
u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.xmad.sign_a);
    UNIMPLEMENTED_IF(instr.xmad.sign_b);
    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                         "Condition codes generation in XMAD is not implemented");

    const Node full_a = GetRegister(instr.gpr8);

    // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
    UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
    const bool is_signed_a = instr.xmad.sign_a == 1;
    const bool is_signed_b = instr.xmad.sign_b == 1;
    const bool is_signed_c = is_signed_a;

    // Operand B, operand C and the merge flag live in different places per encoding.
    bool is_merge = false;
    Node full_b{};
    Node full_c{};
    switch (opcode->get().GetId()) {
    case OpCode::Id::XMAD_CR:
        is_merge = instr.xmad.merge_56 != 0;
        full_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        full_c = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::XMAD_RR:
        is_merge = instr.xmad.merge_37 != 0;
        full_b = GetRegister(instr.gpr20);
        full_c = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::XMAD_RC:
        is_merge = false;
        full_b = GetRegister(instr.gpr39);
        full_c = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
        break;
    case OpCode::Id::XMAD_IMM:
        is_merge = instr.xmad.merge_37 != 0;
        full_b = Immediate(static_cast<u32>(instr.xmad.imm20_16));
        full_c = GetRegister(instr.gpr39);
        break;
    default:
        UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
        is_merge = false;
        full_b = Immediate(0);
        full_c = Immediate(0);
        break;
    }

    // Select the 16-bit half of each multiplicand.
    const Node half_a = BitfieldExtract(full_a, instr.xmad.high_a ? 16 : 0, 16);
    const Node half_b = BitfieldExtract(full_b, instr.xmad.high_b ? 16 : 0, 16);

    // TODO(Rodrigo): Use an appropriate sign for this operation
    Node product = Operation(OperationCode::IMul, NO_PRECISE, half_a, half_b);
    if (instr.xmad.product_shift_left) {
        product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
    }

    Node addend{};
    switch (instr.xmad.mode) {
    case Tegra::Shader::XmadMode::None:
        addend = full_c;
        break;
    case Tegra::Shader::XmadMode::CLo:
        addend = BitfieldExtract(full_c, 0, 16);
        break;
    case Tegra::Shader::XmadMode::CHi:
        addend = BitfieldExtract(full_c, 16, 16);
        break;
    case Tegra::Shader::XmadMode::CBcc: {
        // Addend is C plus the full operand B shifted left by 16.
        const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
                                               NO_PRECISE, full_b, Immediate(16));
        addend = SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, full_c, shifted_b);
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value()));
        addend = Immediate(0);
        break;
    }

    // TODO(Rodrigo): Use an appropriate sign for this operation
    Node result = Operation(OperationCode::IAdd, product, addend);
    if (is_merge) {
        // Keep the low 16 bits of the sum and OR in the full operand B shifted left by 16.
        const Node low_sum = BitfieldExtract(result, 0, 16);
        const Node high_b =
            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, full_b, Immediate(16));
        result = Operation(OperationCode::IBitwiseOr, NO_PRECISE, low_sum, high_b);
    }

    SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
    SetRegister(bb, instr.gpr0, result);

    return pc;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,444 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include <cmath>
|
|||
#include <unordered_map>
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "common/logging/log.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Attribute; |
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::IpaMode; |
|||
using Tegra::Shader::Pred; |
|||
using Tegra::Shader::PredCondition; |
|||
using Tegra::Shader::PredOperation; |
|||
using Tegra::Shader::Register; |
|||
|
|||
/// Takes ownership of node_data by placing it in a heap allocation kept alive by stored_nodes.
/// @param node_data Node contents; moved from.
/// @returns A non-owning handle to the stored node.
Node ShaderIR::StoreNode(NodeData&& node_data) {
    // A named rvalue reference is an lvalue: without std::move the node would be copied.
    auto store = std::make_unique<NodeData>(std::move(node_data));
    const Node node = store.get();
    stored_nodes.push_back(std::move(store));
    return node;
}
|||
|
|||
/// Creates and stores a conditional node that wraps code and is guarded by condition.
Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) {
    ConditionalNode conditional(condition, std::move(code));
    return StoreNode(std::move(conditional));
}
|||
|
|||
/// Creates and stores a comment node carrying text.
Node ShaderIR::Comment(const std::string& text) {
    CommentNode comment(text);
    return StoreNode(std::move(comment));
}
|||
|
|||
/// Creates and stores an immediate node holding value.
Node ShaderIR::Immediate(u32 value) {
    ImmediateNode immediate(value);
    return StoreNode(std::move(immediate));
}
|||
|
|||
/// Creates and stores a general purpose register node for reg.
/// Every register other than Register::ZeroIndex is also recorded in used_registers.
Node ShaderIR::GetRegister(Register reg) {
    const bool track_usage = reg != Register::ZeroIndex;
    if (track_usage) {
        used_registers.insert(static_cast<u32>(reg));
    }
    GprNode gpr(reg);
    return StoreNode(std::move(gpr));
}
|||
|
|||
/// Creates an immediate node from the value returned by instr.alu.GetImm20_19().
Node ShaderIR::GetImmediate19(Instruction instr) {
    const auto raw = instr.alu.GetImm20_19();
    return Immediate(raw);
}
|||
|
|||
/// Creates an immediate node from the value returned by instr.alu.GetImm20_32().
Node ShaderIR::GetImmediate32(Instruction instr) {
    const auto raw = instr.alu.GetImm20_32();
    return Immediate(raw);
}
|||
|
|||
/// Creates and stores a constant buffer node that reads buffer index_ at the fixed offset_.
/// The access is recorded in used_cbufs, creating the entry on first use.
Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) {
    const auto index = static_cast<u32>(index_);
    const auto offset = static_cast<u32>(offset_);

    // try_emplace leaves an already existing entry untouched.
    auto& usage = used_cbufs.try_emplace(index).first->second;
    usage.MarkAsUsed(offset);

    const Node offset_node = Immediate(offset);
    return StoreNode(CbufNode(index, offset_node));
}
|||
|
|||
/// Creates and stores a constant buffer node that reads buffer index_ at node + offset_.
/// The buffer is flagged as indirectly used in used_cbufs, creating the entry on first use.
Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
    const auto index = static_cast<u32>(index_);
    const auto offset = static_cast<u32>(offset_);

    // try_emplace leaves an already existing entry untouched.
    auto& usage = used_cbufs.try_emplace(index).first->second;
    usage.MarkAsUsedIndirect();

    const Node base_offset = Immediate(offset);
    const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, base_offset);
    return StoreNode(CbufNode(index, final_offset));
}
|||
|
|||
/// Creates and stores a predicate node for predicate index pred_, optionally negated.
/// Predicates other than Pred::UnusedIndex and Pred::NeverExecute are recorded in
/// used_predicates.
Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
    const auto pred = static_cast<Pred>(pred_);

    const bool is_untracked = pred == Pred::UnusedIndex || pred == Pred::NeverExecute;
    if (!is_untracked) {
        used_predicates.insert(pred);
    }

    PredicateNode predicate(pred, negated);
    return StoreNode(std::move(predicate));
}
|||
|
|||
/// Creates a predicate node for a constant boolean: Pred::UnusedIndex stands for true and
/// Pred::NeverExecute for false.
Node ShaderIR::GetPredicate(bool immediate) {
    const Pred constant = immediate ? Pred::UnusedIndex : Pred::NeverExecute;
    return GetPredicate(static_cast<u64>(constant));
}
|||
|
|||
/// Creates and stores an input attribute node for element of attribute index.
/// input_mode is added to the set of modes recorded for index in used_input_attributes.
Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element,
                                 const Tegra::Shader::IpaMode& input_mode, Node buffer) {
    // emplace keeps an already present set of modes untouched.
    auto& recorded_modes =
        used_input_attributes.emplace(index, std::set<Tegra::Shader::IpaMode>{}).first->second;
    recorded_modes.insert(input_mode);

    const auto element_index = static_cast<u32>(element);
    return StoreNode(AbufNode(index, element_index, input_mode, buffer));
}
|||
|
|||
/// Creates and stores an output attribute node for element of attribute index.
/// index is recorded in used_output_attributes; writes to either clip distance attribute
/// additionally flag the matching slot of used_clip_distances.
Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
    const bool is_low_clip = index == Attribute::Index::ClipDistances0123;
    const bool is_high_clip = index == Attribute::Index::ClipDistances4567;
    if (is_low_clip || is_high_clip) {
        // ClipDistances4567 starts at clip distance 4. The previous base of 1 made its elements
        // alias slots 1..4 of the 0123 group.
        // NOTE(review): assumes used_clip_distances has at least 8 slots - confirm in the header.
        const u32 base = is_high_clip ? 4 : 0;
        const auto clip_index = static_cast<u32>(base + element);
        used_clip_distances.at(clip_index) = true;
    }
    used_output_attributes.insert(index);

    return StoreNode(AbufNode(index, static_cast<u32>(element), buffer));
}
|||
|
|||
/// Creates and stores a node reading internal flag; when negated is set the node is wrapped
/// in a LogicalNegate operation.
Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
    const Node flag_node = StoreNode(InternalFlagNode(flag));
    return negated ? Operation(OperationCode::LogicalNegate, flag_node) : flag_node;
}
|||
|
|||
/// Creates and stores a local memory node addressed by address.
Node ShaderIR::GetLocalMemory(Node address) {
    LmemNode local_memory(address);
    return StoreNode(std::move(local_memory));
}
|||
|
|||
/// Returns a register node for temporal id. Temporals are numbered starting right after
/// Register::ZeroIndex.
Node ShaderIR::GetTemporal(u32 id) {
    const Register temporal = Register::ZeroIndex + 1 + id;
    return GetRegister(temporal);
}
|||
|
|||
/// Applies the optional float operand modifiers to value: FAbsolute first (when absolute is
/// set), then FNegate (when negate is set).
Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
    Node result = value;
    if (absolute) {
        result = Operation(OperationCode::FAbsolute, NO_PRECISE, result);
    }
    if (negate) {
        result = Operation(OperationCode::FNegate, NO_PRECISE, result);
    }
    return result;
}
|||
|
|||
/// Returns value clamped to [+0.0, 1.0] through an FClamp operation when saturate is set;
/// otherwise value is returned as is.
Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
    if (saturate) {
        const Node lower_bound = Immediate(std::copysignf(0, 1));
        const Node upper_bound = Immediate(1.0f);
        return Operation(OperationCode::FClamp, NO_PRECISE, value, lower_bound, upper_bound);
    }
    return value;
}
|||
|
|||
/// Narrows value to the integer width described by size.
/// Byte and Short values are shifted left and then right again (by 24 and 16 bits) through
/// SignedOperation with the given signedness; Word values are returned untouched.
/// Any other size is reported through UNREACHABLE_MSG and value is returned unchanged.
Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) {
    switch (size) {
    case Register::Size::Byte:
        value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value,
                                Immediate(24));
        value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value,
                                Immediate(24));
        return value;
    case Register::Size::Short:
        value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value,
                                Immediate(16));
        value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value,
                                Immediate(16));
        // Return explicitly instead of silently falling through into the Word case.
        return value;
    case Register::Size::Word:
        // Default - do nothing
        return value;
    default:
        UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
        return value;
    }
}
|||
|
|||
/// Applies the optional integer operand modifiers to value: IAbsolute first (when absolute is
/// set), then INegate (when negate is set). Unsigned operands are returned unchanged.
Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) {
    // Absolute or negate on an unsigned is pointless
    if (!is_signed) {
        return value;
    }

    Node result = value;
    if (absolute) {
        result = Operation(OperationCode::IAbsolute, NO_PRECISE, result);
    }
    if (negate) {
        result = Operation(OperationCode::INegate, NO_PRECISE, result);
    }
    return result;
}
|||
|
|||
/// Builds an immediate node from instr.half_imm.PackImmediates(). When has_negation is set,
/// the immediate is wrapped in an HNegate operation driven by the instruction's first_negate
/// and second_negate bits.
Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
    const Node packed = Immediate(instr.half_imm.PackImmediates());
    if (!has_negation) {
        return packed;
    }

    const bool negate_first = instr.half_imm.first_negate != 0;
    const Node first_flag = GetPredicate(negate_first);

    const bool negate_second = instr.half_imm.second_negate != 0;
    const Node second_flag = GetPredicate(negate_second);

    return Operation(OperationCode::HNegate, HALF_NO_PRECISE, packed, first_flag, second_flag);
}
|||
|
|||
/// Produces the node written by a half-float instruction for the given merge mode:
/// H0_H1 passes src through, F32 wraps it in HMergeF32, and Mrg_H0 / Mrg_H1 combine dest and
/// src through HMergeH0 / HMergeH1. Unknown modes hit UNREACHABLE and return src.
Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
    // Inside ShaderIR the unqualified name HalfMerge is this member function, hence the alias.
    using MergeMode = Tegra::Shader::HalfMerge;

    if (merge == MergeMode::H0_H1) {
        return src;
    }
    if (merge == MergeMode::F32) {
        return Operation(OperationCode::HMergeF32, src);
    }
    if (merge == MergeMode::Mrg_H0) {
        return Operation(OperationCode::HMergeH0, dest, src);
    }
    if (merge == MergeMode::Mrg_H1) {
        return Operation(OperationCode::HMergeH1, dest, src);
    }

    UNREACHABLE();
    return src;
}
|||
|
|||
/// Applies the optional half-float operand modifiers to value: HAbsolute first (when absolute
/// is set), then HNegate with both negate flags set to the constant true predicate (when
/// negate is set).
Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
    Node result = value;
    if (absolute) {
        result = Operation(OperationCode::HAbsolute, HALF_NO_PRECISE, result);
    }
    if (negate) {
        const Node first_flag = GetPredicate(true);
        const Node second_flag = GetPredicate(true);
        result = Operation(OperationCode::HNegate, HALF_NO_PRECISE, result, first_flag,
                           second_flag);
    }
    return result;
}
|||
|
|||
/// Builds the boolean node for the float comparison `op_a condition op_b`.
/// The *WithNan conditions additionally evaluate to true when either operand is NaN.
/// A condition missing from the lookup table is reported through UNIMPLEMENTED_MSG and yields
/// the constant false predicate.
Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
        {PredCondition::LessThan, OperationCode::LogicalFLessThan},
        {PredCondition::Equal, OperationCode::LogicalFEqual},
        {PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
        {PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
        {PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
        {PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
        {PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
        {PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
        {PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
        {PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
        {PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}};

    const auto comparison{PredicateComparisonTable.find(condition)};
    if (comparison == PredicateComparisonTable.end()) {
        // Bail out here: dereferencing the end iterator below would be undefined behaviour if
        // the assert macro returns.
        UNIMPLEMENTED_MSG("Unknown predicate comparison operation");
        return GetPredicate(false);
    }

    Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);

    const bool accepts_nan = condition == PredCondition::LessThanWithNan ||
                             condition == PredCondition::NotEqualWithNan ||
                             condition == PredCondition::LessEqualWithNan ||
                             condition == PredCondition::GreaterThanWithNan ||
                             condition == PredCondition::GreaterEqualWithNan;
    if (accepts_nan) {
        // predicate || isnan(op_a) || isnan(op_b)
        predicate = Operation(OperationCode::LogicalOr, predicate,
                              Operation(OperationCode::LogicalFIsNan, op_a));
        predicate = Operation(OperationCode::LogicalOr, predicate,
                              Operation(OperationCode::LogicalFIsNan, op_b));
    }

    return predicate;
}
|||
|
|||
/// Builds the boolean node for the integer comparison `op_a condition op_b`, using the signed
/// or unsigned variant of the operation according to is_signed.
/// The *WithNan conditions are mapped to their plain counterparts and reported as
/// unimplemented. A condition missing from the lookup table is reported through
/// UNIMPLEMENTED_MSG and yields the constant false predicate.
Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
                                             Node op_b) {
    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
        {PredCondition::LessThan, OperationCode::LogicalILessThan},
        {PredCondition::Equal, OperationCode::LogicalIEqual},
        {PredCondition::LessEqual, OperationCode::LogicalILessEqual},
        {PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
        {PredCondition::NotEqual, OperationCode::LogicalINotEqual},
        {PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
        {PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
        {PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
        {PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
        {PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
        {PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}};

    const auto comparison{PredicateComparisonTable.find(condition)};
    if (comparison == PredicateComparisonTable.end()) {
        // Bail out here: dereferencing the end iterator below would be undefined behaviour if
        // the assert macro returns.
        UNIMPLEMENTED_MSG("Unknown predicate comparison operation");
        return GetPredicate(false);
    }

    Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b);

    UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
                             condition == PredCondition::NotEqualWithNan ||
                             condition == PredCondition::LessEqualWithNan ||
                             condition == PredCondition::GreaterThanWithNan ||
                             condition == PredCondition::GreaterEqualWithNan,
                         "NaN comparisons for integers are not implemented");
    return predicate;
}
|||
|
|||
/// Builds a node that compares two packed half-float pairs.
/// @param condition Comparison to perform. The "WithNan" variants are reported as unimplemented
///                  and then handled with the opcode of their plain counterpart.
/// @param meta Half arithmetic metadata attached to the generated operation.
/// @param op_a Left-hand operand.
/// @param op_b Right-hand operand.
/// @return Operation node carrying the selected Logical2H* opcode.
Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition,
                                          const MetaHalfArithmetic& meta, Node op_a, Node op_b) {
    const bool is_nan_variant = condition == PredCondition::LessThanWithNan ||
                                condition == PredCondition::NotEqualWithNan ||
                                condition == PredCondition::LessEqualWithNan ||
                                condition == PredCondition::GreaterThanWithNan ||
                                condition == PredCondition::GreaterEqualWithNan;
    UNIMPLEMENTED_IF_MSG(is_nan_variant, "Unimplemented NaN comparison for half floats");

    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
        {PredCondition::LessThan, OperationCode::Logical2HLessThan},
        {PredCondition::Equal, OperationCode::Logical2HEqual},
        {PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
        {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
        {PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
        {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
        {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThan},
        {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqual},
        {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqual},
        {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThan},
        {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqual}};

    const auto entry = PredicateComparisonTable.find(condition);
    const bool is_unknown = entry == PredicateComparisonTable.end();
    UNIMPLEMENTED_IF_MSG(is_unknown, "Unknown predicate comparison operation");

    // NOTE(review): entry is dereferenced even when the lookup failed; if the macro above does
    // not abort, this reads through the end iterator - confirm and add a fallback.
    return Operation(entry->second, meta, op_a, op_b);
}
|||
|
|||
/// Translates a predicate combining operation into the matching logical opcode.
/// @param operation Combiner decoded from the instruction.
/// @return LogicalAnd, LogicalOr or LogicalXor. Unknown combiners are reported and yield a
///         value-initialized OperationCode, the same fallback SignedToUnsignedCode uses.
OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
    switch (operation) {
    case PredOperation::And:
        return OperationCode::LogicalAnd;
    case PredOperation::Or:
        return OperationCode::LogicalOr;
    case PredOperation::Xor:
        return OperationCode::LogicalXor;
    default:
        // Previously a failed table lookup was dereferenced; return a defined value instead.
        UNIMPLEMENTED_MSG("Unknown predicate operation");
        return {};
    }
}
|||
|
|||
/// Evaluates a condition code from the internal flags.
/// @param cc Condition code to evaluate. Only NEU is handled.
/// @return The negated Zero flag for NEU; for any other code the problem is reported and the
///         NeverExecute predicate is returned.
Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
    if (cc == Tegra::Shader::ConditionCode::NEU) {
        return GetInternalFlag(InternalFlag::Zero, true);
    }

    UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
    return GetPredicate(static_cast<u64>(Pred::NeverExecute));
}
|||
|
|||
/// Appends an Assign operation that writes src into the register dest.
void ShaderIR::SetRegister(BasicBlock& bb, Register dest, Node src) {
    const Node destination = GetRegister(dest);
    bb.push_back(Operation(OperationCode::Assign, destination, src));
}
|||
|
|||
/// Appends a LogicalAssign operation that writes src into the predicate dest.
void ShaderIR::SetPredicate(BasicBlock& bb, u64 dest, Node src) {
    const Node destination = GetPredicate(dest);
    bb.push_back(Operation(OperationCode::LogicalAssign, destination, src));
}
|||
|
|||
/// Appends a LogicalAssign operation that writes value into the given internal flag.
void ShaderIR::SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value) {
    const Node destination = GetInternalFlag(flag);
    bb.push_back(Operation(OperationCode::LogicalAssign, destination, value));
}
|||
|
|||
/// Appends an Assign operation that writes value into local memory at address.
void ShaderIR::SetLocalMemory(BasicBlock& bb, Node address, Node value) {
    const Node destination = GetLocalMemory(address);
    bb.push_back(Operation(OperationCode::Assign, destination, value));
}
|||
|
|||
/// Writes value into temporal number id, stored in the register at ZeroIndex + 1 + id.
void ShaderIR::SetTemporal(BasicBlock& bb, u32 id, Node value) {
    const auto temporal_register = Register::ZeroIndex + 1 + id;
    SetRegister(bb, temporal_register, value);
}
|||
|
|||
/// Updates the internal flags from a float result.
/// Only the Zero flag is written (value == 0.0f); a warning notes the missing flags.
/// @param sets_cc When false the call does nothing.
void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc) {
    if (sets_cc) {
        SetInternalFlag(bb, InternalFlag::Zero,
                        Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)));
        LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
    }
}
|||
|
|||
/// Updates the internal flags from an integer result.
/// Only the Zero flag is written (value == 0); a warning notes the missing flags.
/// @param sets_cc When false the call does nothing.
void ShaderIR::SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc) {
    if (sets_cc) {
        SetInternalFlag(bb, InternalFlag::Zero,
                        Operation(OperationCode::LogicalIEqual, value, Immediate(0)));
        LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
    }
}
|||
|
|||
/// Builds an unsigned bitfield extract of `bits` bits of value starting at bit `offset`.
Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
    const Node offset_node = Immediate(offset);
    const Node bits_node = Immediate(bits);
    return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, offset_node, bits_node);
}
|||
|
|||
/// Maps a signed opcode to its unsigned counterpart.
/// @param operation_code Signed (or integer-to-float cast) opcode.
/// @param is_signed When true the opcode is returned untouched.
/// @return The unsigned equivalent. Opcodes without one are reported through UNREACHABLE_MSG
///         and a value-initialized OperationCode is returned.
/*static*/ OperationCode ShaderIR::SignedToUnsignedCode(OperationCode operation_code,
                                                        bool is_signed) {
    if (is_signed) {
        return operation_code;
    }
    switch (operation_code) {
    case OperationCode::FCastInteger:
        return OperationCode::FCastUInteger;
    case OperationCode::IAdd:
        return OperationCode::UAdd;
    case OperationCode::IMul:
        return OperationCode::UMul;
    case OperationCode::IDiv:
        return OperationCode::UDiv;
    case OperationCode::IMin:
        return OperationCode::UMin;
    case OperationCode::IMax:
        return OperationCode::UMax;
    case OperationCode::ICastFloat:
        return OperationCode::UCastFloat;
    case OperationCode::ICastUnsigned:
        return OperationCode::UCastSigned;
    case OperationCode::ILogicalShiftLeft:
        return OperationCode::ULogicalShiftLeft;
    case OperationCode::ILogicalShiftRight:
        return OperationCode::ULogicalShiftRight;
    case OperationCode::IArithmeticShiftRight:
        return OperationCode::UArithmeticShiftRight;
    case OperationCode::IBitwiseAnd:
        return OperationCode::UBitwiseAnd;
    case OperationCode::IBitwiseOr:
        return OperationCode::UBitwiseOr;
    case OperationCode::IBitwiseXor:
        return OperationCode::UBitwiseXor;
    case OperationCode::IBitwiseNot:
        return OperationCode::UBitwiseNot;
    case OperationCode::IBitfieldInsert:
        return OperationCode::UBitfieldInsert;
    case OperationCode::IBitfieldExtract:
        // This mapping was missing although both opcodes exist in OperationCode.
        return OperationCode::UBitfieldExtract;
    case OperationCode::IBitCount:
        return OperationCode::UBitCount;
    case OperationCode::LogicalILessThan:
        return OperationCode::LogicalULessThan;
    case OperationCode::LogicalIEqual:
        return OperationCode::LogicalUEqual;
    case OperationCode::LogicalILessEqual:
        return OperationCode::LogicalULessEqual;
    case OperationCode::LogicalIGreaterThan:
        return OperationCode::LogicalUGreaterThan;
    case OperationCode::LogicalINotEqual:
        return OperationCode::LogicalUNotEqual;
    case OperationCode::LogicalIGreaterEqual:
        return OperationCode::LogicalUGreaterEqual;
    case OperationCode::INegate:
        // Return right away so only the matching diagnostic is emitted (no fallthrough).
        UNREACHABLE_MSG("Can't negate an unsigned integer");
        return {};
    case OperationCode::IAbsolute:
        UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
        return {};
    default:
        break;
    }
    UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
    return {};
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -0,0 +1,793 @@ |
|||
// Copyright 2018 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <algorithm>
#include <array>
#include <cstring>
#include <map>
#include <optional>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <variant>
#include <vector>
|||
|
|||
#include "common/common_types.h" |
|||
#include "video_core/engines/maxwell_3d.h" |
|||
#include "video_core/engines/shader_bytecode.h" |
|||
#include "video_core/engines/shader_header.h" |
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
class OperationNode; |
|||
class ConditionalNode; |
|||
class GprNode; |
|||
class ImmediateNode; |
|||
class InternalFlagNode; |
|||
class PredicateNode; |
|||
class AbufNode; ///< Attribute buffer |
|||
class CbufNode; ///< Constant buffer |
|||
class LmemNode; ///< Local memory |
|||
class GmemNode; ///< Global memory |
|||
class CommentNode; |
|||
|
|||
using ProgramCode = std::vector<u64>; |
|||
|
|||
using NodeData = |
|||
std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode, |
|||
PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>; |
|||
using Node = const NodeData*; |
|||
using Node4 = std::array<Node, 4>; |
|||
using BasicBlock = std::vector<Node>; |
|||
|
|||
constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; |
|||
|
|||
/// Every operation the IR can express. Each entry documents its operands and result as
/// (operands...) -> result.
enum class OperationCode {
    Assign, ///< (float& dest, float src) -> void

    Select, ///< (MetaArithmetic, bool pred, float a, float b) -> float

    FAdd,          ///< (MetaArithmetic, float a, float b) -> float
    FMul,          ///< (MetaArithmetic, float a, float b) -> float
    FDiv,          ///< (MetaArithmetic, float a, float b) -> float
    FFma,          ///< (MetaArithmetic, float a, float b, float c) -> float
    FNegate,       ///< (MetaArithmetic, float a) -> float
    FAbsolute,     ///< (MetaArithmetic, float a) -> float
    FClamp,        ///< (MetaArithmetic, float value, float min, float max) -> float
    FMin,          ///< (MetaArithmetic, float a, float b) -> float
    FMax,          ///< (MetaArithmetic, float a, float b) -> float
    FCos,          ///< (MetaArithmetic, float a) -> float
    FSin,          ///< (MetaArithmetic, float a) -> float
    FExp2,         ///< (MetaArithmetic, float a) -> float
    FLog2,         ///< (MetaArithmetic, float a) -> float
    FInverseSqrt,  ///< (MetaArithmetic, float a) -> float
    FSqrt,         ///< (MetaArithmetic, float a) -> float
    FRoundEven,    ///< (MetaArithmetic, float a) -> float
    FFloor,        ///< (MetaArithmetic, float a) -> float
    FCeil,         ///< (MetaArithmetic, float a) -> float
    FTrunc,        ///< (MetaArithmetic, float a) -> float
    FCastInteger,  ///< (MetaArithmetic, int a) -> float
    FCastUInteger, ///< (MetaArithmetic, uint a) -> float

    IAdd,                  ///< (MetaArithmetic, int a, int b) -> int
    IMul,                  ///< (MetaArithmetic, int a, int b) -> int
    IDiv,                  ///< (MetaArithmetic, int a, int b) -> int
    INegate,               ///< (MetaArithmetic, int a) -> int
    IAbsolute,             ///< (MetaArithmetic, int a) -> int
    IMin,                  ///< (MetaArithmetic, int a, int b) -> int
    IMax,                  ///< (MetaArithmetic, int a, int b) -> int
    ICastFloat,            ///< (MetaArithmetic, float a) -> int
    ICastUnsigned,         ///< (MetaArithmetic, uint a) -> int
    ILogicalShiftLeft,     ///< (MetaArithmetic, int a, uint b) -> int
    ILogicalShiftRight,    ///< (MetaArithmetic, int a, uint b) -> int
    IArithmeticShiftRight, ///< (MetaArithmetic, int a, uint b) -> int
    IBitwiseAnd,           ///< (MetaArithmetic, int a, int b) -> int
    IBitwiseOr,            ///< (MetaArithmetic, int a, int b) -> int
    IBitwiseXor,           ///< (MetaArithmetic, int a, int b) -> int
    IBitwiseNot,           ///< (MetaArithmetic, int a) -> int
    IBitfieldInsert,  ///< (MetaArithmetic, int base, int insert, int offset, int bits) -> int
    IBitfieldExtract, ///< (MetaArithmetic, int value, int offset, int bits) -> int
    IBitCount,        ///< (MetaArithmetic, int) -> int

    UAdd,                  ///< (MetaArithmetic, uint a, uint b) -> uint
    UMul,                  ///< (MetaArithmetic, uint a, uint b) -> uint
    UDiv,                  ///< (MetaArithmetic, uint a, uint b) -> uint
    UMin,                  ///< (MetaArithmetic, uint a, uint b) -> uint
    UMax,                  ///< (MetaArithmetic, uint a, uint b) -> uint
    UCastFloat,            ///< (MetaArithmetic, float a) -> uint
    UCastSigned,           ///< (MetaArithmetic, int a) -> uint
    ULogicalShiftLeft,     ///< (MetaArithmetic, uint a, uint b) -> uint
    ULogicalShiftRight,    ///< (MetaArithmetic, uint a, uint b) -> uint
    UArithmeticShiftRight, ///< (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseAnd,           ///< (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseOr,            ///< (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseXor,           ///< (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseNot,           ///< (MetaArithmetic, uint a) -> uint
    UBitfieldInsert,  ///< (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
    UBitfieldExtract, ///< (MetaArithmetic, uint value, int offset, int bits) -> uint
    UBitCount,        ///< (MetaArithmetic, uint) -> uint

    HAdd,      ///< (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HMul,      ///< (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HFma,      ///< (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
    HAbsolute, ///< (f16vec2 a) -> f16vec2
    HNegate,   ///< (f16vec2 a, bool first, bool second) -> f16vec2
    HMergeF32, ///< (f16vec2 src) -> float
    HMergeH0,  ///< (f16vec2 dest, f16vec2 src) -> f16vec2
    HMergeH1,  ///< (f16vec2 dest, f16vec2 src) -> f16vec2
    HPack2,    ///< (float a, float b) -> f16vec2

    LogicalAssign, ///< (bool& dst, bool src) -> void
    LogicalAnd,    ///< (bool a, bool b) -> bool
    LogicalOr,     ///< (bool a, bool b) -> bool
    LogicalXor,    ///< (bool a, bool b) -> bool
    LogicalNegate, ///< (bool a) -> bool
    LogicalPick2,  ///< (bool2 pair, uint index) -> bool
    LogicalAll2,   ///< (bool2 a) -> bool
    LogicalAny2,   ///< (bool2 a) -> bool

    LogicalFLessThan,     ///< (float a, float b) -> bool
    LogicalFEqual,        ///< (float a, float b) -> bool
    LogicalFLessEqual,    ///< (float a, float b) -> bool
    LogicalFGreaterThan,  ///< (float a, float b) -> bool
    LogicalFNotEqual,     ///< (float a, float b) -> bool
    LogicalFGreaterEqual, ///< (float a, float b) -> bool
    LogicalFIsNan,        ///< (float a) -> bool

    LogicalILessThan,     ///< (int a, int b) -> bool
    LogicalIEqual,        ///< (int a, int b) -> bool
    LogicalILessEqual,    ///< (int a, int b) -> bool
    LogicalIGreaterThan,  ///< (int a, int b) -> bool
    LogicalINotEqual,     ///< (int a, int b) -> bool
    LogicalIGreaterEqual, ///< (int a, int b) -> bool

    LogicalULessThan,     ///< (uint a, uint b) -> bool
    LogicalUEqual,        ///< (uint a, uint b) -> bool
    LogicalULessEqual,    ///< (uint a, uint b) -> bool
    LogicalUGreaterThan,  ///< (uint a, uint b) -> bool
    LogicalUNotEqual,     ///< (uint a, uint b) -> bool
    LogicalUGreaterEqual, ///< (uint a, uint b) -> bool

    Logical2HLessThan,     ///< (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
    Logical2HEqual,        ///< (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
    Logical2HLessEqual,    ///< (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
    Logical2HGreaterThan,  ///< (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
    Logical2HNotEqual,     ///< (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2
    Logical2HGreaterEqual, ///< (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2

    F4Texture,                ///< (MetaTexture, float[N] coords, float[M] params) -> float4
    F4TextureLod,             ///< (MetaTexture, float[N] coords, float[M] params) -> float4
    F4TextureGather,          ///< (MetaTexture, float[N] coords, float[M] params) -> float4
    F4TextureQueryDimensions, ///< (MetaTexture, float a) -> float4
    F4TextureQueryLod,        ///< (MetaTexture, float[N] coords) -> float4
    F4TexelFetch,             ///< (MetaTexture, int[N], int) -> float4

    Branch,        ///< (uint branch_target) -> void
    PushFlowStack, ///< (uint branch_target) -> void
    PopFlowStack,  ///< () -> void
    Exit,          ///< () -> void
    Discard,       ///< () -> void

    EmitVertex,   ///< () -> void
    EndPrimitive, ///< () -> void

    YNegate, ///< () -> float

    Amount, ///< Number of operation codes, keep last
};
|||
|
|||
/// Internal flags tracked by the IR, numbered consecutively from zero.
enum class InternalFlag {
    Zero,     ///< Value 0
    Sign,     ///< Value 1
    Carry,    ///< Value 2
    Overflow, ///< Value 3
    Amount,   ///< Value 4, the number of flags above
};
|||
|
|||
/// Describes how the code path between a given entry point and return point terminates.
enum class ExitMethod {
    Undetermined, ///< Internal value. Only occurs while analyzing a JMP loop.
    AlwaysReturn, ///< Every code path reaches the return point.
    Conditional,  ///< The return point or an END instruction is reached conditionally.
    AlwaysEnd,    ///< Every code path reaches an END instruction.
};
|||
|
|||
class Sampler { |
|||
public: |
|||
explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, |
|||
bool is_array, bool is_shadow) |
|||
: offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {} |
|||
|
|||
std::size_t GetOffset() const { |
|||
return offset; |
|||
} |
|||
|
|||
std::size_t GetIndex() const { |
|||
return index; |
|||
} |
|||
|
|||
Tegra::Shader::TextureType GetType() const { |
|||
return type; |
|||
} |
|||
|
|||
bool IsArray() const { |
|||
return is_array; |
|||
} |
|||
|
|||
bool IsShadow() const { |
|||
return is_shadow; |
|||
} |
|||
|
|||
bool operator<(const Sampler& rhs) const { |
|||
return std::tie(offset, index, type, is_array, is_shadow) < |
|||
std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow); |
|||
} |
|||
|
|||
private: |
|||
/// Offset in TSC memory from which to read the sampler object, as specified by the sampling |
|||
/// instruction. |
|||
std::size_t offset{}; |
|||
std::size_t index{}; ///< Value used to index into the generated GLSL sampler array. |
|||
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) |
|||
bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. |
|||
bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. |
|||
}; |
|||
|
|||
class ConstBuffer { |
|||
public: |
|||
void MarkAsUsed(u64 offset) { |
|||
max_offset = std::max(max_offset, static_cast<u32>(offset)); |
|||
} |
|||
|
|||
void MarkAsUsedIndirect() { |
|||
is_indirect = true; |
|||
} |
|||
|
|||
bool IsIndirect() const { |
|||
return is_indirect; |
|||
} |
|||
|
|||
u32 GetSize() const { |
|||
return max_offset + 1; |
|||
} |
|||
|
|||
private: |
|||
u32 max_offset{}; |
|||
bool is_indirect{}; |
|||
}; |
|||
|
|||
/// Metadata attached to arithmetic operations.
struct MetaArithmetic {
    bool precise = false; ///< Precise flag, false unless set explicitly
};
|||
|
|||
struct MetaHalfArithmetic { |
|||
bool precise{}; |
|||
std::array<Tegra::Shader::HalfType, 3> types = {Tegra::Shader::HalfType::H0_H1, |
|||
Tegra::Shader::HalfType::H0_H1, |
|||
Tegra::Shader::HalfType::H0_H1}; |
|||
}; |
|||
|
|||
struct MetaTexture { |
|||
const Sampler& sampler; |
|||
u32 element{}; |
|||
u32 coords_count{}; |
|||
std::optional<u32> array_index; |
|||
}; |
|||
|
|||
constexpr MetaArithmetic PRECISE = {true}; |
|||
constexpr MetaArithmetic NO_PRECISE = {false}; |
|||
constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false}; |
|||
|
|||
using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture>; |
|||
|
|||
/// Holds any kind of operation that can be done in the IR |
|||
class OperationNode final { |
|||
public: |
|||
template <typename... T> |
|||
explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {} |
|||
|
|||
template <typename... T> |
|||
explicit constexpr OperationNode(OperationCode code, Meta&& meta) |
|||
: code{code}, meta{std::move(meta)} {} |
|||
|
|||
template <typename... T> |
|||
explicit constexpr OperationNode(OperationCode code, const T*... operands) |
|||
: OperationNode(code, {}, operands...) {} |
|||
|
|||
template <typename... T> |
|||
explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_) |
|||
: code{code}, meta{std::move(meta)} { |
|||
|
|||
auto operands_list = {operands_...}; |
|||
for (auto& operand : operands_list) { |
|||
operands.push_back(operand); |
|||
} |
|||
} |
|||
|
|||
explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands) |
|||
: code{code}, meta{meta}, operands{std::move(operands)} {} |
|||
|
|||
explicit OperationNode(OperationCode code, std::vector<Node>&& operands) |
|||
: code{code}, meta{}, operands{std::move(operands)} {} |
|||
|
|||
OperationCode GetCode() const { |
|||
return code; |
|||
} |
|||
|
|||
const Meta& GetMeta() const { |
|||
return meta; |
|||
} |
|||
|
|||
std::size_t GetOperandsCount() const { |
|||
return operands.size(); |
|||
} |
|||
|
|||
Node operator[](std::size_t operand_index) const { |
|||
return operands.at(operand_index); |
|||
} |
|||
|
|||
private: |
|||
const OperationCode code; |
|||
const Meta meta; |
|||
std::vector<Node> operands; |
|||
}; |
|||
|
|||
/// Encloses inside any kind of node that returns a boolean conditionally-executed code |
|||
class ConditionalNode final { |
|||
public: |
|||
explicit ConditionalNode(Node condition, std::vector<Node>&& code) |
|||
: condition{condition}, code{std::move(code)} {} |
|||
|
|||
Node GetCondition() const { |
|||
return condition; |
|||
} |
|||
|
|||
const std::vector<Node>& GetCode() const { |
|||
return code; |
|||
} |
|||
|
|||
private: |
|||
const Node condition; ///< Condition to be satisfied |
|||
std::vector<Node> code; ///< Code to execute |
|||
}; |
|||
|
|||
/// A general purpose register |
|||
class GprNode final { |
|||
public: |
|||
explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {} |
|||
|
|||
u32 GetIndex() const { |
|||
return static_cast<u32>(index); |
|||
} |
|||
|
|||
private: |
|||
const Tegra::Shader::Register index; |
|||
}; |
|||
|
|||
/// A 32-bits value that represents an immediate value |
|||
class ImmediateNode final { |
|||
public: |
|||
explicit constexpr ImmediateNode(u32 value) : value{value} {} |
|||
|
|||
u32 GetValue() const { |
|||
return value; |
|||
} |
|||
|
|||
private: |
|||
const u32 value; |
|||
}; |
|||
|
|||
/// One of Maxwell's internal flags |
|||
class InternalFlagNode final { |
|||
public: |
|||
explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {} |
|||
|
|||
InternalFlag GetFlag() const { |
|||
return flag; |
|||
} |
|||
|
|||
private: |
|||
const InternalFlag flag; |
|||
}; |
|||
|
|||
/// A predicate register, it can be negated without aditional nodes |
|||
class PredicateNode final { |
|||
public: |
|||
explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated) |
|||
: index{index}, negated{negated} {} |
|||
|
|||
Tegra::Shader::Pred GetIndex() const { |
|||
return index; |
|||
} |
|||
|
|||
bool IsNegated() const { |
|||
return negated; |
|||
} |
|||
|
|||
private: |
|||
const Tegra::Shader::Pred index; |
|||
const bool negated; |
|||
}; |
|||
|
|||
/// Attribute buffer memory (known as attributes or varyings in GLSL terms) |
|||
class AbufNode final { |
|||
public: |
|||
explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element, |
|||
const Tegra::Shader::IpaMode& input_mode, Node buffer = {}) |
|||
: input_mode{input_mode}, index{index}, element{element}, buffer{buffer} {} |
|||
|
|||
explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element, |
|||
Node buffer = {}) |
|||
: input_mode{}, index{index}, element{element}, buffer{buffer} {} |
|||
|
|||
Tegra::Shader::IpaMode GetInputMode() const { |
|||
return input_mode; |
|||
} |
|||
|
|||
Tegra::Shader::Attribute::Index GetIndex() const { |
|||
return index; |
|||
} |
|||
|
|||
u32 GetElement() const { |
|||
return element; |
|||
} |
|||
|
|||
Node GetBuffer() const { |
|||
return buffer; |
|||
} |
|||
|
|||
private: |
|||
const Tegra::Shader::IpaMode input_mode; |
|||
const Node buffer; |
|||
const Tegra::Shader::Attribute::Index index; |
|||
const u32 element; |
|||
}; |
|||
|
|||
/// Constant buffer node, usually mapped to uniform buffers in GLSL |
|||
class CbufNode final { |
|||
public: |
|||
explicit constexpr CbufNode(u32 index, Node offset) : index{index}, offset{offset} {} |
|||
|
|||
u32 GetIndex() const { |
|||
return index; |
|||
} |
|||
|
|||
Node GetOffset() const { |
|||
return offset; |
|||
} |
|||
|
|||
private: |
|||
const u32 index; |
|||
const Node offset; |
|||
}; |
|||
|
|||
/// Local memory node |
|||
class LmemNode final { |
|||
public: |
|||
explicit constexpr LmemNode(Node address) : address{address} {} |
|||
|
|||
Node GetAddress() const { |
|||
return address; |
|||
} |
|||
|
|||
private: |
|||
const Node address; |
|||
}; |
|||
|
|||
/// Global memory node |
|||
class GmemNode final { |
|||
public: |
|||
explicit constexpr GmemNode(Node address) : address{address} {} |
|||
|
|||
Node GetAddress() const { |
|||
return address; |
|||
} |
|||
|
|||
private: |
|||
const Node address; |
|||
}; |
|||
|
|||
/// Free-form commentary; it can be dropped
class CommentNode final {
public:
    explicit CommentNode(std::string text) : text{std::move(text)} {}

    /// @return The comment text.
    const std::string& GetText() const {
        return text;
    }

private:
    std::string text;
};
|||
|
|||
class ShaderIR final { |
|||
public: |
|||
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset) |
|||
: program_code{program_code}, main_offset{main_offset} { |
|||
|
|||
Decode(); |
|||
} |
|||
|
|||
const std::map<u32, BasicBlock>& GetBasicBlocks() const { |
|||
return basic_blocks; |
|||
} |
|||
|
|||
const std::set<u32>& GetRegisters() const { |
|||
return used_registers; |
|||
} |
|||
|
|||
const std::set<Tegra::Shader::Pred>& GetPredicates() const { |
|||
return used_predicates; |
|||
} |
|||
|
|||
const std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>& |
|||
GetInputAttributes() const { |
|||
return used_input_attributes; |
|||
} |
|||
|
|||
const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const { |
|||
return used_output_attributes; |
|||
} |
|||
|
|||
const std::map<u32, ConstBuffer>& GetConstantBuffers() const { |
|||
return used_cbufs; |
|||
} |
|||
|
|||
const std::set<Sampler>& GetSamplers() const { |
|||
return used_samplers; |
|||
} |
|||
|
|||
const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances() |
|||
const { |
|||
return used_clip_distances; |
|||
} |
|||
|
|||
std::size_t GetLength() const { |
|||
return static_cast<std::size_t>(coverage_end * sizeof(u64)); |
|||
} |
|||
|
|||
const Tegra::Shader::Header& GetHeader() const { |
|||
return header; |
|||
} |
|||
|
|||
private: |
|||
void Decode(); |
|||
|
|||
ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels); |
|||
|
|||
BasicBlock DecodeRange(u32 begin, u32 end); |
|||
|
|||
/** |
|||
* Decodes a single instruction from Tegra to IR. |
|||
* @param bb Basic block where the nodes will be written to. |
|||
* @param pc Program counter. Offset to decode. |
|||
* @return Next address to decode. |
|||
*/ |
|||
u32 DecodeInstr(BasicBlock& bb, u32 pc); |
|||
|
|||
u32 DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
u32 DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc); |
|||
|
|||
/// Internalizes node's data and returns a managed pointer to a clone of that node |
|||
Node StoreNode(NodeData&& node_data); |
|||
|
|||
/// Creates a conditional node |
|||
Node Conditional(Node condition, std::vector<Node>&& code); |
|||
/// Creates a commentary |
|||
Node Comment(const std::string& text); |
|||
/// Creates an u32 immediate |
|||
Node Immediate(u32 value); |
|||
/// Creates a s32 immediate |
|||
Node Immediate(s32 value) { |
|||
return Immediate(static_cast<u32>(value)); |
|||
} |
|||
/// Creates a f32 immediate |
|||
Node Immediate(f32 value) { |
|||
u32 integral; |
|||
std::memcpy(&integral, &value, sizeof(u32)); |
|||
return Immediate(integral); |
|||
} |
|||
|
|||
/// Generates a node for a passed register. |
|||
Node GetRegister(Tegra::Shader::Register reg); |
|||
/// Generates a node representing a 19-bit immediate value |
|||
Node GetImmediate19(Tegra::Shader::Instruction instr); |
|||
/// Generates a node representing a 32-bit immediate value |
|||
Node GetImmediate32(Tegra::Shader::Instruction instr); |
|||
/// Generates a node representing a constant buffer |
|||
Node GetConstBuffer(u64 index, u64 offset); |
|||
/// Generates a node representing a constant buffer with a variadic offset |
|||
Node GetConstBufferIndirect(u64 index, u64 offset, Node node); |
|||
/// Generates a node for a passed predicate. It can be optionally negated |
|||
Node GetPredicate(u64 pred, bool negated = false); |
|||
/// Generates a predicate node for an immediate true or false value |
|||
Node GetPredicate(bool immediate); |
|||
/// Generates a node representing an input attribute. Keeps track of used attributes. |
|||
Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, |
|||
const Tegra::Shader::IpaMode& input_mode, Node buffer = {}); |
|||
/// Generates a node representing an output attribute. Keeps track of used attributes. |
|||
Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); |
|||
/// Generates a node representing an internal flag |
|||
Node GetInternalFlag(InternalFlag flag, bool negated = false); |
|||
/// Generates a node representing a local memory address |
|||
Node GetLocalMemory(Node address); |
|||
/// Generates a temporal, internally it uses a post-RZ register |
|||
Node GetTemporal(u32 id); |
|||
|
|||
/// Sets a register. src value must be a number-evaluated node. |
|||
void SetRegister(BasicBlock& bb, Tegra::Shader::Register dest, Node src); |
|||
/// Sets a predicate. src value must be a bool-evaluated node |
|||
void SetPredicate(BasicBlock& bb, u64 dest, Node src); |
|||
/// Sets an internal flag. src value must be a bool-evaluated node |
|||
void SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value); |
|||
/// Sets a local memory address. address and value must be a number-evaluated node |
|||
void SetLocalMemory(BasicBlock& bb, Node address, Node value); |
|||
/// Sets a temporal. Internally it uses a post-RZ register |
|||
void SetTemporal(BasicBlock& bb, u32 id, Node value); |
|||
|
|||
/// Sets internal flags from a float |
|||
void SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc = true); |
|||
/// Sets internal flags from an integer |
|||
void SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc = true); |
|||
|
|||
/// Conditionally absolute/negated float. Absolute is applied first |
|||
Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); |
|||
/// Conditionally saturates a float |
|||
Node GetSaturatedFloat(Node value, bool saturate = true); |
|||
|
|||
/// Converts an integer to different sizes. |
|||
Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed); |
|||
/// Conditionally absolute/negated integer. Absolute is applied first |
|||
Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed); |
|||
|
|||
/// Unpacks a half immediate from an instruction |
|||
Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); |
|||
/// Merges a half pair into another value |
|||
Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); |
|||
/// Conditionally absolute/negated half float pair. Absolute is applied first |
|||
Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); |
|||
|
|||
/// Returns a predicate comparing two floats |
|||
Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); |
|||
/// Returns a predicate comparing two integers |
|||
Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, |
|||
Node op_a, Node op_b); |
|||
/// Returns a predicate comparing two half floats. meta specifies how both pairs will be compared |
|||
Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, |
|||
const MetaHalfArithmetic& meta, Node op_a, Node op_b); |
|||
|
|||
/// Returns a predicate combiner operation |
|||
OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); |
|||
|
|||
/// Returns a condition code evaluated from internal flags |
|||
Node GetConditionCode(Tegra::Shader::ConditionCode cc); |
|||
|
|||
/// Accesses a texture sampler |
|||
const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, |
|||
Tegra::Shader::TextureType type, bool is_array, bool is_shadow); |
|||
|
|||
/// Extracts a sequence of bits from a node |
|||
Node BitfieldExtract(Node value, u32 offset, u32 bits); |
|||
|
|||
/// NOTE(review): declaration only. By its name and parameters this presumably emits into bb the
/// writes of the given float components for a TEX instruction - confirm against the definition.
void WriteTexInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, |
|||
const Node4& components); |
|||
|
|||
/// NOTE(review): declaration only. Presumably the TEXS counterpart of WriteTexInstructionFloat,
/// writing the given float components into bb - confirm against the definition.
void WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, |
|||
const Node4& components); |
|||
/// NOTE(review): declaration only. Presumably writes the given components of a TEXS instruction
/// as half floats into bb - confirm against the definition.
void WriteTexsInstructionHalfFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, |
|||
const Node4& components); |
|||
|
|||
/// Returns a Node4 for the given instruction, texture type, process mode and the depth_compare /
/// is_array flags. NOTE(review): presumably the sampled components of a TEX instruction - confirm.
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
|||
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |
|||
bool is_array); |
|||
|
|||
/// Returns a Node4 for the given instruction; takes the same parameters as GetTexCode.
/// NOTE(review): presumably the sampled components of a TEXS instruction - confirm.
Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
|||
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |
|||
bool is_array); |
|||
|
|||
/// Returns a Node4 for the given instruction and texture type; takes no process mode.
/// NOTE(review): presumably the gathered components of a TLD4 instruction - confirm.
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
|||
bool depth_compare, bool is_array); |
|||
|
|||
/// Returns a Node4 for the given instruction and texture type; takes no depth-compare flag.
/// NOTE(review): presumably the fetched components of a TLDS instruction - confirm.
Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
|||
bool is_array); |
|||
|
|||
/// Returns a pair of sizes computed from the texture type, the flags and the max_coords /
/// max_inputs limits. NOTE(review): the meaning of the two returned values and what exactly is
/// validated are not visible from this declaration - confirm against the definition.
std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement( |
|||
Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, |
|||
bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); |
|||
|
|||
/// Returns a Node4 for a texture access described by the given parameters. coords is taken by
/// rvalue reference. NOTE(review): presumably the shared backend of the Get*Code helpers, with
/// array_offset / bias_offset locating the array index and bias operands - confirm.
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
|||
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |
|||
bool is_array, std::size_t array_offset, std::size_t bias_offset, |
|||
std::vector<Node>&& coords); |
|||
|
|||
/// Returns a node derived from op according to the chunk/sign flags, video type and byte_height.
/// NOTE(review): presumably extracts the operand of a video instruction - confirm.
Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, |
|||
u64 byte_height); |
|||
|
|||
/// NOTE(review): declaration only. Presumably emits into bb the result of applying logic_op to
/// op_a and op_b into dest, handling predicate and sets_cc as well - confirm against the
/// definition.
void WriteLogicOperation(BasicBlock& bb, Tegra::Shader::Register dest, |
|||
Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, |
|||
Tegra::Shader::PredicateResultMode predicate_mode, |
|||
Tegra::Shader::Pred predicate, bool sets_cc); |
|||
/// NOTE(review): declaration only. Presumably emits into bb a three-input logic operation on
/// op_a, op_b and op_c selected by imm_lut, written to dest - confirm against the definition.
void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, |
|||
Node op_c, Node imm_lut, bool sets_cc); |
|||
|
|||
template <typename... T> |
|||
Node Operation(OperationCode code, const T*... operands) { |
|||
return StoreNode(OperationNode(code, operands...)); |
|||
} |
|||
|
|||
template <typename... T> |
|||
Node Operation(OperationCode code, Meta&& meta, const T*... operands) { |
|||
return StoreNode(OperationNode(code, std::move(meta), operands...)); |
|||
} |
|||
|
|||
template <typename... T> |
|||
Node Operation(OperationCode code, std::vector<Node>&& operands) { |
|||
return StoreNode(OperationNode(code, std::move(operands))); |
|||
} |
|||
|
|||
template <typename... T> |
|||
Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) { |
|||
return StoreNode(OperationNode(code, std::move(meta), std::move(operands))); |
|||
} |
|||
|
|||
template <typename... T> |
|||
Node SignedOperation(OperationCode code, bool is_signed, const T*... operands) { |
|||
return StoreNode(OperationNode(SignedToUnsignedCode(code, is_signed), operands...)); |
|||
} |
|||
|
|||
template <typename... T> |
|||
Node SignedOperation(OperationCode code, bool is_signed, Meta&& meta, const T*... operands) { |
|||
return StoreNode( |
|||
OperationNode(SignedToUnsignedCode(code, is_signed), std::move(meta), operands...)); |
|||
} |
|||
|
|||
/// Maps operation_code according to is_signed; used by the SignedOperation helpers.
/// NOTE(review): by its name, presumably returns the unsigned counterpart of a signed operation
/// code when is_signed is false and the code unchanged otherwise - confirm against the definition.
static OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed); |
|||
|
|||
/// Program code this object works on. Held by reference, so the referenced object must outlive
/// this one.
const ProgramCode& program_code; |
|||
/// NOTE(review): presumably the offset of the shader's entry point within program_code - confirm.
const u32 main_offset; |
|||
|
|||
/// Value-initialized to zero. NOTE(review): presumably the lower and upper bounds of the code
/// range covered while decoding - confirm.
u32 coverage_begin{}; |
|||
u32 coverage_end{}; |
|||
/// Exit methods keyed by a pair of u32. NOTE(review): presumably a (begin, end) range - confirm.
std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; |
|||
|
|||
/// Basic blocks keyed by a u32. NOTE(review): presumably the block's start offset - confirm.
std::map<u32, BasicBlock> basic_blocks; |
|||
|
|||
/// Owns NodeData objects through unique_ptr. NOTE(review): presumably where StoreNode keeps the
/// nodes it returns as Node - confirm.
std::vector<std::unique_ptr<NodeData>> stored_nodes; |
|||
|
|||
/// Usage-tracking containers. NOTE(review): presumably filled in by the Get*/Set* helpers as the
/// corresponding resources are referenced - confirm.
std::set<u32> used_registers; |
|||
std::set<Tegra::Shader::Pred> used_predicates; |
|||
/// Input attributes, each mapped to the set of IpaMode values recorded for it.
std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>> |
|||
used_input_attributes; |
|||
std::set<Tegra::Shader::Attribute::Index> used_output_attributes; |
|||
std::map<u32, ConstBuffer> used_cbufs; |
|||
std::set<Sampler> used_samplers; |
|||
/// One flag per clip distance, value-initialized to false.
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; |
|||
|
|||
/// Shader header. NOTE(review): presumably read from the start of program_code - confirm.
Tegra::Shader::Header header; |
|||
}; |
|||
|
|||
} // namespace VideoCommon::Shader |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue