|
|
|
@ -24,6 +24,7 @@ using Tegra::Shader::GlobalAtomicType; |
|
|
|
using Tegra::Shader::Instruction; |
|
|
|
using Tegra::Shader::OpCode; |
|
|
|
using Tegra::Shader::Register; |
|
|
|
using Tegra::Shader::StoreType; |
|
|
|
|
|
|
|
namespace { |
|
|
|
|
|
|
|
@ -63,6 +64,27 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/// Builds the IR that reads a bitfield out of `value` at a position derived from `address`.
///
/// @param value   Node to extract the bits from.
/// @param address Node holding the address; it is AND-ed with `mask` to get an offset.
/// @param mask    Mask applied to `address` to obtain the offset.
/// @param size    Size operand passed to the bitfield extract operation.
/// @return A UBitfieldExtract operation over `value` at offset `(address & mask) << 3`.
Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
    // address is a by-value parameter that is not used again, so hand it over instead of
    // copying it (this also matches what InsertUnaligned does).
    Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask));
    // Shifting left by 3 multiplies the masked offset by 8.
    offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
    return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset),
                     Immediate(size));
}
|
|
|
|
|
|
|
/// Builds the IR that writes `value` into a bitfield of `dest` at a position derived from
/// `address`.
///
/// @param dest    Node receiving the inserted bits.
/// @param value   Node providing the bits to insert.
/// @param address Node holding the address; it is AND-ed with `mask` to get an offset.
/// @param mask    Mask applied to `address` to obtain the offset.
/// @param size    Size operand passed to the bitfield insert operation.
/// @return A UBitfieldInsert operation on `dest` at offset `(address & mask) << 3`.
Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
    Node masked_address =
        Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask));
    // Shifting left by 3 multiplies the masked offset by 8.
    Node bit_offset =
        Operation(OperationCode::ULogicalShiftLeft, std::move(masked_address), Immediate(3));
    return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value),
                     std::move(bit_offset), Immediate(size));
}
|
|
|
|
|
|
|
/// Builds the IR that sets the upper 16 bits of `value` when its bit 15 is set.
///
/// When bit 15 is clear, `value` is OR-ed with zero and so passes through unchanged.
///
/// @param value Node whose bit 15 is tested.
/// @return A UBitwiseOr of `value` with either 0xFFFF0000 or 0.
Node Sign16Extend(Node value) {
    constexpr u32 sign_bit = 1U << 15;
    constexpr u32 upper_half = 0xFFFF0000;

    // value is still needed for the final OR, so it has to be copied here.
    Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(sign_bit));
    Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(sign_bit));
    // is_sign is not used after this point, so move it rather than copy it.
    Node extend = Operation(OperationCode::Select, std::move(is_sign), Immediate(upper_half),
                            Immediate(0));
    return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend));
}
|
|
|
|
|
|
|
} // Anonymous namespace
|
|
|
|
|
|
|
|
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { |
|
|
|
@ -138,26 +160,31 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { |
|
|
|
LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown)); |
|
|
|
[[fallthrough]]; |
|
|
|
case OpCode::Id::LD_S: { |
|
|
|
const auto GetMemory = [&](s32 offset) { |
|
|
|
const auto GetAddress = [&](s32 offset) { |
|
|
|
ASSERT(offset % 4 == 0); |
|
|
|
const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); |
|
|
|
const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), |
|
|
|
immediate_offset); |
|
|
|
return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address) |
|
|
|
: GetLocalMemory(address); |
|
|
|
return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); |
|
|
|
}; |
|
|
|
const auto GetMemory = [&](s32 offset) { |
|
|
|
return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset)) |
|
|
|
: GetLocalMemory(GetAddress(offset)); |
|
|
|
}; |
|
|
|
|
|
|
|
switch (instr.ldst_sl.type.Value()) { |
|
|
|
case Tegra::Shader::StoreType::Bits32: |
|
|
|
case Tegra::Shader::StoreType::Bits64: |
|
|
|
case Tegra::Shader::StoreType::Bits128: { |
|
|
|
const u32 count = [&]() { |
|
|
|
case StoreType::Signed16: |
|
|
|
SetRegister(bb, instr.gpr0, |
|
|
|
Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); |
|
|
|
break; |
|
|
|
case StoreType::Bits32: |
|
|
|
case StoreType::Bits64: |
|
|
|
case StoreType::Bits128: { |
|
|
|
const u32 count = [&] { |
|
|
|
switch (instr.ldst_sl.type.Value()) { |
|
|
|
case Tegra::Shader::StoreType::Bits32: |
|
|
|
case StoreType::Bits32: |
|
|
|
return 1; |
|
|
|
case Tegra::Shader::StoreType::Bits64: |
|
|
|
case StoreType::Bits64: |
|
|
|
return 2; |
|
|
|
case Tegra::Shader::StoreType::Bits128: |
|
|
|
case StoreType::Bits128: |
|
|
|
return 4; |
|
|
|
default: |
|
|
|
UNREACHABLE(); |
|
|
|
@ -214,12 +241,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { |
|
|
|
// To handle unaligned loads get the bytes used to dereference global memory and extract
|
|
|
|
// those bytes from the loaded u32.
|
|
|
|
if (IsUnaligned(type)) { |
|
|
|
Node mask = Immediate(GetUnalignedMask(type)); |
|
|
|
Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); |
|
|
|
offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); |
|
|
|
|
|
|
|
gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), |
|
|
|
std::move(offset), Immediate(size)); |
|
|
|
gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); |
|
|
|
} |
|
|
|
|
|
|
|
SetTemporary(bb, i, gmem); |
|
|
|
@ -271,21 +293,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { |
|
|
|
return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); |
|
|
|
}; |
|
|
|
|
|
|
|
const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L |
|
|
|
? &ShaderIR::SetLocalMemory |
|
|
|
: &ShaderIR::SetSharedMemory; |
|
|
|
const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; |
|
|
|
const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; |
|
|
|
const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; |
|
|
|
|
|
|
|
switch (instr.ldst_sl.type.Value()) { |
|
|
|
case Tegra::Shader::StoreType::Bits128: |
|
|
|
case StoreType::Bits128: |
|
|
|
(this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); |
|
|
|
(this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); |
|
|
|
[[fallthrough]]; |
|
|
|
case Tegra::Shader::StoreType::Bits64: |
|
|
|
case StoreType::Bits64: |
|
|
|
(this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); |
|
|
|
[[fallthrough]]; |
|
|
|
case Tegra::Shader::StoreType::Bits32: |
|
|
|
case StoreType::Bits32: |
|
|
|
(this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); |
|
|
|
break; |
|
|
|
case StoreType::Signed16: { |
|
|
|
Node address = GetAddress(0); |
|
|
|
Node memory = (this->*get_memory)(address); |
|
|
|
(this->*set_memory)( |
|
|
|
bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); |
|
|
|
break; |
|
|
|
} |
|
|
|
default: |
|
|
|
UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), |
|
|
|
static_cast<u32>(instr.ldst_sl.type.Value())); |
|
|
|
@ -325,12 +354,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { |
|
|
|
Node value = GetRegister(instr.gpr0.Value() + i); |
|
|
|
|
|
|
|
if (IsUnaligned(type)) { |
|
|
|
Node mask = Immediate(GetUnalignedMask(type)); |
|
|
|
Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); |
|
|
|
offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); |
|
|
|
|
|
|
|
value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset, |
|
|
|
Immediate(size)); |
|
|
|
const u32 mask = GetUnalignedMask(type); |
|
|
|
value = InsertUnaligned(gmem, std::move(value), real_address, mask, size); |
|
|
|
} |
|
|
|
|
|
|
|
bb.push_back(Operation(OperationCode::Assign, gmem, value)); |
|
|
|
|