|
|
|
@ -13,13 +13,247 @@ |
|
|
|
#include "video_core/engines/shader_bytecode.h"
|
|
|
|
#include "video_core/shader/node_helper.h"
|
|
|
|
#include "video_core/shader/shader_ir.h"
|
|
|
|
#include "video_core/textures/texture.h"
|
|
|
|
|
|
|
|
namespace VideoCommon::Shader { |
|
|
|
|
|
|
|
using Tegra::Shader::Instruction; |
|
|
|
using Tegra::Shader::OpCode; |
|
|
|
using Tegra::Shader::PredCondition; |
|
|
|
using Tegra::Shader::StoreType; |
|
|
|
using Tegra::Texture::ComponentType; |
|
|
|
using Tegra::Texture::TextureFormat; |
|
|
|
using Tegra::Texture::TICEntry; |
|
|
|
|
|
|
|
namespace { |
|
|
|
|
|
|
|
// Looks up the component type stored in a sampler descriptor for one component slot of its
// texture format.
//
// Each handled format has a fixed channel order (for example A8R8G8B8 exposes A, R, G, B in
// slots 0..3). The slot index is translated to the matching channel and that channel's type
// field of the descriptor is returned.
//
// descriptor: sampler descriptor providing `format` and the r/g/b/a type fields.
// component:  zero-based component slot.
//
// When the format is not handled here, or the slot lies outside the format's channel list,
// the unimplemented-path macro is raised and ComponentType::FLOAT is returned.
ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
                               std::size_t component) {
    const TextureFormat format{descriptor.format};

    // One letter per component slot, in slot order. An empty string means "not handled".
    const char* channel_order = "";
    switch (format) {
    case TextureFormat::R16_G16_B16_A16:
    case TextureFormat::R32_G32_B32_A32:
    case TextureFormat::R32_G32_B32:
    case TextureFormat::R32_G32:
    case TextureFormat::R16_G16:
    case TextureFormat::R32:
    case TextureFormat::R16:
    case TextureFormat::R8:
    case TextureFormat::R1:
        // All R-first formats share the full four-slot table, even the narrower ones.
        channel_order = "rgba";
        break;
    case TextureFormat::A8R8G8B8:
        channel_order = "argb";
        break;
    case TextureFormat::A2B10G10R10:
    case TextureFormat::A4B4G4R4:
    case TextureFormat::A5B5G5R1:
    case TextureFormat::A1B5G5R5:
        channel_order = "abgr";
        break;
    case TextureFormat::R32_B24G8:
        channel_order = "rbg";
        break;
    case TextureFormat::B5G6R5:
    case TextureFormat::B6G5R5:
        channel_order = "bgr";
        break;
    case TextureFormat::G8R24:
    case TextureFormat::G24R8:
    case TextureFormat::G8R8:
    case TextureFormat::G4R4:
        channel_order = "gr";
        break;
    }

    // Walk the channel list up to the requested slot; stops early at the terminator so a slot
    // past the end of the list never reads out of bounds.
    for (std::size_t slot = 0; channel_order[slot] != '\0'; ++slot) {
        if (slot != component) {
            continue;
        }
        switch (channel_order[slot]) {
        case 'r':
            return descriptor.r_type;
        case 'g':
            return descriptor.g_type;
        case 'b':
            return descriptor.b_type;
        case 'a':
            return descriptor.a_type;
        }
    }

    UNIMPLEMENTED_MSG("texture format not implement={}", format);
    return ComponentType::FLOAT;
}
|
|
|
|
|
|
|
// Tests whether a component slot is selected by a 4-bit component mask.
//
// Bit N of the mask corresponds to component slot N (bit 0 = R, bit 1 = G, bit 2 = B,
// bit 3 = A, matching the masks produced by GetImageComponentMask).
//
// component_mask: mask in the range [0, 15].
// component:      zero-based component slot in the range [0, 3].
//
// Returns true when the slot's bit is set in the mask. Arguments outside the ranges above
// select nothing and yield false; the previous table/bitset implementation threw
// std::out_of_range for them.
bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
    constexpr std::size_t num_components = 4;
    constexpr std::size_t full_mask = (std::size_t{1} << num_components) - 1;

    if (component >= num_components || component_mask > full_mask) {
        return false;
    }
    // The old 16-entry lookup table mapped every index to itself, so the mask can be tested
    // directly.
    return ((component_mask >> component) & std::size_t{1}) != 0;
}
|
|
|
|
|
|
|
// Returns the width, in bits, of one component slot of a texture format.
//
// Slots are numbered in the same order GetComponentType uses, i.e. following the channel
// order spelled out in the format name from left to right (B5G6R5: slot 0 = 5 bits,
// slot 1 = 6 bits, slot 2 = 5 bits).
//
// format:    texture format to query.
// component: zero-based component slot.
//
// Returns 0 for slots the format does not have (except for the uniform-width formats, which
// report their width for any slot). Unhandled formats raise the unimplemented-path macro and
// return 0.
u32 GetComponentSize(TextureFormat format, std::size_t component) {
    switch (format) {
    case TextureFormat::R32_G32_B32_A32:
        return 32;
    case TextureFormat::R16_G16_B16_A16:
        return 16;
    case TextureFormat::R32_G32_B32:
        return component <= 2 ? 32 : 0;
    case TextureFormat::R32_G32:
        return component <= 1 ? 32 : 0;
    case TextureFormat::R16_G16:
        return component <= 1 ? 16 : 0;
    case TextureFormat::R32:
        return component == 0 ? 32 : 0;
    case TextureFormat::R16:
        return component == 0 ? 16 : 0;
    case TextureFormat::R8:
        return component == 0 ? 8 : 0;
    case TextureFormat::R1:
        return component == 0 ? 1 : 0;
    case TextureFormat::A8R8G8B8:
        return 8;
    case TextureFormat::A2B10G10R10:
        // Slot 0 is the 2-bit alpha channel, the remaining slots are 10 bits each.
        return (component == 3 || component == 2 || component == 1) ? 10 : 2;
    case TextureFormat::A4B4G4R4:
        return 4;
    case TextureFormat::A5B5G5R1:
        // Slots 0..2 are 5 bits, the trailing red slot is a single bit.
        return (component == 0 || component == 1 || component == 2) ? 5 : 1;
    case TextureFormat::A1B5G5R5:
        // Slot 0 is the 1-bit alpha channel, the remaining slots are 5 bits each.
        return (component == 1 || component == 2 || component == 3) ? 5 : 1;
    case TextureFormat::R32_B24G8:
        if (component == 0) {
            return 32;
        }
        if (component == 1) {
            return 24;
        }
        if (component == 2) {
            return 8;
        }
        return 0;
    case TextureFormat::B5G6R5:
        if (component == 0 || component == 2) {
            return 5;
        }
        if (component == 1) {
            return 6;
        }
        return 0;
    case TextureFormat::B6G5R5:
        if (component == 1 || component == 2) {
            return 5;
        }
        if (component == 0) {
            return 6;
        }
        return 0;
    case TextureFormat::G8R24:
        if (component == 0) {
            return 8;
        }
        if (component == 1) {
            return 24;
        }
        return 0;
    case TextureFormat::G24R8:
        // Slot 0 is G (24 bits) and slot 1 is R (8 bits). This case used to duplicate the
        // G8R24 widths, which contradicted the name-order convention used by every other
        // format here.
        // NOTE(review): derived from this file's own convention - confirm against the
        // hardware format definition.
        if (component == 0) {
            return 24;
        }
        if (component == 1) {
            return 8;
        }
        return 0;
    case TextureFormat::G8R8:
        return (component == 0 || component == 1) ? 8 : 0;
    case TextureFormat::G4R4:
        return (component == 0 || component == 1) ? 4 : 0;
    default:
        UNIMPLEMENTED_MSG("texture format not implement={}", format);
        return 0;
    }
}
|
|
|
|
|
|
|
// Returns a bit mask describing which component slots a texture format provides.
//
// Bit 0 stands for R, bit 1 for G, bit 2 for B and bit 3 for A. Four-channel formats yield
// 0b1111, three-channel formats 0b0111, two-channel formats 0b0011 and single-channel formats
// 0b0001. Unhandled formats raise the unimplemented-path macro and report all four bits.
std::size_t GetImageComponentMask(TextureFormat format) {
    constexpr std::size_t red_bit = 0b0001;
    constexpr std::size_t green_bit = 0b0010;
    constexpr std::size_t blue_bit = 0b0100;
    constexpr std::size_t alpha_bit = 0b1000;

    constexpr std::size_t one_channel = red_bit;
    constexpr std::size_t two_channels = red_bit | green_bit;
    constexpr std::size_t three_channels = red_bit | green_bit | blue_bit;
    constexpr std::size_t four_channels = red_bit | green_bit | blue_bit | alpha_bit;

    switch (format) {
    case TextureFormat::R32:
    case TextureFormat::R16:
    case TextureFormat::R8:
    case TextureFormat::R1:
        return one_channel;
    case TextureFormat::R32_G32:
    case TextureFormat::R16_G16:
    case TextureFormat::G8R24:
    case TextureFormat::G24R8:
    case TextureFormat::G8R8:
    case TextureFormat::G4R4:
        return two_channels;
    case TextureFormat::R32_G32_B32:
    case TextureFormat::R32_B24G8:
    case TextureFormat::B5G6R5:
    case TextureFormat::B6G5R5:
        return three_channels;
    case TextureFormat::R32_G32_B32_A32:
    case TextureFormat::R16_G16_B16_A16:
    case TextureFormat::A8R8G8B8:
    case TextureFormat::A2B10G10R10:
    case TextureFormat::A4B4G4R4:
    case TextureFormat::A5B5G5R1:
    case TextureFormat::A1B5G5R5:
        return four_channels;
    default:
        UNIMPLEMENTED_MSG("texture format not implement={}", format);
        return four_channels;
    }
}
|
|
|
|
|
|
|
std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { |
|
|
|
switch (image_type) { |
|
|
|
case Tegra::Shader::ImageType::Texture1D: |
|
|
|
@ -37,6 +271,39 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { |
|
|
|
} |
|
|
|
} // Anonymous namespace
|
|
|
|
|
|
|
|
// Builds the IR node that converts a value loaded from an image component into the raw
// representation expected for that component, based on the component's type and bit width.
//
// component_type: type of the component as stored in the image descriptor.
// component_size: width of the component in bits; expected to be at most 32.
// original_value: node producing the loaded component value.
//
// Returns the converted node together with a flag telling the caller whether the node should
// be treated as signed by subsequent operations.
std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
                                                  Node original_value) {
    // Number of distinct values a component of this width can hold, as a float. The shift is
    // done on a 64-bit operand: shifting a plain int by 31 overflows and by 32 is undefined,
    // yet 32-bit components are reachable here.
    const auto value_count = [component_size] {
        return static_cast<float>(1ULL << component_size);
    };

    switch (component_type) {
    case ComponentType::SNORM: {
        // range [-1.0, 1.0]
        auto cnv_value = Operation(OperationCode::FMul, std::move(original_value),
                                   Immediate(value_count() / 2.f - 1.f));
        cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value));
        // Keep only the low component_size bits of the scaled integer.
        return {BitfieldExtract(std::move(cnv_value), 0, component_size), true};
    }
    case ComponentType::SINT:
    case ComponentType::UNORM: {
        // NOTE(review): SINT shares the UNORM scaling path here - confirm this is intended.
        bool is_signed = component_type == ComponentType::SINT;
        // range [0.0, 1.0]
        auto cnv_value = Operation(OperationCode::FMul, std::move(original_value),
                                   Immediate(value_count() - 1.f));
        return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)),
                is_signed};
    }
    case ComponentType::UINT: // range [0, (1 << component_size) - 1]
        return {std::move(original_value), false};
    case ComponentType::FLOAT:
        if (component_size == 16) {
            // 16-bit float components go through the half-float cast operation.
            return {Operation(OperationCode::HCastFloat, std::move(original_value)), true};
        } else {
            return {std::move(original_value), true};
        }
    default:
        UNIMPLEMENTED_MSG("Unimplement component type={}", component_type);
        return {std::move(original_value), true};
    }
}
|
|
|
|
|
|
|
u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { |
|
|
|
const Instruction instr = {program_code[pc]}; |
|
|
|
const auto opcode = OpCode::Decode(instr); |
|
|
|
@ -53,7 +320,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { |
|
|
|
|
|
|
|
switch (opcode->get().GetId()) { |
|
|
|
case OpCode::Id::SULD: { |
|
|
|
UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); |
|
|
|
UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != |
|
|
|
Tegra::Shader::OutOfBoundsStore::Ignore); |
|
|
|
|
|
|
|
@ -62,17 +328,89 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { |
|
|
|
: GetBindlessImage(instr.gpr39, type)}; |
|
|
|
image.MarkRead(); |
|
|
|
|
|
|
|
u32 indexer = 0; |
|
|
|
for (u32 element = 0; element < 4; ++element) { |
|
|
|
if (!instr.suldst.IsComponentEnabled(element)) { |
|
|
|
continue; |
|
|
|
if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { |
|
|
|
u32 indexer = 0; |
|
|
|
for (u32 element = 0; element < 4; ++element) { |
|
|
|
if (!instr.suldst.IsComponentEnabled(element)) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
MetaImage meta{image, {}, element}; |
|
|
|
Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); |
|
|
|
SetTemporary(bb, indexer++, std::move(value)); |
|
|
|
} |
|
|
|
for (u32 i = 0; i < indexer; ++i) { |
|
|
|
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
|
|
|
} |
|
|
|
} else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { |
|
|
|
UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && |
|
|
|
instr.suldst.GetStoreDataLayout() != StoreType::Bits64); |
|
|
|
|
|
|
|
auto descriptor = [this, instr] { |
|
|
|
std::optional<Tegra::Engines::SamplerDescriptor> descriptor; |
|
|
|
if (instr.suldst.is_immediate) { |
|
|
|
descriptor = |
|
|
|
registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); |
|
|
|
} else { |
|
|
|
const Node image_register = GetRegister(instr.gpr39); |
|
|
|
const auto [base_image, buffer, offset] = TrackCbuf( |
|
|
|
image_register, global_code, static_cast<s64>(global_code.size())); |
|
|
|
descriptor = registry.ObtainBindlessSampler(buffer, offset); |
|
|
|
} |
|
|
|
if (!descriptor) { |
|
|
|
UNREACHABLE_MSG("Failed to obtain image descriptor"); |
|
|
|
} |
|
|
|
return *descriptor; |
|
|
|
}(); |
|
|
|
|
|
|
|
const auto comp_mask = GetImageComponentMask(descriptor.format); |
|
|
|
|
|
|
|
switch (instr.suldst.GetStoreDataLayout()) { |
|
|
|
case StoreType::Bits32: |
|
|
|
case StoreType::Bits64: { |
|
|
|
u32 indexer = 0; |
|
|
|
u32 shifted_counter = 0; |
|
|
|
Node value = Immediate(0); |
|
|
|
for (u32 element = 0; element < 4; ++element) { |
|
|
|
if (!IsComponentEnabled(comp_mask, element)) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
const auto component_type = GetComponentType(descriptor, element); |
|
|
|
const auto component_size = GetComponentSize(descriptor.format, element); |
|
|
|
MetaImage meta{image, {}, element}; |
|
|
|
|
|
|
|
auto [converted_value, is_signed] = GetComponentValue( |
|
|
|
component_type, component_size, |
|
|
|
Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); |
|
|
|
|
|
|
|
// shift element to correct position
|
|
|
|
const auto shifted = shifted_counter; |
|
|
|
if (shifted > 0) { |
|
|
|
converted_value = |
|
|
|
SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, |
|
|
|
std::move(converted_value), Immediate(shifted)); |
|
|
|
} |
|
|
|
shifted_counter += component_size; |
|
|
|
|
|
|
|
// add value into result
|
|
|
|
value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); |
|
|
|
|
|
|
|
// if we shifted enough for 1 byte -> we save it into temp
|
|
|
|
if (shifted_counter >= 32) { |
|
|
|
SetTemporary(bb, indexer++, std::move(value)); |
|
|
|
// reset counter and value to prepare pack next byte
|
|
|
|
value = Immediate(0); |
|
|
|
shifted_counter = 0; |
|
|
|
} |
|
|
|
} |
|
|
|
for (u32 i = 0; i < indexer; ++i) { |
|
|
|
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
|
|
|
} |
|
|
|
break; |
|
|
|
} |
|
|
|
default: |
|
|
|
UNREACHABLE(); |
|
|
|
break; |
|
|
|
} |
|
|
|
MetaImage meta{image, {}, element}; |
|
|
|
Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); |
|
|
|
SetTemporary(bb, indexer++, std::move(value)); |
|
|
|
} |
|
|
|
for (u32 i = 0; i < indexer; ++i) { |
|
|
|
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |
|
|
|
} |
|
|
|
break; |
|
|
|
} |
|
|
|
|