|
|
|
@ -20,6 +20,8 @@ |
|
|
|
#include <cstring>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include <boost/container/static_vector.hpp>
|
|
|
|
|
|
|
|
#include "common/common_types.h"
|
|
|
|
|
|
|
|
#include "video_core/textures/astc.h"
|
|
|
|
@ -39,25 +41,25 @@ constexpr u32 Popcnt(u32 n) { |
|
|
|
|
|
|
|
class InputBitStream { |
|
|
|
public: |
|
|
|
explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0) |
|
|
|
: m_CurByte(ptr), m_NextBit(start_offset % 8) {} |
|
|
|
constexpr explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0) |
|
|
|
: cur_byte{ptr}, next_bit{start_offset % 8} {} |
|
|
|
|
|
|
|
std::size_t GetBitsRead() const { |
|
|
|
return m_BitsRead; |
|
|
|
constexpr std::size_t GetBitsRead() const { |
|
|
|
return bits_read; |
|
|
|
} |
|
|
|
|
|
|
|
u32 ReadBit() { |
|
|
|
u32 bit = *m_CurByte >> m_NextBit++; |
|
|
|
while (m_NextBit >= 8) { |
|
|
|
m_NextBit -= 8; |
|
|
|
m_CurByte++; |
|
|
|
constexpr bool ReadBit() { |
|
|
|
const bool bit = (*cur_byte >> next_bit++) & 1; |
|
|
|
while (next_bit >= 8) { |
|
|
|
next_bit -= 8; |
|
|
|
cur_byte++; |
|
|
|
} |
|
|
|
|
|
|
|
m_BitsRead++; |
|
|
|
return bit & 1; |
|
|
|
bits_read++; |
|
|
|
return bit; |
|
|
|
} |
|
|
|
|
|
|
|
u32 ReadBits(std::size_t nBits) { |
|
|
|
constexpr u32 ReadBits(std::size_t nBits) { |
|
|
|
u32 ret = 0; |
|
|
|
for (std::size_t i = 0; i < nBits; ++i) { |
|
|
|
ret |= (ReadBit() & 1) << i; |
|
|
|
@ -66,7 +68,7 @@ public: |
|
|
|
} |
|
|
|
|
|
|
|
template <std::size_t nBits> |
|
|
|
u32 ReadBits() { |
|
|
|
constexpr u32 ReadBits() { |
|
|
|
u32 ret = 0; |
|
|
|
for (std::size_t i = 0; i < nBits; ++i) { |
|
|
|
ret |= (ReadBit() & 1) << i; |
|
|
|
@ -75,64 +77,58 @@ public: |
|
|
|
} |
|
|
|
|
|
|
|
private: |
|
|
|
const u8* m_CurByte; |
|
|
|
std::size_t m_NextBit = 0; |
|
|
|
std::size_t m_BitsRead = 0; |
|
|
|
const u8* cur_byte; |
|
|
|
std::size_t next_bit = 0; |
|
|
|
std::size_t bits_read = 0; |
|
|
|
}; |
|
|
|
|
|
|
|
class OutputBitStream { |
|
|
|
public: |
|
|
|
explicit OutputBitStream(u8* ptr, s32 nBits = 0, s32 start_offset = 0) |
|
|
|
: m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} |
|
|
|
|
|
|
|
~OutputBitStream() = default; |
|
|
|
constexpr explicit OutputBitStream(u8* ptr, std::size_t bits = 0, std::size_t start_offset = 0) |
|
|
|
: cur_byte{ptr}, num_bits{bits}, next_bit{start_offset % 8} {} |
|
|
|
|
|
|
|
s32 GetBitsWritten() const { |
|
|
|
return m_BitsWritten; |
|
|
|
constexpr std::size_t GetBitsWritten() const { |
|
|
|
return bits_written; |
|
|
|
} |
|
|
|
|
|
|
|
void WriteBitsR(u32 val, u32 nBits) { |
|
|
|
constexpr void WriteBitsR(u32 val, u32 nBits) { |
|
|
|
for (u32 i = 0; i < nBits; i++) { |
|
|
|
WriteBit((val >> (nBits - i - 1)) & 1); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
void WriteBits(u32 val, u32 nBits) { |
|
|
|
constexpr void WriteBits(u32 val, u32 nBits) { |
|
|
|
for (u32 i = 0; i < nBits; i++) { |
|
|
|
WriteBit((val >> i) & 1); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
private: |
|
|
|
void WriteBit(s32 b) { |
|
|
|
|
|
|
|
if (done) |
|
|
|
constexpr void WriteBit(bool b) { |
|
|
|
if (bits_written >= num_bits) { |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
const u32 mask = 1 << m_NextBit++; |
|
|
|
const u32 mask = 1 << next_bit++; |
|
|
|
|
|
|
|
// clear the bit
|
|
|
|
*m_CurByte &= static_cast<u8>(~mask); |
|
|
|
*cur_byte &= static_cast<u8>(~mask); |
|
|
|
|
|
|
|
// Write the bit, if necessary
|
|
|
|
if (b) |
|
|
|
*m_CurByte |= static_cast<u8>(mask); |
|
|
|
*cur_byte |= static_cast<u8>(mask); |
|
|
|
|
|
|
|
// Next byte?
|
|
|
|
if (m_NextBit >= 8) { |
|
|
|
m_CurByte += 1; |
|
|
|
m_NextBit = 0; |
|
|
|
if (next_bit >= 8) { |
|
|
|
cur_byte += 1; |
|
|
|
next_bit = 0; |
|
|
|
} |
|
|
|
|
|
|
|
done = done || ++m_BitsWritten >= m_NumBits; |
|
|
|
} |
|
|
|
|
|
|
|
s32 m_BitsWritten = 0; |
|
|
|
const s32 m_NumBits; |
|
|
|
u8* m_CurByte; |
|
|
|
s32 m_NextBit = 0; |
|
|
|
|
|
|
|
bool done = false; |
|
|
|
u8* cur_byte; |
|
|
|
std::size_t num_bits; |
|
|
|
std::size_t bits_written = 0; |
|
|
|
std::size_t next_bit = 0; |
|
|
|
}; |
|
|
|
|
|
|
|
template <typename IntType> |
|
|
|
@ -195,9 +191,13 @@ struct IntegerEncodedValue { |
|
|
|
u32 trit_value; |
|
|
|
}; |
|
|
|
}; |
|
|
|
using IntegerEncodedVector = boost::container::static_vector< |
|
|
|
IntegerEncodedValue, 64, |
|
|
|
boost::container::static_vector_options< |
|
|
|
boost::container::inplace_alignment<alignof(IntegerEncodedValue)>, |
|
|
|
boost::container::throw_on_overflow<false>>::type>; |
|
|
|
|
|
|
|
static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, |
|
|
|
u32 nBitsPerValue) { |
|
|
|
static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) { |
|
|
|
// Implement the algorithm in section C.2.12
|
|
|
|
u32 m[5]; |
|
|
|
u32 t[5]; |
|
|
|
@ -255,7 +255,7 @@ static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValu |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
static void DecodeQus32Block(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, |
|
|
|
static void DecodeQus32Block(InputBitStream& bits, IntegerEncodedVector& result, |
|
|
|
u32 nBitsPerValue) { |
|
|
|
// Implement the algorithm in section C.2.12
|
|
|
|
u32 m[3]; |
|
|
|
@ -343,8 +343,8 @@ static constexpr std::array EncodingsValues = MakeEncodedValues(); |
|
|
|
// Fills result with the values that are encoded in the given
|
|
|
|
// bitstream. We must know beforehand what the maximum possible
|
|
|
|
// value is, and how many values we're decoding.
|
|
|
|
static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, InputBitStream& bits, |
|
|
|
u32 maxRange, u32 nValues) { |
|
|
|
static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange, |
|
|
|
u32 nValues) { |
|
|
|
// Determine encoding parameters
|
|
|
|
IntegerEncodedValue val = EncodingsValues[maxRange]; |
|
|
|
|
|
|
|
@ -634,12 +634,14 @@ static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { |
|
|
|
// Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)]
|
|
|
|
// is the same as [(numBits - 1):0] and repeats all the way down.
|
|
|
|
template <typename IntType> |
|
|
|
static IntType Replicate(IntType val, u32 numBits, u32 toBit) { |
|
|
|
if (numBits == 0) |
|
|
|
static constexpr IntType Replicate(IntType val, u32 numBits, u32 toBit) { |
|
|
|
if (numBits == 0) { |
|
|
|
return 0; |
|
|
|
if (toBit == 0) |
|
|
|
} |
|
|
|
if (toBit == 0) { |
|
|
|
return 0; |
|
|
|
IntType v = val & static_cast<IntType>((1 << numBits) - 1); |
|
|
|
} |
|
|
|
const IntType v = val & static_cast<IntType>((1 << numBits) - 1); |
|
|
|
IntType res = v; |
|
|
|
u32 reslen = numBits; |
|
|
|
while (reslen < toBit) { |
|
|
|
@ -656,6 +658,89 @@ static IntType Replicate(IntType val, u32 numBits, u32 toBit) { |
|
|
|
return res; |
|
|
|
} |
|
|
|
|
|
|
|
static constexpr std::size_t NumReplicateEntries(u32 num_bits) { |
|
|
|
return std::size_t(1) << num_bits; |
|
|
|
} |
|
|
|
|
|
|
|
template <typename IntType, u32 num_bits, u32 to_bit> |
|
|
|
static constexpr auto MakeReplicateTable() { |
|
|
|
std::array<IntType, NumReplicateEntries(num_bits)> table{}; |
|
|
|
for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) { |
|
|
|
table[value] = Replicate(value, num_bits, to_bit); |
|
|
|
} |
|
|
|
return table; |
|
|
|
} |
|
|
|
|
|
|
|
static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>(); |
|
|
|
static constexpr u32 ReplicateByteTo16(std::size_t value) { |
|
|
|
return REPLICATE_BYTE_TO_16_TABLE[value]; |
|
|
|
} |
|
|
|
|
|
|
|
static constexpr auto REPLICATE_BIT_TO_7_TABLE = MakeReplicateTable<u32, 1, 7>(); |
|
|
|
static constexpr u32 ReplicateBitTo7(std::size_t value) { |
|
|
|
return REPLICATE_BIT_TO_7_TABLE[value]; |
|
|
|
} |
|
|
|
|
|
|
|
static constexpr auto REPLICATE_BIT_TO_9_TABLE = MakeReplicateTable<u32, 1, 9>(); |
|
|
|
static constexpr u32 ReplicateBitTo9(std::size_t value) { |
|
|
|
return REPLICATE_BIT_TO_9_TABLE[value]; |
|
|
|
} |
|
|
|
|
|
|
|
static constexpr auto REPLICATE_1_BIT_TO_8_TABLE = MakeReplicateTable<u32, 1, 8>(); |
|
|
|
static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>(); |
|
|
|
static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>(); |
|
|
|
static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>(); |
|
|
|
static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>(); |
|
|
|
static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>(); |
|
|
|
static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>(); |
|
|
|
static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>(); |
|
|
|
/// Use a precompiled table with the most common usages, if it's not in the expected range, fallback
|
|
|
|
/// to the runtime implementation
|
|
|
|
static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) { |
|
|
|
switch (num_bits) { |
|
|
|
case 1: |
|
|
|
return REPLICATE_1_BIT_TO_8_TABLE[value]; |
|
|
|
case 2: |
|
|
|
return REPLICATE_2_BIT_TO_8_TABLE[value]; |
|
|
|
case 3: |
|
|
|
return REPLICATE_3_BIT_TO_8_TABLE[value]; |
|
|
|
case 4: |
|
|
|
return REPLICATE_4_BIT_TO_8_TABLE[value]; |
|
|
|
case 5: |
|
|
|
return REPLICATE_5_BIT_TO_8_TABLE[value]; |
|
|
|
case 6: |
|
|
|
return REPLICATE_6_BIT_TO_8_TABLE[value]; |
|
|
|
case 7: |
|
|
|
return REPLICATE_7_BIT_TO_8_TABLE[value]; |
|
|
|
case 8: |
|
|
|
return REPLICATE_8_BIT_TO_8_TABLE[value]; |
|
|
|
default: |
|
|
|
return Replicate(value, num_bits, 8); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
static constexpr auto REPLICATE_1_BIT_TO_6_TABLE = MakeReplicateTable<u32, 1, 6>(); |
|
|
|
static constexpr auto REPLICATE_2_BIT_TO_6_TABLE = MakeReplicateTable<u32, 2, 6>(); |
|
|
|
static constexpr auto REPLICATE_3_BIT_TO_6_TABLE = MakeReplicateTable<u32, 3, 6>(); |
|
|
|
static constexpr auto REPLICATE_4_BIT_TO_6_TABLE = MakeReplicateTable<u32, 4, 6>(); |
|
|
|
static constexpr auto REPLICATE_5_BIT_TO_6_TABLE = MakeReplicateTable<u32, 5, 6>(); |
|
|
|
static constexpr u32 FastReplicateTo6(u32 value, u32 num_bits) { |
|
|
|
switch (num_bits) { |
|
|
|
case 1: |
|
|
|
return REPLICATE_1_BIT_TO_6_TABLE[value]; |
|
|
|
case 2: |
|
|
|
return REPLICATE_2_BIT_TO_6_TABLE[value]; |
|
|
|
case 3: |
|
|
|
return REPLICATE_3_BIT_TO_6_TABLE[value]; |
|
|
|
case 4: |
|
|
|
return REPLICATE_4_BIT_TO_6_TABLE[value]; |
|
|
|
case 5: |
|
|
|
return REPLICATE_5_BIT_TO_6_TABLE[value]; |
|
|
|
default: |
|
|
|
return Replicate(value, num_bits, 6); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
class Pixel { |
|
|
|
protected: |
|
|
|
using ChannelType = s16; |
|
|
|
@ -674,10 +759,10 @@ public: |
|
|
|
// significant bits when going from larger to smaller bit depth
|
|
|
|
// or by repeating the most significant bits when going from
|
|
|
|
// smaller to larger bit depths.
|
|
|
|
void ChangeBitDepth(const u8 (&depth)[4]) { |
|
|
|
void ChangeBitDepth() { |
|
|
|
for (u32 i = 0; i < 4; i++) { |
|
|
|
Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]); |
|
|
|
m_BitDepth[i] = depth[i]; |
|
|
|
Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i]); |
|
|
|
m_BitDepth[i] = 8; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
@ -689,28 +774,23 @@ public: |
|
|
|
|
|
|
|
// Changes the bit depth of a single component. See the comment
|
|
|
|
// above for how we do this.
|
|
|
|
static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth, u8 newDepth) { |
|
|
|
assert(newDepth <= 8); |
|
|
|
static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth) { |
|
|
|
assert(oldDepth <= 8); |
|
|
|
|
|
|
|
if (oldDepth == newDepth) { |
|
|
|
if (oldDepth == 8) { |
|
|
|
// Do nothing
|
|
|
|
return val; |
|
|
|
} else if (oldDepth == 0 && newDepth != 0) { |
|
|
|
return static_cast<ChannelType>((1 << newDepth) - 1); |
|
|
|
} else if (newDepth > oldDepth) { |
|
|
|
return Replicate(val, oldDepth, newDepth); |
|
|
|
} else if (oldDepth == 0) { |
|
|
|
return static_cast<ChannelType>((1 << 8) - 1); |
|
|
|
} else if (8 > oldDepth) { |
|
|
|
return static_cast<ChannelType>(FastReplicateTo8(static_cast<u32>(val), oldDepth)); |
|
|
|
} else { |
|
|
|
// oldDepth > newDepth
|
|
|
|
if (newDepth == 0) { |
|
|
|
return 0xFF; |
|
|
|
} else { |
|
|
|
u8 bitsWasted = static_cast<u8>(oldDepth - newDepth); |
|
|
|
u16 v = static_cast<u16>(val); |
|
|
|
v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); |
|
|
|
v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << newDepth) - 1)); |
|
|
|
return static_cast<u8>(v); |
|
|
|
} |
|
|
|
const u8 bitsWasted = static_cast<u8>(oldDepth - 8); |
|
|
|
u16 v = static_cast<u16>(val); |
|
|
|
v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); |
|
|
|
v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << 8) - 1)); |
|
|
|
return static_cast<u8>(v); |
|
|
|
} |
|
|
|
|
|
|
|
assert(false && "We shouldn't get here."); |
|
|
|
@ -760,8 +840,7 @@ public: |
|
|
|
// up in the most-significant byte.
|
|
|
|
u32 Pack() const { |
|
|
|
Pixel eightBit(*this); |
|
|
|
const u8 eightBitDepth[4] = {8, 8, 8, 8}; |
|
|
|
eightBit.ChangeBitDepth(eightBitDepth); |
|
|
|
eightBit.ChangeBitDepth(); |
|
|
|
|
|
|
|
u32 r = 0; |
|
|
|
r |= eightBit.A(); |
|
|
|
@ -816,8 +895,7 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP |
|
|
|
} |
|
|
|
|
|
|
|
// We now have enough to decode our integer sequence.
|
|
|
|
std::vector<IntegerEncodedValue> decodedColorValues; |
|
|
|
decodedColorValues.reserve(32); |
|
|
|
IntegerEncodedVector decodedColorValues; |
|
|
|
|
|
|
|
InputBitStream colorStream(data); |
|
|
|
DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); |
|
|
|
@ -839,12 +917,12 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP |
|
|
|
|
|
|
|
u32 A = 0, B = 0, C = 0, D = 0; |
|
|
|
// A is just the lsb replicated 9 times.
|
|
|
|
A = Replicate(bitval & 1, 1, 9); |
|
|
|
A = ReplicateBitTo9(bitval & 1); |
|
|
|
|
|
|
|
switch (val.encoding) { |
|
|
|
// Replicate bits
|
|
|
|
case IntegerEncoding::JustBits: |
|
|
|
out[outIdx++] = Replicate(bitval, bitlen, 8); |
|
|
|
out[outIdx++] = FastReplicateTo8(bitval, bitlen); |
|
|
|
break; |
|
|
|
|
|
|
|
// Use algorithm in C.2.13
|
|
|
|
@ -962,13 +1040,13 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { |
|
|
|
u32 bitval = val.bit_value; |
|
|
|
u32 bitlen = val.num_bits; |
|
|
|
|
|
|
|
u32 A = Replicate(bitval & 1, 1, 7); |
|
|
|
u32 A = ReplicateBitTo7(bitval & 1); |
|
|
|
u32 B = 0, C = 0, D = 0; |
|
|
|
|
|
|
|
u32 result = 0; |
|
|
|
switch (val.encoding) { |
|
|
|
case IntegerEncoding::JustBits: |
|
|
|
result = Replicate(bitval, bitlen, 6); |
|
|
|
result = FastReplicateTo6(bitval, bitlen); |
|
|
|
break; |
|
|
|
|
|
|
|
case IntegerEncoding::Trit: { |
|
|
|
@ -1047,7 +1125,7 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { |
|
|
|
return result; |
|
|
|
} |
|
|
|
|
|
|
|
static void UnquantizeTexelWeights(u32 out[2][144], const std::vector<IntegerEncodedValue>& weights, |
|
|
|
static void UnquantizeTexelWeights(u32 out[2][144], const IntegerEncodedVector& weights, |
|
|
|
const TexelWeightParams& params, const u32 blockWidth, |
|
|
|
const u32 blockHeight) { |
|
|
|
u32 weightIdx = 0; |
|
|
|
@ -1545,8 +1623,7 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 |
|
|
|
static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); |
|
|
|
memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); |
|
|
|
|
|
|
|
std::vector<IntegerEncodedValue> texelWeightValues; |
|
|
|
texelWeightValues.reserve(64); |
|
|
|
IntegerEncodedVector texelWeightValues; |
|
|
|
|
|
|
|
InputBitStream weightStream(texelWeightData); |
|
|
|
|
|
|
|
@ -1568,9 +1645,9 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 |
|
|
|
Pixel p; |
|
|
|
for (u32 c = 0; c < 4; c++) { |
|
|
|
u32 C0 = endpos32s[partition][0].Component(c); |
|
|
|
C0 = Replicate(C0, 8, 16); |
|
|
|
C0 = ReplicateByteTo16(C0); |
|
|
|
u32 C1 = endpos32s[partition][1].Component(c); |
|
|
|
C1 = Replicate(C1, 8, 16); |
|
|
|
C1 = ReplicateByteTo16(C1); |
|
|
|
|
|
|
|
u32 plane = 0; |
|
|
|
if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { |
|
|
|
|