|
|
|
@ -83,6 +83,12 @@ int result_index = 0; |
|
|
|
// Limit compared against result_index in ResultEmplaceBack; reaching it sets result_limit_reached.
uint result_vector_max_index; |
|
|
|
// Set by ResultEmplaceBack when a value is refused because the current phase's limit was hit.
bool result_limit_reached = false; |
|
|
|
|
|
|
|
// avoid intermediate result_vector storage during color decode phase |
|
|
|
// While true, ResultEmplaceBack forwards each value to EmitColorValue instead of the result vector.
bool write_color_values = false; |
|
|
|
// Destination written by EmitColorValue; DecodeColorValues zeroes all 32 entries before decoding.
uint color_values_direct[32]; |
|
|
|
// Index of the most recently written entry; EmitColorValue pre-increments it before each store.
uint color_out_index = 0; |
|
|
|
// Number of color values wanted by the current decode; ResultEmplaceBack stops emitting once
// color_out_index reaches it.
uint color_num_values = 0; |
|
|
|
|
|
|
|
// EncodingData helpers |
|
|
|
uint Encoding(EncodingData val) { |
|
|
|
return bitfieldExtract(val.data, 0, 8); |
|
|
|
@ -114,9 +120,110 @@ EncodingData CreateEncodingData(uint encoding, uint num_bits, uint bit_val, uint |
|
|
|
return EncodingData(((encoding) << 0u) | ((num_bits) << 8u) | |
|
|
|
((bit_val) << 16u) | ((quint_trit_val) << 24u)); |
|
|
|
} |
|
|
|
// Forward declarations: EmitColorValue calls both helpers, and their definitions are not in this
// part of the file (presumably further down).
uint ReplicateBitTo9(uint bit); |
|
|
|
uint FastReplicateTo8(uint value, uint num_bits); |
|
|
|
|
|
|
|
// Converts one decoded EncodingData entry into its final color value and stores it in the next
// slot of color_values_direct[]. color_out_index is incremented before the store, so this
// function never writes slot 0.
// NOTE(review): the per-bit-count multipliers look like the ASTC color unquantization table;
// confirm against the specification before changing any of them.
void EmitColorValue(EncodingData val) {
    const uint kind = Encoding(val);
    const uint num_bits = NumBits(val);
    const uint raw_bits = BitValue(val);

    // Plain-bit entries only need their bits replicated out to 8 bits.
    if (kind == JUST_BITS) {
        ++color_out_index;
        color_values_direct[color_out_index] = FastReplicateTo8(raw_bits, num_bits);
        return;
    }

    // Anything that is not TRIT is handled as QUINT.
    const bool is_trit = (kind == TRIT);
    const uint lsb_mask = ReplicateBitTo9(raw_bits & 1); // "A": lowest bit replicated
    const uint digit = QuintTritValue(val);              // "D": the trit/quint digit
    const uint upper = raw_bits >> 1;                    // remaining bits above the lowest one

    uint scale = 0;  // "C": multiplier applied to the digit
    uint offset = 0; // "B": pattern built from the upper bits

    // Both encodings share the high part of the offset pattern for a given bit count;
    // only the low part and the multiplier differ. Unlisted bit counts leave both at zero.
    switch (num_bits) {
    case 1:
        scale = is_trit ? 204u : 113u;
        break;
    case 2: {
        const uint b = upper & 1;
        const uint low = is_trit ? ((b << 4) | (b << 2) | (b << 1)) : ((b << 3) | (b << 2));
        scale = is_trit ? 93u : 54u;
        offset = (b << 8) | low;
        break;
    }
    case 3: {
        const uint cb = upper & 3;
        const uint low = is_trit ? ((cb << 2) | cb) : ((cb << 1) | (cb >> 1));
        scale = is_trit ? 44u : 26u;
        offset = (cb << 7) | low;
        break;
    }
    case 4: {
        const uint dcb = upper & 7;
        const uint low = is_trit ? dcb : (dcb >> 1);
        scale = is_trit ? 22u : 13u;
        offset = (dcb << 6) | low;
        break;
    }
    case 5: {
        const uint edcb = upper & 0xF;
        const uint low = is_trit ? (edcb >> 2) : (edcb >> 3);
        scale = is_trit ? 11u : 6u;
        offset = (edcb << 5) | low;
        break;
    }
    case 6: {
        // Only the trit path has a six-bit entry.
        if (is_trit) {
            const uint fedcb = upper & 0x1F;
            scale = 5u;
            offset = (fedcb << 4) | (fedcb >> 4);
        }
        break;
    }
    }

    // Combine digit and bit pattern, fold in the replicated low bit, then reduce to the final
    // value with the top bit taken from the replicated low bit.
    const uint combined = ((digit * scale) + offset) ^ lsb_mask;
    ++color_out_index;
    color_values_direct[color_out_index] = (lsb_mask & 0x80) | (combined >> 2);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void ResultEmplaceBack(EncodingData val) { |
|
|
|
if (write_color_values) { |
|
|
|
if (color_out_index >= color_num_values) { |
|
|
|
// avoid decoding more than needed by this phase |
|
|
|
result_limit_reached = true; |
|
|
|
return; |
|
|
|
} |
|
|
|
EmitColorValue(val); |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
if (result_index >= result_vector_max_index) { |
|
|
|
// Alert callers to avoid decoding more than needed by this phase |
|
|
|
result_limit_reached = true; |
|
|
|
@ -457,7 +564,7 @@ void DecodeIntegerSequence(uint max_range, uint num_values) { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits, out uint color_values[32]) { |
|
|
|
void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits) { |
|
|
|
uint num_values = 0; |
|
|
|
for (uint i = 0; i < num_partitions; i++) { |
|
|
|
num_values += ((modes[i] >> 2) + 1) << 1; |
|
|
|
@ -471,104 +578,21 @@ void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits, o |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
DecodeIntegerSequence(range - 1, num_values); |
|
|
|
uint out_index = 0; |
|
|
|
for (int itr = 0; itr < result_index; ++itr) { |
|
|
|
if (out_index >= num_values) { |
|
|
|
break; |
|
|
|
} |
|
|
|
const EncodingData val = GetEncodingFromVector(itr); |
|
|
|
const uint encoding = Encoding(val); |
|
|
|
const uint bitlen = NumBits(val); |
|
|
|
const uint bitval = BitValue(val); |
|
|
|
uint A = 0, B = 0, C = 0, D = 0; |
|
|
|
A = ReplicateBitTo9((bitval & 1)); |
|
|
|
switch (encoding) { |
|
|
|
case JUST_BITS: |
|
|
|
color_values[++out_index] = FastReplicateTo8(bitval, bitlen); |
|
|
|
break; |
|
|
|
case TRIT: { |
|
|
|
D = QuintTritValue(val); |
|
|
|
switch (bitlen) { |
|
|
|
case 1: |
|
|
|
C = 204; |
|
|
|
break; |
|
|
|
case 2: { |
|
|
|
C = 93; |
|
|
|
const uint b = (bitval >> 1) & 1; |
|
|
|
B = (b << 8) | (b << 4) | (b << 2) | (b << 1); |
|
|
|
break; |
|
|
|
} |
|
|
|
case 3: { |
|
|
|
C = 44; |
|
|
|
const uint cb = (bitval >> 1) & 3; |
|
|
|
B = (cb << 7) | (cb << 2) | cb; |
|
|
|
break; |
|
|
|
} |
|
|
|
case 4: { |
|
|
|
C = 22; |
|
|
|
const uint dcb = (bitval >> 1) & 7; |
|
|
|
B = (dcb << 6) | dcb; |
|
|
|
break; |
|
|
|
} |
|
|
|
case 5: { |
|
|
|
C = 11; |
|
|
|
const uint edcb = (bitval >> 1) & 0xF; |
|
|
|
B = (edcb << 5) | (edcb >> 2); |
|
|
|
break; |
|
|
|
} |
|
|
|
case 6: { |
|
|
|
C = 5; |
|
|
|
const uint fedcb = (bitval >> 1) & 0x1F; |
|
|
|
B = (fedcb << 4) | (fedcb >> 4); |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
break; |
|
|
|
} |
|
|
|
case QUINT: { |
|
|
|
D = QuintTritValue(val); |
|
|
|
switch (bitlen) { |
|
|
|
case 1: |
|
|
|
C = 113; |
|
|
|
break; |
|
|
|
case 2: { |
|
|
|
C = 54; |
|
|
|
const uint b = (bitval >> 1) & 1; |
|
|
|
B = (b << 8) | (b << 3) | (b << 2); |
|
|
|
break; |
|
|
|
} |
|
|
|
case 3: { |
|
|
|
C = 26; |
|
|
|
const uint cb = (bitval >> 1) & 3; |
|
|
|
B = (cb << 7) | (cb << 1) | (cb >> 1); |
|
|
|
break; |
|
|
|
} |
|
|
|
case 4: { |
|
|
|
C = 13; |
|
|
|
const uint dcb = (bitval >> 1) & 7; |
|
|
|
B = (dcb << 6) | (dcb >> 1); |
|
|
|
break; |
|
|
|
} |
|
|
|
case 5: { |
|
|
|
C = 6; |
|
|
|
const uint edcb = (bitval >> 1) & 0xF; |
|
|
|
B = (edcb << 5) | (edcb >> 3); |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
if (encoding != JUST_BITS) { |
|
|
|
uint T = (D * C) + B; |
|
|
|
T ^= A; |
|
|
|
T = (A & 0x80) | (T >> 2); |
|
|
|
color_values[++out_index] = T; |
|
|
|
} |
|
|
|
// Decode directly into color_values_direct[] |
|
|
|
write_color_values = true; |
|
|
|
color_out_index = 0; |
|
|
|
color_num_values = num_values; |
|
|
|
for (uint i = 0; i < 32; ++i) { |
|
|
|
color_values_direct[i] = 0; |
|
|
|
} |
|
|
|
|
|
|
|
DecodeIntegerSequence(range - 1, num_values); |
|
|
|
|
|
|
|
write_color_values = false; |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ivec2 BitTransferSigned(int a, int b) { |
|
|
|
ivec2 transferred; |
|
|
|
transferred.y = b >> 1; |
|
|
|
@ -1069,13 +1093,12 @@ void DecompressBlock(ivec3 coord) { |
|
|
|
uvec4 endpoints0[4]; |
|
|
|
uvec4 endpoints1[4]; |
|
|
|
{ |
|
|
|
// This decode phase should at most push 32 elements into the vector |
|
|
|
result_vector_max_index = 32; |
|
|
|
uint color_values[32]; |
|
|
|
// Decode directly into color_values_direct[] (no intermediate result_vector storage) |
|
|
|
result_limit_reached = false; |
|
|
|
uint colvals_index = 0; |
|
|
|
DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits, color_values); |
|
|
|
DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits); |
|
|
|
for (uint i = 0; i < num_partitions; i++) { |
|
|
|
ComputeEndpoints(endpoints0[i], endpoints1[i], color_endpoint_mode[i], color_values, |
|
|
|
ComputeEndpoints(endpoints0[i], endpoints1[i], color_endpoint_mode[i], color_values_direct, |
|
|
|
colvals_index); |
|
|
|
} |
|
|
|
} |
|
|
|
|