Browse Source

Updated the ASTC decoder

Removed unused leftover code from previous optimization attempts
pull/3246/head
Forrest Keller 3 weeks ago
committed by crueter
parent
commit
af7aec449e
  1. 129
      src/video_core/host_shaders/astc_decoder.comp
  2. 2
      src/video_core/renderer_opengl/gl_texture_cache.h
  3. 4
      src/video_core/texture_cache/texture_cache_base.h
  4. 5
      src/video_core/texture_cache/util.h

129
src/video_core/host_shaders/astc_decoder.comp

@ -727,114 +727,35 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, ui
}
// Unquantizes a single decoded texel weight.
//
// `val` supplies the encoding kind, the number of plain bits and the value of those bits,
// read through Encoding(), NumBits() and BitValue(). Trit and quint encodings additionally
// carry a digit that is read through QuintTritValue().
//
// Returns the unquantized weight, or 0 when the encoding is none of JUST_BITS, TRIT or QUINT.
uint UnquantizeTexelWeight(EncodingData val) {
    const uint encoding = Encoding(val);
    const uint bitlen = NumBits(val);
    const uint bitval = BitValue(val);

    if (encoding == JUST_BITS) {
        // 1..5 bits: rescale [0, 2^bitlen - 1] onto [0, 64], rounding to nearest.
        // Every other width is handed to FastReplicateTo6().
        return (bitlen >= 1 && bitlen <= 5)
                   ? uint(floor(0.5f + float(bitval) * 64.0f / float((1 << bitlen) - 1)))
                   : FastReplicateTo6(bitval, bitlen);
    }

    if (encoding == TRIT || encoding == QUINT) {
        uint B = 0, C = 0;
        const uint D = QuintTritValue(val);

        // Nibble table indexed by bitlen: yields the masks 0, 0, 1, 3 for bitlen 0..3.
        // `b` is therefore the bits of bitval above bit 0 that feed the B term.
        const uint b_mask = (0x3100 >> (bitlen * 4)) & 0xf;
        const uint b = (bitval >> 1) & b_mask;

        if (encoding == TRIT) {
            switch (bitlen) {
            case 0: return D * 32; // 0, 32, 64
            case 1: C = 50; break;
            case 2: C = 23; B = (b << 6) | (b << 2) | b; break;
            case 3: C = 11; B = (b << 5) | b; break;
            }
        } else {
            // Only QUINT can reach this branch.
            switch (bitlen) {
            case 0: return D * 16; // 0, 16, 32, 48, 64
            case 1: C = 28; break;
            case 2: C = 13; B = (b << 6) | (b << 1); break;
            }
        }

        // A replicates the lowest bit of bitval; it is XORed into D * C + B, the result is
        // shifted down by two, and bit 5 of A is OR-ed back in.
        const uint A = ReplicateBitTo7(bitval & 1);
        const uint res = (A & 0x20) | (((D * C + B) ^ A) >> 2);

        // Values above 32 are bumped by one.
        return res + (res > 32 ? 1 : 0);
    }

    return 0;
}
void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {

2
src/video_core/renderer_opengl/gl_texture_cache.h

@ -124,8 +124,6 @@ public:
std::span<const VideoCommon::SwizzleParameters> swizzles,
u32 z_start, u32 z_count);
void ClearImage(Image& image, u32 clear_value);
void InsertUploadMemoryBarrier();
/// No-op: the body is empty, so calling this does nothing with `image`.
void TransitionImageLayout(Image& image) {}

4
src/video_core/texture_cache/texture_cache_base.h

@ -328,10 +328,6 @@ private:
/// Refresh the contents (pixel data) of an image
void RefreshContents(Image& image, ImageId image_id);
/// Sparse texture partial upload
template <typename StagingBuffer>
void UploadSparseDirtyTiles(Image& image, StagingBuffer& staging);
/// Upload data from guest to an image
template <typename StagingBuffer>
void UploadImageContents(Image& image, StagingBuffer& staging_buffer);

5
src/video_core/texture_cache/util.h

@ -30,11 +30,6 @@ struct OverlapResult {
SubresourceExtent resources;
};
/// Plain aggregate of two 32-bit values.
/// NOTE(review): the names suggest the row length and image height of a buffer produced
/// by unswizzling a sparse tile; units and producer are not visible here, so confirm.
struct SparseTileUnswizzleResult {
// presumably the row length of the buffer; units not shown here, verify against the producer
u32 buffer_row_length;
// presumably the image height of the buffer; units not shown here, verify against the producer
u32 buffer_image_height;
};
[[nodiscard]] u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept;
[[nodiscard]] u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept;

Loading…
Cancel
Save