From af7aec449e2025fd4f07a74b7694b323dd3f8a11 Mon Sep 17 00:00:00 2001 From: Forrest Keller Date: Sat, 10 Jan 2026 19:54:28 -0600 Subject: [PATCH] Updated the ASTC decoder Removed unused leftover code from previous optimization attempts --- src/video_core/host_shaders/astc_decoder.comp | 129 ++++-------------- .../renderer_opengl/gl_texture_cache.h | 2 - .../texture_cache/texture_cache_base.h | 4 - src/video_core/texture_cache/util.h | 5 - 4 files changed, 25 insertions(+), 115 deletions(-) diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index a1ff3323cf..da21b4bde8 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -727,114 +727,35 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, ui } uint UnquantizeTexelWeight(EncodingData val) { - const uint encoding = Encoding(val); - const uint bitlen = NumBits(val); - const uint bitval = BitValue(val); - - // I probably added this wrong, brain empty atm + uint encoding = Encoding(val), bitlen = NumBits(val), bitval = BitValue(val); if (encoding == JUST_BITS) { - uint z = bitval; - uint x = bitlen; - switch (bitlen) { - case 1: - return z * 64; - case 2: - case 3: - case 4: - case 5: - return uint(floor(0.5f + float(z) * 64.0f / float((1 << x) - 1))); - default: - return FastReplicateTo6(z, bitlen); - } - } - - // Brain screaming at me that division is expensive, probably ultra wrong so have this here incase - /*if (encoding == JUST_BITS) { - uint z = bitval; - switch (bitlen) { - case 1: return z * 64; - - case 2: // (z * 64 + 1) / 3 - return ((z * 64 + 1) * 43691) >> 17; - - case 3: // (z * 64 + 3) / 7 - return ((z * 64 + 3) * 37449) >> 18; - - case 4: // (z * 64 + 7) / 15 - return ((z * 64 + 7) * 34953) >> 19; - - case 5: // (z * 64 + 15) / 31 - return ((z * 64 + 15) * 33826) >> 20; - - default: - return FastReplicateTo6(z, bitlen); - } - }*/ - - const uint A = ReplicateBitTo7((bitval & 1)); - uint B = 0, C = 0, D = 0; - uint result = 0; - - switch (encoding) { - case TRIT: { + return (bitlen >= 1 && bitlen <= 5) + ? uint(floor(0.5f + float(bitval) * 64.0f / float((1 << bitlen) - 1))) + : FastReplicateTo6(bitval, bitlen); + } else if (encoding == TRIT || encoding == QUINT) { + uint B = 0, C = 0, D = 0; + uint b_mask = (0x3100 >> (bitlen * 4)) & 0xf; + uint b = (bitval >> 1) & b_mask; D = QuintTritValue(val); - switch (bitlen) { - case 0: { - const uint trit_base[3] = uint[](0, 32, 64); - return trit_base[D]; - } - case 1: { - C = 50; - break; - } - case 2: { - C = 23; - const uint b = (bitval >> 1) & 1; - B = (b << 6) | (b << 2) | b; - break; - } - case 3: { - C = 11; - const uint cb = (bitval >> 1) & 3; - B = (cb << 5) | cb; - break; - } - } - break; - } - case QUINT: { - D = QuintTritValue(val); - switch (bitlen) { - case 0: { - const uint quint_base[5] = uint[](0, 16, 32, 48, 64); - return quint_base[D]; - } - case 1: { - C = 28; - break; - } - case 2: { - C = 13; - const uint b = (bitval >> 1) & 1; - B = (b << 6) | (b << 1); - break; - } + if (encoding == TRIT) { + switch (bitlen) { + case 0: return D * 32; //0,32,64 + case 1: C = 50; break; + case 2: C = 23; B = (b << 6) | (b << 2) | b; break; + case 3: C = 11; B = (b << 5) | b; break; + } + } else if (encoding == QUINT) { + switch (bitlen) { + case 0: return D * 16; //0, 16, 32, 48, 64 + case 1: C = 28; break; + case 2: C = 13; B = (b << 6) | (b << 1); break; + } } - break; - } + uint A = ReplicateBitTo7(bitval & 1); + uint res = (A & 0x20) | (((D * C + B) ^ A) >> 2); + return res + (res > 32 ? 1 : 0); } - - if (bitlen > 0) { - result = D * C + B; - result ^= A; - result = (A & 0x20) | (result >> 2); - } - - if (result > 32) { - result += 1; - } - - return result; + return 0; } void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index bcb559e145..7faee3ed1e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -124,8 +124,6 @@ public: std::span swizzles, u32 z_start, u32 z_count); - void ClearImage(Image& image, u32 clear_value); - void InsertUploadMemoryBarrier(); void TransitionImageLayout(Image& image) {} diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 1dc1da1698..97de31d424 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -328,10 +328,6 @@ private: /// Refresh the contents (pixel data) of an image void RefreshContents(Image& image, ImageId image_id); - /// Sparse texture partial upload - template - void UploadSparseDirtyTiles(Image& image, StagingBuffer& staging); - /// Upload data from guest to an image template void UploadImageContents(Image& image, StagingBuffer& staging_buffer); diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 51379753b5..3e8bb00032 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -30,11 +30,6 @@ struct OverlapResult { SubresourceExtent resources; }; -struct SparseTileUnswizzleResult { - u32 buffer_row_length; - u32 buffer_image_height; -}; - [[nodiscard]] u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept; [[nodiscard]] u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept;