diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index 761b62582d..ef3f407c33 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -62,13 +62,6 @@ const uint encoding_values[22] = uint[](
     (QUINT | (4u << 8u)), (TRIT | (5u << 8u)), (JUST_BITS | (7u << 8u)), (QUINT | (5u << 8u)),
     (TRIT | (6u << 8u)), (JUST_BITS | (8u << 8u)));
     
-// Precomputed weight tables
-const uint WEIGHT_TABLE_1BIT[2] = uint[](0, 64);
-const uint WEIGHT_TABLE_2BIT[4] = uint[](0, 21, 43, 64);
-const uint WEIGHT_TABLE_3BIT[8] = uint[](0, 9, 18, 27, 37, 46, 55, 64);
-const uint WEIGHT_TABLE_4BIT[16] = uint[](0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64);
-const uint WEIGHT_TABLE_5BIT[32] = uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64);
-
 // Input ASTC texture globals
 int total_bitsread = 0;
 uvec4 local_buff;
@@ -739,13 +732,14 @@ uint UnquantizeTexelWeight(EncodingData val) {
     const uint bitval = BitValue(val);
 
     if (encoding == JUST_BITS) {
+        uint z = bitval;
         switch (bitlen) {
-            case 1: return WEIGHT_TABLE_1BIT[bitval];
-            case 2: return WEIGHT_TABLE_2BIT[bitval];
-            case 3: return WEIGHT_TABLE_3BIT[bitval];
-            case 4: return WEIGHT_TABLE_4BIT[bitval];
-            case 5: return WEIGHT_TABLE_5BIT[bitval];
-            default: return FastReplicateTo6(bitval, bitlen);
+            case 1: return z * 64u; // 1 bit: 0 or 64
+            case 2: return z * 21u + (z >> 1u); // replicate to 6 bits (z*21), +1 when above 32 (top bit set)
+            case 3: return z * 9u + (z >> 2u); // replicate to 6 bits (z*9), +1 when above 32 (top bit set)
+            case 4: return ((z << 2u) | (z >> 2u)) + (z >> 3u); // exact integer form; z*4.125 gave 16 for z=4 and 61 for z=15
+            case 5: return ((z << 1u) | (z >> 4u)) + (z >> 4u); // exact integer form; z*2.0625 gave 33 for z=16 and 63 for z=31
+            default: return FastReplicateTo6(z, bitlen); // remaining bit lengths use the generic replicate helper
         }
     }
     
diff --git a/src/video_core/host_shaders/block_linear_unswizzle_3d_bcn.comp b/src/video_core/host_shaders/block_linear_unswizzle_3d_bcn.comp
index 455e99e019..a25eb52327 100644
--- a/src/video_core/host_shaders/block_linear_unswizzle_3d_bcn.comp
+++ b/src/video_core/host_shaders/block_linear_unswizzle_3d_bcn.comp
@@ -152,23 +152,12 @@ void main() {
     uint block_index = block_coord.x +
                        (block_coord.y * pc.blocks_dim.x) +
                        (block_coord.z * pc.blocks_dim.x * pc.blocks_dim.y);
+    uint out_idx = block_index * (bytes_per_block >> 2u);
 
-    if (bytes_per_block == 16u) {
-        // BC6H/BC7
-        uvec4 out_data[1];
-        out_data[0] = texel;
-        out_u32[block_index * 4u] = texel.x;
-        out_u32[block_index * 4u + 1u] = texel.y;
-        out_u32[block_index * 4u + 2u] = texel.z;
-        out_u32[block_index * 4u + 3u] = texel.w;
-    } else if (bytes_per_block == 8u) {
-        // BC1/BC4
-        uint out_idx = block_index * 2u;
-        out_u32[out_idx] = texel.x;
-        out_u32[out_idx + 1u] = texel.y;
-    } else {
-        uint out_idx = block_index * (bytes_per_block >> 2u);
-        out_u32[out_idx] = texel.x;
-        if (bytes_per_block > 4u) out_u32[out_idx + 1u] = texel.y;
+    out_u32[out_idx]     = texel.x;
+    out_u32[out_idx + 1u] = texel.y;
+    if (pc.bytes_per_block_log2 == 4u) {
+        out_u32[out_idx + 2u] = texel.z;
+        out_u32[out_idx + 3u] = texel.w;
     }
 }
\ No newline at end of file
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index bc5ab23d06..1874d4002c 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -756,8 +756,6 @@ void BlockLinearUnswizzle3DPass::Unswizzle(
 {
     using namespace VideoCommon::Accelerated;
     
-    // Leaving this here incase instances are found where slices_needed causes device loss
-    // Tune this for a balance between speed and size, I don't own a deck so can't self tune it
     const u32 MAX_BATCH_SLICES = std::min(z_count, image.info.size.depth);
     
     if (!image.has_compute_unswizzle_buffer) {
@@ -874,9 +872,10 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk(
             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
             .pNext = nullptr,
             .srcAccessMask = is_first_chunk ? VkAccessFlags{} : 
-                            static_cast<VkAccessFlags>(VK_ACCESS_SHADER_READ_BIT),
+                            static_cast<VkAccessFlags>(VK_ACCESS_TRANSFER_WRITE_BIT),
             .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
-            .oldLayout = is_first_chunk ? VK_IMAGE_LAYOUT_UNDEFINED : VK_IMAGE_LAYOUT_GENERAL,
+            .oldLayout = is_first_chunk ? VK_IMAGE_LAYOUT_UNDEFINED : 
+                        VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
             .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
             .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
             .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 74a42e5c87..74edeec3e2 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1510,14 +1510,15 @@ void TextureCache<P>::TickAsyncUnswizzle() {
         task.current_offset += copy_amount;
     }
     
+    const bool is_final_batch = task.current_offset >= task.total_size;
     const size_t bytes_ready = task.current_offset - task.last_submitted_offset;
     const u32 complete_slices = static_cast<u32>(bytes_ready / task.bytes_per_slice);
-    const bool is_final_batch = task.current_offset >= task.total_size;
     
     if (complete_slices >= SLICES_PER_BATCH || (is_final_batch && complete_slices > 0)) {
         const u32 z_start = static_cast<u32>(task.last_submitted_offset / task.bytes_per_slice);
-        const u32 z_count = std::min(complete_slices, image.info.size.depth - z_start);
-        
+        const u32 slices_to_process = std::min(complete_slices, SLICES_PER_BATCH);
+        const u32 z_count = std::min(slices_to_process, image.info.size.depth - z_start);
+
         if (z_count > 0) {
             const auto uploads = FullUploadSwizzles(task.info);
             runtime.AccelerateImageUpload(image, task.staging_buffer, FixSmallVectorADL(uploads), z_start, z_count);
@@ -1526,7 +1527,10 @@ void TextureCache<P>::TickAsyncUnswizzle() {
     }
     
     // Check if complete
-    if (is_final_batch && task.last_submitted_offset >= task.total_size) {
+    const u32 slices_submitted = static_cast<u32>(task.last_submitted_offset / task.bytes_per_slice);
+    const bool all_slices_submitted = slices_submitted >= image.info.size.depth;
+    
+    if (is_final_batch && all_slices_submitted) {
         runtime.FreeDeferredStagingBuffer(task.staging_buffer);
         image.flags &= ~ImageFlagBits::IsDecoding;
         unswizzle_queue.pop_front();