From a32b97c0ed40ebede48e6510ba26fffe6681b3c4 Mon Sep 17 00:00:00 2001 From: Forrest Mark X Date: Sat, 13 Jun 2026 05:51:18 -0500 Subject: [PATCH] Fixed compiler issues. Added adaptive batching for the memory copy. Fixed an issue where the system was reading from memory and then unswizzling the texture, when the workflow should have been to copy from memory until the copy finishes and only then unswizzle. --- src/video_core/texture_cache/texture_cache.h | 49 +++++++++++++++---- .../texture_cache/texture_cache_base.h | 3 ++ 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 6022dd3db0..7058500b0b 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1419,17 +1419,40 @@ u32 TextureCache

::GetAdaptiveBatchSize(const PendingUnswizzle& task, size_t q multiplier = 8; } const u32 dynamic_slices = base_slices * multiplier; - return std::min(dynamic_slices, static_cast(texture_slices)); + return (std::min)(dynamic_slices, static_cast(texture_slices)); } if (queue_size > MODERATE_BACKLOG) { const u32 dynamic_slices = base_slices * 2; - return std::min(dynamic_slices, static_cast(texture_slices)); + return (std::min)(dynamic_slices, static_cast(texture_slices)); } return base_slices; } +template +u32 TextureCache

::GetAdaptiveChunkSize(const PendingUnswizzle& task, size_t queue_size) const { + const u32 base_chunk = swizzle_chunk_size; + if (base_chunk == 0) + return 0; + + constexpr size_t LARGE_BACKLOG = 4; + constexpr size_t MODERATE_BACKLOG = 2; + constexpr size_t LARGE_TEXTURE_BYTES = 64_MiB; + constexpr size_t HUGE_TEXTURE_BYTES = 256_MiB; + + if (queue_size > LARGE_BACKLOG) { + u32 multiplier = 4; + if (task.total_size < HUGE_TEXTURE_BYTES) + multiplier = 8; + return (std::min)(base_chunk * multiplier, static_cast(task.total_size)); + } + if (queue_size > MODERATE_BACKLOG) { + return (std::min)(base_chunk * 2, static_cast(task.total_size)); + } + return base_chunk; +} + template void TextureCache

::TickAsyncUnswizzle() { if (unswizzle_queue.empty()) { @@ -1465,9 +1488,12 @@ void TextureCache

::TickAsyncUnswizzle() { const size_t remaining = task.total_size - task.current_offset; size_t copy_amount = 0; - if( swizzle_chunk_size == 0 ) + if (swizzle_chunk_size == 0) { copy_amount = remaining; - else copy_amount = (std::min)(swizzle_chunk_size, remaining); + } else { + const u32 dynamic_chunk = GetAdaptiveChunkSize(task, unswizzle_queue.size()); + copy_amount = std::min(dynamic_chunk, remaining); + } if (remaining > swizzle_chunk_size) { copy_amount = (copy_amount / task.bytes_per_slice) * task.bytes_per_slice; @@ -1480,6 +1506,10 @@ void TextureCache

::TickAsyncUnswizzle() { task.current_offset += copy_amount; } + if (task.current_offset < task.total_size) { + return; + } + const bool is_final_batch = task.current_offset >= task.total_size; const size_t bytes_ready = task.current_offset - task.last_submitted_offset; const u32 complete_slices = static_cast(bytes_ready / task.bytes_per_slice); @@ -1496,12 +1526,13 @@ void TextureCache

::TickAsyncUnswizzle() { const u32 slices_to_process = (std::min)(complete_slices, adaptive_batch); if (whole_texture) { - const auto uploads = FullUploadSwizzles(task.info); - runtime.AccelerateImageUpload(image, task.staging_buffer, FixSmallVectorADL(uploads), - z_start, slices_to_process); - task.last_submitted_offset = task.total_size; + runtime.AccelerateImageUpload(image, task.staging_buffer, + FixSmallVectorADL(FullUploadSwizzles(task.info)), 0, + image.info.size.depth); + task.last_submitted_offset += + (static_cast(image.info.size.depth) * task.bytes_per_slice); } else if (complete_slices >= slices_to_process || (is_final_batch && complete_slices > 0)) { - const u32 z_count = std::min(slices_to_process, task.info.size.depth - z_start); + const u32 z_count = (std::min)(slices_to_process, task.info.size.depth - z_start); if (z_count > 0) { const auto uploads = FullUploadSwizzles(task.info); runtime.AccelerateImageUpload(image, task.staging_buffer, FixSmallVectorADL(uploads), diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 08cda5dce3..d97f9462ee 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -419,6 +419,9 @@ private: void QueueAsyncDecode(Image& image, ImageId image_id); void TickAsyncDecode(); u32 GetAdaptiveBatchSize(const PendingUnswizzle& task, size_t queue_size) const; + + u32 GetAdaptiveChunkSize(const PendingUnswizzle &task, size_t queue_size) const; + void EnforceSamplerBudget(); void TrimInactiveSamplers(size_t budget); std::optional QuerySamplerBudget() const;