diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 813b5239dc..91b633a389 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1397,6 +1397,39 @@ void TextureCache
::TickAsyncDecode() {
}
}
+/// Returns how many depth slices of `task` to submit in one batch, scaled by `queue_size`
+/// (the number of pending unswizzles). 0xFFFFFFFF is a sentinel that TickAsyncUnswizzle
+/// treats as "whole texture"; results from the backlog tiers are clamped to the texture
+/// depth, while the no-backlog result is swizzle_slices_per_batch unchanged.
+template <class P>
+u32 TextureCache<P>::GetAdaptiveBatchSize(const PendingUnswizzle& task, size_t queue_size) const {
+    constexpr size_t LARGE_BACKLOG = 4;
+    constexpr size_t MODERATE_BACKLOG = 2;
+    constexpr size_t LARGE_TEXTURE_BYTES = 64_MiB;
+    constexpr size_t HUGE_TEXTURE_BYTES = 256_MiB;
+    constexpr u32 WHOLE_TEXTURE = 0xFFFFFFFF; // Keep in sync with the check in TickAsyncUnswizzle
+
+    const u32 base_slices = swizzle_slices_per_batch;
+    if (queue_size <= MODERATE_BACKLOG) {
+        return base_slices;
+    }
+
+    const u32 depth_slices = static_cast<u32>(task.info.size.depth);
+    if (queue_size <= LARGE_BACKLOG) {
+        return std::min(base_slices * 2, depth_slices);
+    }
+
+    // Large backlog: anything below LARGE_TEXTURE_BYTES goes out in one submission.
+    const size_t texture_bytes = task.total_size;
+    if (texture_bytes < LARGE_TEXTURE_BYTES) {
+        return WHOLE_TEXTURE;
+    }
+
+    // Bigger textures stay batched; those at or above HUGE_TEXTURE_BYTES use the smaller factor.
+    const u32 multiplier = texture_bytes < HUGE_TEXTURE_BYTES ? 8 : 4;
+    return std::min(base_slices * multiplier, depth_slices);
+}
+
template
void TextureCache::TickAsyncUnswizzle() {
if (unswizzle_queue.empty()) {
@@ -1455,15 +1488,26 @@ void TextureCache
::TickAsyncUnswizzle() {
runtime.AccelerateImageUpload(image, task.staging_buffer, FixSmallVectorADL(FullUploadSwizzles(task.info)), 0, image.info.size.depth);
task.last_submitted_offset += (static_cast(image.info.size.depth) * task.bytes_per_slice);
}
- else if (complete_slices >= swizzle_slices_per_batch || (is_final_batch && complete_slices > 0)) {
+ else {
+ const u32 adaptive_batch = GetAdaptiveBatchSize(task, unswizzle_queue.size());
+
+ const bool whole_texture = adaptive_batch == 0xFFFFFFFF;
const u32 z_start = static_cast(task.last_submitted_offset / task.bytes_per_slice);
const u32 slices_to_process = (std::min)(complete_slices, swizzle_slices_per_batch);
- const u32 z_count = (std::min)(slices_to_process, image.info.size.depth - z_start);
- if (z_count > 0) {
+ if (whole_texture) {
const auto uploads = FullUploadSwizzles(task.info);
- runtime.AccelerateImageUpload(image, task.staging_buffer, FixSmallVectorADL(uploads), z_start, z_count);
- task.last_submitted_offset += (static_cast(z_count) * task.bytes_per_slice);
+ runtime.AccelerateImageUpload(image, task.staging_buffer, FixSmallVectorADL(uploads),
+ z_start, slices_to_process);
+ task.last_submitted_offset = task.total_size;
+ } else if (complete_slices >= slices_to_process || (is_final_batch && complete_slices > 0)) {
+ const u32 z_count = std::min(slices_to_process, task.info.size.depth - z_start);
+ if (z_count > 0) {
+ const auto uploads = FullUploadSwizzles(task.info);
+ runtime.AccelerateImageUpload(image, task.staging_buffer, FixSmallVectorADL(uploads),
+ z_start, z_count);
+ task.last_submitted_offset += (static_cast(z_count) * task.bytes_per_slice);
+ }
}
}
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index fbc2bb4cf7..08cda5dce3 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -418,6 +418,7 @@ private:
void QueueAsyncDecode(Image& image, ImageId image_id);
void TickAsyncDecode();
+ u32 GetAdaptiveBatchSize(const PendingUnswizzle& task, size_t queue_size) const;
void EnforceSamplerBudget();
void TrimInactiveSamplers(size_t budget);
std::optional QuerySamplerBudget() const;
@@ -513,7 +514,6 @@ private:
std::vector> async_decodes;
std::deque unswizzle_queue;
- u8 current_unswizzle_frame;
// Join caching
boost::container::small_vector join_overlap_ids;