From 769edbfea31e0300137bb1214dd2822fceaa09ab Mon Sep 17 00:00:00 2001 From: crueter Date: Tue, 10 Mar 2026 05:44:51 +0100 Subject: [PATCH] [video_core] Revert "Simplify TextureCache GC and remove redundant code" (#3652) (#3704) regr. Steam Deck Please, for the love of God, stop saying "YOLO good to merge" after testers report performance regressions (and promptly get brushed to the side). Seriously, what the hell? This reverts commit f8ea09fa0f06572b32df1ead2fb38a64098e312e. Signed-off-by: crueter Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3704 Reviewed-by: Lizzie Reviewed-by: DraVee --- src/video_core/texture_cache/texture_cache.h | 141 ++++++++++++++++-- .../texture_cache/texture_cache_base.h | 1 + 2 files changed, 130 insertions(+), 12 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index e32f21d2ce..71210ffe6e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -70,10 +70,14 @@ TextureCache

::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag (std::max)((std::min)(device_local_memory - min_vacancy_critical, min_spacing_critical), DEFAULT_CRITICAL_MEMORY)); minimum_memory = static_cast((device_local_memory - mem_threshold) / 2); + + lowmemorydevice = false; } else { expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; minimum_memory = 0; + + lowmemorydevice = true; } const bool gpu_unswizzle_enabled = Settings::values.gpu_unswizzle_enabled.GetValue(); @@ -118,46 +122,102 @@ void TextureCache

::RunGarbageCollector() { bool aggressive_mode = false; u64 ticks_to_destroy = 0; size_t num_iterations = 0; + const auto Configure = [&](bool allow_aggressive) { high_priority_mode = total_used_memory >= expected_memory; aggressive_mode = allow_aggressive && total_used_memory >= critical_memory; ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL; num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10); }; - const auto Cleanup = [this, &num_iterations, &high_priority_mode, &aggressive_mode](ImageId image_id) { + + const auto Cleanup = [this, &num_iterations, &high_priority_mode, + &aggressive_mode](ImageId image_id) { if (num_iterations == 0) { return true; } --num_iterations; auto& image = slot_images[image_id]; + + // Never delete recently allocated sparse textures (within 3 frames) + const bool is_recently_allocated = image.allocation_tick >= frame_tick - 3; + if (is_recently_allocated && image.info.is_sparse) { + return false; + } + if (True(image.flags & ImageFlagBits::IsDecoding)) { + // This image is still being decoded, deleting it will invalidate the slot + // used by the async decoder thread. 
+ return false; + } + + // Prioritize large sparse textures for cleanup + const bool is_large_sparse = lowmemorydevice && + image.info.is_sparse && + image.guest_size_bytes >= 256_MiB; + + if (!aggressive_mode && !is_large_sparse && + True(image.flags & ImageFlagBits::CostlyLoad)) { return false; } - const bool must_download = image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); - if (must_download && !image.info.is_sparse) { + + const bool must_download = + image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); + if (!high_priority_mode && !is_large_sparse && must_download) { + return false; + } + + if (must_download && !is_large_sparse) { auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); const auto copies = FixSmallVectorADL(FullDownloadCopies(image.info)); image.DownloadMemory(map, copies); runtime.Finish(); - SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span, swizzle_data_buffer); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span, + swizzle_data_buffer); } + if (True(image.flags & ImageFlagBits::Tracked)) { UntrackImage(image, image_id); } UnregisterImage(image_id); - DeleteImage(image_id, (frame_tick - image.scale_tick) > 5 || aggressive_mode); - if (aggressive_mode && total_used_memory < critical_memory) { - num_iterations >>= 2; - aggressive_mode = false; - } - if (high_priority_mode && total_used_memory < expected_memory) { - num_iterations >>= 1; - high_priority_mode = false; + DeleteImage(image_id, image.scale_tick > frame_tick + 5); + + if (total_used_memory < critical_memory) { + if (aggressive_mode) { + // Sink the aggressiveness. 
+ num_iterations >>= 2; + aggressive_mode = false; + return false; + } + if (high_priority_mode && total_used_memory < expected_memory) { + num_iterations >>= 1; + high_priority_mode = false; + } } return false; }; + + // Aggressively clear massive sparse textures + if (total_used_memory >= expected_memory) { + lru_cache.ForEachItemBelow(frame_tick, [&](ImageId image_id) { + auto& image = slot_images[image_id]; + // Only target sparse textures that are old enough + if (lowmemorydevice && + image.info.is_sparse && + image.guest_size_bytes >= 256_MiB && + image.allocation_tick < frame_tick - 3) { + LOG_DEBUG(HW_GPU, "GC targeting old sparse texture at 0x{:X} ({} MiB, age: {} frames)", + image.gpu_addr, image.guest_size_bytes / (1024 * 1024), + frame_tick - image.allocation_tick); + return Cleanup(image_id); + } + return false; + }); + } + Configure(false); lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup); + + // If pressure is still too high, prune aggressively. if (total_used_memory >= critical_memory) { Configure(true); lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup); @@ -1136,6 +1196,9 @@ void TextureCache

::RefreshContents(Image& image, ImageId image_id) { } image.flags &= ~ImageFlagBits::CpuModified; + if( lowmemorydevice && image.info.format == PixelFormat::BC1_RGBA_UNORM && MapSizeBytes(image) >= 256_MiB ) { + return; + } TrackImage(image, image_id); @@ -1556,6 +1619,39 @@ ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, } } ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); + + // For large sparse textures, aggressively clean up old allocations at same address + if (lowmemorydevice && info.is_sparse && CalculateGuestSizeInBytes(info) >= 256_MiB) { + const auto alloc_it = image_allocs_table.find(gpu_addr); + if (alloc_it != image_allocs_table.end()) { + const ImageAllocId alloc_id = alloc_it->second; + auto& alloc_images = slot_image_allocs[alloc_id].images; + + // Collect old images at this address that were created more than 2 frames ago + boost::container::small_vector to_delete; + for (ImageId old_image_id : alloc_images) { + Image& old_image = slot_images[old_image_id]; + if (old_image.info.is_sparse && + old_image.gpu_addr == gpu_addr && + old_image.allocation_tick < frame_tick - 2) { // Try not to delete fresh textures + to_delete.push_back(old_image_id); + } + } + + // Delete old images immediately + for (ImageId old_id : to_delete) { + Image& old_image = slot_images[old_id]; + LOG_DEBUG(HW_GPU, "Immediately deleting old sparse texture at 0x{:X} ({} MiB)", + gpu_addr, old_image.guest_size_bytes / (1024 * 1024)); + if (True(old_image.flags & ImageFlagBits::Tracked)) { + UntrackImage(old_image, old_id); + } + UnregisterImage(old_id); + DeleteImage(old_id, true); + } + } + } + const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); const Image& image = slot_images[image_id]; // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different @@ -1571,6 +1667,27 @@ template ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr) { ImageInfo new_info = info; const size_t size_bytes = CalculateGuestSizeInBytes(new_info); + + // Proactive cleanup for large sparse texture allocations + if (lowmemorydevice && new_info.is_sparse && size_bytes >= 256_MiB) { + const u64 estimated_alloc_size = size_bytes; + + if (total_used_memory + estimated_alloc_size >= critical_memory) { + LOG_DEBUG(HW_GPU, "Large sparse texture allocation ({} MiB) - running aggressive GC. " + "Current memory: {} MiB, Critical: {} MiB", + size_bytes / (1024 * 1024), + total_used_memory / (1024 * 1024), + critical_memory / (1024 * 1024)); + RunGarbageCollector(); + + // If still over threshold after GC, try one more aggressive pass + if (total_used_memory + estimated_alloc_size >= critical_memory) { + LOG_DEBUG(HW_GPU, "Still critically low on memory, running second GC pass"); + RunGarbageCollector(); + } + } + } + const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool native_bgr = runtime.HasNativeBgr(); join_overlap_ids.clear(); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 47f52c5c99..4b4061f21d 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -478,6 +478,7 @@ private: u64 minimum_memory; u64 expected_memory; u64 critical_memory; + bool lowmemorydevice = false; size_t gpu_unswizzle_maxsize = 0; size_t swizzle_chunk_size = 0; u32 swizzle_slices_per_batch = 0;