diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index e32f21d2ce..71210ffe6e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -70,10 +70,14 @@ TextureCache
::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag
(std::max)((std::min)(device_local_memory - min_vacancy_critical, min_spacing_critical),
DEFAULT_CRITICAL_MEMORY));
minimum_memory = static_cast ::RunGarbageCollector() {
bool aggressive_mode = false;
u64 ticks_to_destroy = 0;
size_t num_iterations = 0;
+
const auto Configure = [&](bool allow_aggressive) {
high_priority_mode = total_used_memory >= expected_memory;
aggressive_mode = allow_aggressive && total_used_memory >= critical_memory;
ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10);
};
- const auto Cleanup = [this, &num_iterations, &high_priority_mode, &aggressive_mode](ImageId image_id) {
+
+ const auto Cleanup = [this, &num_iterations, &high_priority_mode,
+ &aggressive_mode](ImageId image_id) {
if (num_iterations == 0) {
return true;
}
--num_iterations;
auto& image = slot_images[image_id];
+
+ // Never delete recently allocated sparse textures (within 3 frames)
+ const bool is_recently_allocated = image.allocation_tick >= frame_tick - 3;
+ if (is_recently_allocated && image.info.is_sparse) {
+ return false;
+ }
+
if (True(image.flags & ImageFlagBits::IsDecoding)) {
+ // This image is still being decoded, deleting it will invalidate the slot
+ // used by the async decoder thread.
+ return false;
+ }
+
+ // Prioritize large sparse textures for cleanup
+ const bool is_large_sparse = lowmemorydevice &&
+ image.info.is_sparse &&
+ image.guest_size_bytes >= 256_MiB;
+
+ if (!aggressive_mode && !is_large_sparse &&
+ True(image.flags & ImageFlagBits::CostlyLoad)) {
return false;
}
- const bool must_download = image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
- if (must_download && !image.info.is_sparse) {
+
+ const bool must_download =
+ image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
+ if (!high_priority_mode && !is_large_sparse && must_download) {
+ return false;
+ }
+
+ if (must_download && !is_large_sparse) {
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
const auto copies = FixSmallVectorADL(FullDownloadCopies(image.info));
image.DownloadMemory(map, copies);
runtime.Finish();
- SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span, swizzle_data_buffer);
+ SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
+ swizzle_data_buffer);
}
+
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
}
UnregisterImage(image_id);
- DeleteImage(image_id, (frame_tick - image.scale_tick) > 5 || aggressive_mode);
- if (aggressive_mode && total_used_memory < critical_memory) {
- num_iterations >>= 2;
- aggressive_mode = false;
- }
- if (high_priority_mode && total_used_memory < expected_memory) {
- num_iterations >>= 1;
- high_priority_mode = false;
+ DeleteImage(image_id, image.scale_tick > frame_tick + 5);
+
+ if (total_used_memory < critical_memory) {
+ if (aggressive_mode) {
+ // Sink the aggresiveness.
+ num_iterations >>= 2;
+ aggressive_mode = false;
+ return false;
+ }
+ if (high_priority_mode && total_used_memory < expected_memory) {
+ num_iterations >>= 1;
+ high_priority_mode = false;
+ }
}
return false;
};
+
+ // Aggressively clear massive sparse textures
+ if (total_used_memory >= expected_memory) {
+ lru_cache.ForEachItemBelow(frame_tick, [&](ImageId image_id) {
+ auto& image = slot_images[image_id];
+ // Only target sparse textures that are old enough
+ if (lowmemorydevice &&
+ image.info.is_sparse &&
+ image.guest_size_bytes >= 256_MiB &&
+ image.allocation_tick < frame_tick - 3) {
+ LOG_DEBUG(HW_GPU, "GC targeting old sparse texture at 0x{:X} ({} MiB, age: {} frames)",
+ image.gpu_addr, image.guest_size_bytes / (1024 * 1024),
+ frame_tick - image.allocation_tick);
+ return Cleanup(image_id);
+ }
+ return false;
+ });
+ }
+
Configure(false);
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
+
+ // If pressure is still too high, prune aggressively.
if (total_used_memory >= critical_memory) {
Configure(true);
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
@@ -1136,6 +1196,9 @@ void TextureCache ::RefreshContents(Image& image, ImageId image_id) {
}
image.flags &= ~ImageFlagBits::CpuModified;
+ if( lowmemorydevice && image.info.format == PixelFormat::BC1_RGBA_UNORM && MapSizeBytes(image) >= 256_MiB ) {
+ return;
+ }
TrackImage(image, image_id);
@@ -1556,6 +1619,39 @@ ImageId TextureCache ::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
}
}
ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
+
+ // For large sparse textures, aggressively clean up old allocations at same address
+ if (lowmemorydevice && info.is_sparse && CalculateGuestSizeInBytes(info) >= 256_MiB) {
+ const auto alloc_it = image_allocs_table.find(gpu_addr);
+ if (alloc_it != image_allocs_table.end()) {
+ const ImageAllocId alloc_id = alloc_it->second;
+ auto& alloc_images = slot_image_allocs[alloc_id].images;
+
+ // Collect old images at this address that were created more than 2 frames ago
+ boost::container::small_vector ::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr) {
ImageInfo new_info = info;
const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
+
+ // Proactive cleanup for large sparse texture allocations
+ if (lowmemorydevice && new_info.is_sparse && size_bytes >= 256_MiB) {
+ const u64 estimated_alloc_size = size_bytes;
+
+ if (total_used_memory + estimated_alloc_size >= critical_memory) {
+ LOG_DEBUG(HW_GPU, "Large sparse texture allocation ({} MiB) - running aggressive GC. "
+ "Current memory: {} MiB, Critical: {} MiB",
+ size_bytes / (1024 * 1024),
+ total_used_memory / (1024 * 1024),
+ critical_memory / (1024 * 1024));
+ RunGarbageCollector();
+
+ // If still over threshold after GC, try one more aggressive pass
+ if (total_used_memory + estimated_alloc_size >= critical_memory) {
+ LOG_DEBUG(HW_GPU, "Still critically low on memory, running second GC pass");
+ RunGarbageCollector();
+ }
+ }
+ }
+
const bool broken_views = runtime.HasBrokenTextureViewFormats();
const bool native_bgr = runtime.HasNativeBgr();
join_overlap_ids.clear();
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 47f52c5c99..4b4061f21d 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -478,6 +478,7 @@ private:
u64 minimum_memory;
u64 expected_memory;
u64 critical_memory;
+ bool lowmemorydevice = false;
size_t gpu_unswizzle_maxsize = 0;
size_t swizzle_chunk_size = 0;
u32 swizzle_slices_per_batch = 0;