From 080af4f7e3ce821c59b0626764f954662d359683 Mon Sep 17 00:00:00 2001 From: Forrest Keller Date: Sat, 10 Jan 2026 05:58:32 -0600 Subject: [PATCH] Remove whitespaces --- src/common/settings.h | 4 +- .../renderer_vulkan/vk_compute_pass.cpp | 54 +++++----- .../renderer_vulkan/vk_compute_pass.h | 4 +- .../renderer_vulkan/vk_scheduler.cpp | 2 +- .../renderer_vulkan/vk_texture_cache.cpp | 14 +-- .../renderer_vulkan/vk_texture_cache.h | 10 +- src/video_core/texture_cache/image_base.h | 2 +- src/video_core/texture_cache/texture_cache.h | 102 +++++++++--------- .../texture_cache/texture_cache_base.h | 4 +- src/video_core/texture_cache/util.h | 2 +- 10 files changed, 99 insertions(+), 99 deletions(-) diff --git a/src/common/settings.h b/src/common/settings.h index 60c19997dd..3f0a03fba2 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -512,13 +512,13 @@ struct Values { SwitchableSetting use_asynchronous_shaders{linkage, false, "use_asynchronous_shaders", Category::RendererHacks}; - + SwitchableSetting gpu_unzwizzle_stream_size{linkage, GpuUnswizzle::Medium, "gpu_unzwizzle_stream_size", Category::RendererHacks, Specialization::Default}; - + SwitchableSetting gpu_unzwizzle_chunk_size{linkage, GpuUnswizzleChunk::Medium, "gpu_unzwizzle_chunk_size", diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 1874d4002c..e49b333528 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -711,19 +711,19 @@ constexpr std::array struct alignas(16) BlockLinearUnswizzle3DPushConstants { u32 blocks_dim[3]; // Offset 0 u32 bytes_per_block_log2; // Offset 12 - + u32 origin[3]; // Offset 16 u32 slice_size; // Offset 28 - + u32 block_size; // Offset 32 u32 x_shift; // Offset 36 u32 block_height; // Offset 40 u32 block_height_mask; // Offset 44 - + u32 block_depth; // Offset 48 u32 block_depth_mask; // Offset 52 s32 _pad; // Offset 56 - + s32 destination[3]; // Offset 60 s32 _pad_end; // Offset 72 }; @@ -755,9 +755,9 @@ void BlockLinearUnswizzle3DPass::Unswizzle( u32 z_start, u32 z_count) { using namespace VideoCommon::Accelerated; - + const u32 MAX_BATCH_SLICES = std::min(z_count, image.info.size.depth); - + if (!image.has_compute_unswizzle_buffer) { // Allocate exactly what this batch needs image.AllocateComputeUnswizzleBuffer(MAX_BATCH_SLICES); @@ -769,12 +769,12 @@ void BlockLinearUnswizzle3DPass::Unswizzle( const u32 blocks_x = (image.info.size.width + 3) / 4; const u32 blocks_y = (image.info.size.height + 3) / 4; - + scheduler.RequestOutsideRenderPassOperationContext(); for (u32 z_offset = 0; z_offset < z_count; z_offset += MAX_BATCH_SLICES) { const u32 current_chunk_slices = std::min(MAX_BATCH_SLICES, z_count - z_offset); const u32 current_z_start = z_start + z_offset; - + UnswizzleChunk(image, swizzled, sw, params, blocks_x, blocks_y, current_z_start, current_chunk_slices); } @@ -811,12 +811,12 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk( pc.blocks_dim[2] = z_count; // Only process the count compute_pass_descriptor_queue.Acquire(); - compute_pass_descriptor_queue.AddBuffer(*image.runtime->swizzle_table_buffer, 0, + compute_pass_descriptor_queue.AddBuffer(*image.runtime->swizzle_table_buffer, 0, image.runtime->swizzle_table_size); - compute_pass_descriptor_queue.AddBuffer(swizzled.buffer, - sw.buffer_offset + swizzled.offset, + compute_pass_descriptor_queue.AddBuffer(swizzled.buffer, + sw.buffer_offset + swizzled.offset, image.guest_size_bytes - 
sw.buffer_offset); - compute_pass_descriptor_queue.AddBuffer(*image.compute_unswizzle_buffer, 0, + compute_pass_descriptor_queue.AddBuffer(*image.compute_unswizzle_buffer, 0, image.compute_unswizzle_buffer_size); const void* descriptor_data = compute_pass_descriptor_queue.UpdateData(); @@ -825,12 +825,12 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk( const u32 gx = Common::DivCeil(blocks_x, 8u); const u32 gy = Common::DivCeil(blocks_y, 8u); const u32 gz = Common::DivCeil(z_count, 4u); - + const u32 bytes_per_block = 1u << pc.bytes_per_block_log2; const VkDeviceSize output_slice_size = static_cast(blocks_x) * blocks_y * bytes_per_block; const VkDeviceSize barrier_size = output_slice_size * z_count; - + const bool is_first_chunk = (z_start == 0); const VkBuffer out_buffer = *image.compute_unswizzle_buffer; @@ -843,11 +843,11 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk( barrier_size, is_first_chunk, out_buffer, dst_image, aspect, image_width, image_height ](vk::CommandBuffer cmdbuf) { - + if (dst_image == VK_NULL_HANDLE || out_buffer == VK_NULL_HANDLE) { return; } - + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); @@ -866,15 +866,15 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk( .offset = 0, .size = barrier_size, }; - + // Image layout transition const VkImageMemoryBarrier pre_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, - .srcAccessMask = is_first_chunk ? VkAccessFlags{} : + .srcAccessMask = is_first_chunk ? VkAccessFlags{} : static_cast(VK_ACCESS_TRANSFER_WRITE_BIT), .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .oldLayout = is_first_chunk ? VK_IMAGE_LAYOUT_UNDEFINED : + .oldLayout = is_first_chunk ? 
VK_IMAGE_LAYOUT_UNDEFINED : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, @@ -882,12 +882,12 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk( .image = dst_image, .subresourceRange = {aspect, 0, 1, 0, 1}, }; - + // Single barrier handles both buffer and image cmdbuf.PipelineBarrier( - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, nullptr, buffer_barrier, pre_barrier ); @@ -900,7 +900,7 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk( .imageOffset = {0, 0, static_cast(z_start)}, // Write to correct Z .imageExtent = {image_width, image_height, z_count}, }; - cmdbuf.CopyBufferToImage(out_buffer, dst_image, + cmdbuf.CopyBufferToImage(out_buffer, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); // Post-copy transition @@ -918,9 +918,9 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk( }; cmdbuf.PipelineBarrier( - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, nullptr, nullptr, post_barrier ); }); diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 3edcf020a2..0e5badce01 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -144,12 +144,12 @@ public: StagingBufferPool& staging_buffer_pool_, ComputePassDescriptorQueue& compute_pass_descriptor_queue_); ~BlockLinearUnswizzle3DPass(); - + void Unswizzle(Image& image, const StagingBufferRef& swizzled, std::span swizzles, u32 z_start, u32 z_count); - + void UnswizzleChunk( Image& image, const StagingBufferRef& swizzled, diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 8eb232af03..3aab4ba276 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -43,7 +43,7 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_) : device{device_}, state_tracker{state_tracker_}, master_semaphore{std::make_unique(device)}, command_pool{std::make_unique(*master_semaphore, device)} { - + // PRE-OPTIMIZATION: Warm up the pool to prevent mid-frame spikes { std::scoped_lock rl{reserve_mutex}; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 4f24cc13cd..c6e239fbd5 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -880,14 +880,14 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched } } } - + bl3d_unswizzle_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue); // --- Create swizzle table buffer --- { auto table = Tegra::Texture::MakeSwizzleTable(); - + swizzle_table_size = static_cast(table.size() * sizeof(table[0])); auto staging = staging_buffer_pool.Request(swizzle_table_size, MemoryUsage::Upload); @@ -896,19 +896,19 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched VkBufferCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .size = swizzle_table_size, - .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - 
VK_BUFFER_USAGE_TRANSFER_DST_BIT | + .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, }; swizzle_table_buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::DeviceLocal); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([staging_buf = staging.buffer, - dst_buf = *swizzle_table_buffer, + scheduler.Record([staging_buf = staging.buffer, + dst_buf = *swizzle_table_buffer, size = swizzle_table_size, src_off = staging.offset](vk::CommandBuffer cmdbuf) { - + const VkBufferCopy region{ .srcOffset = src_off, .dstOffset = 0, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index cc8b888d57..09a1df446f 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -81,7 +81,7 @@ public: void ReinterpretImage(Image& dst, Image& src, std::span copies); void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); - + bool IsSteamDeck() const; bool CanAccelerateImageUpload(Image&) const noexcept { @@ -130,7 +130,7 @@ public: BlitImageHelper& blit_image_helper; RenderPassCache& render_pass_cache; std::optional astc_decoder_pass; - + std::optional bl3d_unswizzle_pass; vk::Buffer swizzle_table_buffer; VkDeviceSize swizzle_table_size = 0; @@ -171,7 +171,7 @@ public: void DownloadMemory(const StagingBufferRef& map, std::span copies); - + void AllocateComputeUnswizzleImage(); [[nodiscard]] VkImage Handle() const noexcept { @@ -200,7 +200,7 @@ public: bool ScaleDown(bool ignore = false); u64 allocation_tick; - + friend class BlockLinearUnswizzle3DPass; private: @@ -213,7 +213,7 @@ private: vk::Image original_image; vk::Image scaled_image; - + vk::Buffer compute_unswizzle_buffer; VkDeviceSize compute_unswizzle_buffer_size = 0; bool has_compute_unswizzle_buffer = false; diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 78b49e8610..cb51ddc3e7 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -125,7 +125,7 @@ struct ImageBase { std::vector aliased_images; std::vector overlapping_images; ImageMapId map_view_id{}; - + boost::container::small_vector dirty_offsets; std::unordered_map sparse_bindings; u32 sparse_tile_size = 65536; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ca746de000..cad8accc10 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -76,7 +76,7 @@ TextureCache
<P>
::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; minimum_memory = 0; - + lowmemorydevice = true; } } @@ -94,7 +94,7 @@ void TextureCache
<P>
::RunGarbageCollector() { ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL; num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10); }; - + const auto Cleanup = [this, &num_iterations, &high_priority_mode, &aggressive_mode](ImageId image_id) { if (num_iterations == 0) { @@ -102,34 +102,34 @@ void TextureCache
<P>
::RunGarbageCollector() { } --num_iterations; auto& image = slot_images[image_id]; - + // Never delete recently allocated sparse textures (within 3 frames) const bool is_recently_allocated = image.allocation_tick >= frame_tick - 3; if (is_recently_allocated && image.info.is_sparse) { return false; } - + if (True(image.flags & ImageFlagBits::IsDecoding)) { // This image is still being decoded, deleting it will invalidate the slot // used by the async decoder thread. return false; } - + // Prioritize large sparse textures for cleanup - const bool is_large_sparse = image.info.is_sparse && + const bool is_large_sparse = image.info.is_sparse && image.guest_size_bytes >= 256_MiB; - - if (!aggressive_mode && !is_large_sparse && + + if (!aggressive_mode && !is_large_sparse && True(image.flags & ImageFlagBits::CostlyLoad)) { return false; } - + const bool must_download = image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); if (!high_priority_mode && !is_large_sparse && must_download) { return false; } - + if (must_download && !is_large_sparse) { auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); const auto copies = FixSmallVectorADL(FullDownloadCopies(image.info)); @@ -138,13 +138,13 @@ void TextureCache
<P>
::RunGarbageCollector() { SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span, swizzle_data_buffer); } - + if (True(image.flags & ImageFlagBits::Tracked)) { UntrackImage(image, image_id); } UnregisterImage(image_id); DeleteImage(image_id, image.scale_tick > frame_tick + 5); - + if (total_used_memory < critical_memory) { if (aggressive_mode) { // Sink the aggresiveness. @@ -165,10 +165,10 @@ void TextureCache
<P>
::RunGarbageCollector() { lru_cache.ForEachItemBelow(frame_tick, [&](ImageId image_id) { auto& image = slot_images[image_id]; // Only target sparse textures that are old enough - if (image.info.is_sparse && + if (image.info.is_sparse && image.guest_size_bytes >= 256_MiB && image.allocation_tick < frame_tick - 3) { - LOG_DEBUG(HW_GPU, "GC targeting old sparse texture at 0x{:X} ({} MiB, age: {} frames)", + LOG_DEBUG(HW_GPU, "GC targeting old sparse texture at 0x{:X} ({} MiB, age: {} frames)", image.gpu_addr, image.guest_size_bytes / (1024 * 1024), frame_tick - image.allocation_tick); return Cleanup(image_id); @@ -658,24 +658,24 @@ void TextureCache
<P>
::UnmapMemory(DAddr cpu_addr, size_t size) { template std::optional TextureCache
<P>
::CalculateSparseBinding( const Image& image, GPUVAddr gpu_addr, DAddr dev_addr) { - + if (!image.info.is_sparse) { return std::nullopt; } - + const u64 offset = gpu_addr - image.gpu_addr; const u64 tile_index = offset / image.sparse_tile_size; const u32 tile_width_blocks = 128; const u32 tile_height_blocks = 32; - + const u32 width_in_tiles = (image.info.size.width / 4 + tile_width_blocks - 1) / tile_width_blocks; const u32 height_in_tiles = (image.info.size.height / 4 + tile_height_blocks - 1) / tile_height_blocks; - + const u32 tile_x = static_cast((tile_index % width_in_tiles) * tile_width_blocks * 4); const u32 tile_y = static_cast(((tile_index / width_in_tiles) % height_in_tiles) * tile_height_blocks * 4); const u32 tile_z = static_cast(tile_index / (width_in_tiles * height_in_tiles)); - + return SparseBinding{ .gpu_addr = gpu_addr, .device_addr = dev_addr, @@ -1133,14 +1133,14 @@ void TextureCache
<P>
::RefreshContents(Image& image, ImageId image_id) { // Only upload modified images return; } - + image.flags &= ~ImageFlagBits::CpuModified; if( lowmemorydevice && image.info.format == PixelFormat::BC1_RGBA_UNORM && MapSizeBytes(image) >= 256_MiB ) { return; } - + TrackImage(image, image_id); - + if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) { LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); runtime.TransitionImageLayout(image); @@ -1156,7 +1156,7 @@ void TextureCache
<P>
::RefreshContents(Image& image, ImageId image_id) { image.info.resources.layers == 1 && MapSizeBytes(image) >= 128_MiB && False(image.flags & ImageFlagBits::GpuModified)) { - + QueueAsyncUnswizzle(image, image_id); return; } @@ -1411,7 +1411,7 @@ void TextureCache
<P>
::QueueAsyncUnswizzle(Image& image, ImageId image_id) { } image.flags |= ImageFlagBits::IsDecoding; - + unswizzle_queue.push_back({ .image_id = image_id, .info = image.info @@ -1448,31 +1448,31 @@ void TextureCache
<P>
::TickAsyncUnswizzle() { if (unswizzle_queue.empty()) { return; } - + if(current_unswizzle_frame > 0) { current_unswizzle_frame--; return; } - + PendingUnswizzle& task = unswizzle_queue.front(); Image& image = slot_images[task.image_id]; - + if (!task.initialized) { task.total_size = MapSizeBytes(image); task.staging_buffer = runtime.UploadStagingBuffer(task.total_size, true); - + const auto& info = image.info; const u32 bytes_per_block = BytesPerBlock(info.format); const u32 width_blocks = Common::DivCeil(info.size.width, 4u); const u32 height_blocks = Common::DivCeil(info.size.height, 4u); - + const u32 stride = width_blocks * bytes_per_block; const u32 aligned_height = height_blocks; task.bytes_per_slice = static_cast(stride) * aligned_height; task.last_submitted_offset = 0; task.initialized = true; } - + size_t CHUNK_SIZE; switch (Settings::values.gpu_unzwizzle_stream_size.GetValue()) { case Settings::GpuUnswizzle::VeryLow: CHUNK_SIZE = 4_MiB; break; @@ -1492,28 +1492,28 @@ void TextureCache
<P>
::TickAsyncUnswizzle() { case Settings::GpuUnswizzleChunk::High: SLICES_PER_BATCH = 512; break; default: SLICES_PER_BATCH = 128; } - + // Read data if (task.current_offset < task.total_size) { const size_t remaining = task.total_size - task.current_offset; - + size_t copy_amount = std::min(CHUNK_SIZE, remaining); if (remaining > CHUNK_SIZE) { copy_amount = (copy_amount / task.bytes_per_slice) * task.bytes_per_slice; if (copy_amount == 0) copy_amount = task.bytes_per_slice; } - - gpu_memory->ReadBlock(image.gpu_addr + task.current_offset, - task.staging_buffer.mapped_span.data() + task.current_offset, + + gpu_memory->ReadBlock(image.gpu_addr + task.current_offset, + task.staging_buffer.mapped_span.data() + task.current_offset, copy_amount); task.current_offset += copy_amount; } - + const bool is_final_batch = task.current_offset >= task.total_size; const size_t bytes_ready = task.current_offset - task.last_submitted_offset; const u32 complete_slices = static_cast(bytes_ready / task.bytes_per_slice); - + if (complete_slices >= SLICES_PER_BATCH || (is_final_batch && complete_slices > 0)) { const u32 z_start = static_cast(task.last_submitted_offset / task.bytes_per_slice); const u32 slices_to_process = std::min(complete_slices, SLICES_PER_BATCH); @@ -1525,16 +1525,16 @@ void TextureCache
<P>
::TickAsyncUnswizzle() { task.last_submitted_offset += (static_cast(z_count) * task.bytes_per_slice); } } - + // Check if complete const u32 slices_submitted = static_cast(task.last_submitted_offset / task.bytes_per_slice); const bool all_slices_submitted = slices_submitted >= image.info.size.depth; - + if (is_final_batch && all_slices_submitted) { runtime.FreeDeferredStagingBuffer(task.staging_buffer); image.flags &= ~ImageFlagBits::IsDecoding; unswizzle_queue.pop_front(); - + // Wait 4 frames to process the next entry current_unswizzle_frame = 4u; } @@ -1578,29 +1578,29 @@ ImageId TextureCache
<P>
::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, } } ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); - + // For large sparse textures, aggressively clean up old allocations at same address if (info.is_sparse && CalculateGuestSizeInBytes(info) >= 256_MiB) { const auto alloc_it = image_allocs_table.find(gpu_addr); if (alloc_it != image_allocs_table.end()) { const ImageAllocId alloc_id = alloc_it->second; auto& alloc_images = slot_image_allocs[alloc_id].images; - + // Collect old images at this address that were created more than 2 frames ago boost::container::small_vector to_delete; for (ImageId old_image_id : alloc_images) { Image& old_image = slot_images[old_image_id]; - if (old_image.info.is_sparse && + if (old_image.info.is_sparse && old_image.gpu_addr == gpu_addr && old_image.allocation_tick < frame_tick - 2) { // Try not to delete fresh textures to_delete.push_back(old_image_id); } } - + // Delete old images immediately for (ImageId old_id : to_delete) { Image& old_image = slot_images[old_id]; - LOG_DEBUG(HW_GPU, "Immediately deleting old sparse texture at 0x{:X} ({} MiB)", + LOG_DEBUG(HW_GPU, "Immediately deleting old sparse texture at 0x{:X} ({} MiB)", gpu_addr, old_image.guest_size_bytes / (1024 * 1024)); if (True(old_image.flags & ImageFlagBits::Tracked)) { UntrackImage(old_image, old_id); @@ -1610,7 +1610,7 @@ ImageId TextureCache
<P>
::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, } } } - + const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); const Image& image = slot_images[image_id]; // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different @@ -1626,11 +1626,11 @@ template ImageId TextureCache
<P>
::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr) { ImageInfo new_info = info; const size_t size_bytes = CalculateGuestSizeInBytes(new_info); - + // Proactive cleanup for large sparse texture allocations if (new_info.is_sparse && size_bytes >= 256_MiB) { const u64 estimated_alloc_size = size_bytes; - + if (total_used_memory + estimated_alloc_size >= critical_memory) { LOG_DEBUG(HW_GPU, "Large sparse texture allocation ({} MiB) - running aggressive GC. " "Current memory: {} MiB, Critical: {} MiB", @@ -1638,7 +1638,7 @@ ImageId TextureCache
<P>
::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA total_used_memory / (1024 * 1024), critical_memory / (1024 * 1024)); RunGarbageCollector(); - + // If still over threshold after GC, try one more aggressive pass if (total_used_memory + estimated_alloc_size >= critical_memory) { LOG_DEBUG(HW_GPU, "Still critically low on memory, running second GC pass"); @@ -1646,7 +1646,7 @@ ImageId TextureCache
<P>
::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA } } } - + const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool native_bgr = runtime.HasNativeBgr(); join_overlap_ids.clear(); @@ -1742,7 +1742,7 @@ ImageId TextureCache
<P>
::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); Image& new_image = slot_images[new_image_id]; - + new_image.allocation_tick = frame_tick; if (!gpu_memory->IsContinuousRange(new_image.gpu_addr, new_image.guest_size_bytes) && diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 616d77776a..1dc1da1698 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -224,7 +224,7 @@ public: /// Remove images in a region void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size, DAddr dev_addr); - + /// Basic sparse binding std::optional CalculateSparseBinding( const Image& image, GPUVAddr gpu_addr, DAddr dev_addr); @@ -327,7 +327,7 @@ private: /// Refresh the contents (pixel data) of an image void RefreshContents(Image& image, ImageId image_id); - + /// Sparse texture partial upload template void UploadSparseDirtyTiles(Image& image, StagingBuffer& staging); diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index e9300cbae5..51379753b5 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -73,7 +73,7 @@ struct SparseTileUnswizzleResult { [[nodiscard]] boost::container::small_vector UnswizzleImage( Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, std::span input, std::span output); - + void ConvertImage(std::span input, const ImageInfo& info, std::span output, std::span copies);
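
As a sanity check on the byte offsets annotated in BlockLinearUnswizzle3DPushConstants above, here is a minimal standalone C++ sketch (not part of the patch). The struct is re-declared with plain fixed-width types purely for illustration; the offsets and the alignas(16) come from the hunk, the static_asserts are mine.

// Compile-time verification of the push-constant layout annotated in the patch.
#include <cstddef>
#include <cstdint>

struct alignas(16) PushConstants {
    std::uint32_t blocks_dim[3];         // offset 0
    std::uint32_t bytes_per_block_log2;  // offset 12
    std::uint32_t origin[3];             // offset 16
    std::uint32_t slice_size;            // offset 28
    std::uint32_t block_size;            // offset 32
    std::uint32_t x_shift;               // offset 36
    std::uint32_t block_height;          // offset 40
    std::uint32_t block_height_mask;     // offset 44
    std::uint32_t block_depth;           // offset 48
    std::uint32_t block_depth_mask;      // offset 52
    std::int32_t _pad;                   // offset 56
    std::int32_t destination[3];         // offset 60
    std::int32_t _pad_end;               // offset 72
};

static_assert(offsetof(PushConstants, origin) == 16);
static_assert(offsetof(PushConstants, block_size) == 32);
static_assert(offsetof(PushConstants, destination) == 60);
static_assert(offsetof(PushConstants, _pad_end) == 72);
static_assert(sizeof(PushConstants) == 80);  // 76 bytes of members, padded to a 16-byte multiple

int main() {
    return 0;
}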
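
The dispatch and output-size math in UnswizzleChunk can be checked in isolation. The sketch below (not part of the patch) mirrors the DivCeil calls, the (width + 3) / 4 block rounding, and the output_slice_size / barrier_size computation from the hunk; the 512x512x64 texture, the 8 bytes per block, and the 32-slice batch are made-up example values.

// Standalone sketch of BlockLinearUnswizzle3DPass dispatch sizing.
#include <algorithm>
#include <cstdint>
#include <cstdio>

static std::uint32_t DivCeil(std::uint32_t n, std::uint32_t d) {
    return (n + d - 1) / d;
}

int main() {
    // Hypothetical 3D BC1 texture: 512x512x64, 8 bytes per 4x4 block.
    const std::uint32_t width = 512, height = 512, depth = 64;
    const std::uint32_t bytes_per_block = 8;

    const std::uint32_t blocks_x = (width + 3) / 4;
    const std::uint32_t blocks_y = (height + 3) / 4;

    // One batch covers z_count slices; gx/gy/gz mirror the workgroup counts in the hunk.
    const std::uint32_t z_count = std::min(depth, 32u);
    const std::uint32_t gx = DivCeil(blocks_x, 8u);
    const std::uint32_t gy = DivCeil(blocks_y, 8u);
    const std::uint32_t gz = DivCeil(z_count, 4u);

    // Matches output_slice_size / barrier_size: the intermediate buffer only has to
    // hold the slices of the current batch, not the whole image.
    const std::uint64_t output_slice_size =
        static_cast<std::uint64_t>(blocks_x) * blocks_y * bytes_per_block;
    const std::uint64_t barrier_size = output_slice_size * z_count;

    std::printf("dispatch %ux%ux%u, %llu bytes per slice, %llu bytes barriered\n",
                static_cast<unsigned>(gx), static_cast<unsigned>(gy), static_cast<unsigned>(gz),
                static_cast<unsigned long long>(output_slice_size),
                static_cast<unsigned long long>(barrier_size));
    return 0;
}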
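
The slice-aligned streaming in TickAsyncUnswizzle is easiest to see end to end. The sketch below (not part of the patch) follows the same bookkeeping as the hunk: chunks are rounded down to whole compressed slices so every submitted batch starts on a slice boundary, and z_start / z_count are derived from the submitted byte offset. The 16 MiB chunk and 128-slice batch correspond to the Medium settings shown in the switch statements; the texture dimensions and format are made up, and the read is simulated rather than calling gpu_memory->ReadBlock.

// Standalone sketch of the chunked, slice-aligned upload loop.
#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
    constexpr std::uint64_t MiB = 1024ull * 1024ull;
    const std::uint32_t width = 256, height = 256, depth = 512;
    const std::uint32_t bytes_per_block = 16;  // e.g. a 16-byte-per-block compressed format

    const std::uint32_t width_blocks = (width + 3) / 4;
    const std::uint32_t height_blocks = (height + 3) / 4;
    const std::uint64_t bytes_per_slice =
        static_cast<std::uint64_t>(width_blocks) * bytes_per_block * height_blocks;
    const std::uint64_t total_size = bytes_per_slice * depth;

    const std::uint64_t chunk_size = 16 * MiB;   // GpuUnswizzle::Medium
    const std::uint32_t slices_per_batch = 128;  // GpuUnswizzleChunk::Medium

    std::uint64_t current_offset = 0, last_submitted = 0;
    while (last_submitted < total_size) {
        if (current_offset < total_size) {
            const std::uint64_t remaining = total_size - current_offset;
            std::uint64_t copy_amount = std::min(chunk_size, remaining);
            if (remaining > chunk_size) {
                // Round down to a whole number of slices, but never stall on a huge slice.
                copy_amount = (copy_amount / bytes_per_slice) * bytes_per_slice;
                if (copy_amount == 0) {
                    copy_amount = bytes_per_slice;
                }
            }
            current_offset += copy_amount;  // stands in for the guest-memory read
        }
        const bool is_final = current_offset >= total_size;
        const std::uint32_t complete_slices =
            static_cast<std::uint32_t>((current_offset - last_submitted) / bytes_per_slice);
        if (complete_slices >= slices_per_batch || (is_final && complete_slices > 0)) {
            const std::uint32_t z_start =
                static_cast<std::uint32_t>(last_submitted / bytes_per_slice);
            const std::uint32_t z_count = std::min(complete_slices, slices_per_batch);
            std::printf("submit z_start=%u z_count=%u\n",
                        static_cast<unsigned>(z_start), static_cast<unsigned>(z_count));
            last_submitted += static_cast<std::uint64_t>(z_count) * bytes_per_slice;
        }
    }
    return 0;
}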
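
Finally, the tile-index-to-coordinate math in CalculateSparseBinding, pulled out as a standalone sketch (not part of the patch). The 128x32-block tile footprint and the 4x4-texel block size come from the hunk; the 2048x1024 texture, the 64 KiB tile size, and the example offset are made up.

// Standalone sketch of mapping a linear sparse tile index to block-aligned texel coordinates.
#include <cstdint>
#include <cstdio>

struct TileCoords {
    std::uint32_t x, y, z;
};

static TileCoords TileIndexToCoords(std::uint64_t tile_index,
                                    std::uint32_t width_texels,
                                    std::uint32_t height_texels) {
    constexpr std::uint32_t tile_width_blocks = 128;
    constexpr std::uint32_t tile_height_blocks = 32;
    // Block-compressed images: one block covers 4x4 texels.
    const std::uint32_t width_in_tiles =
        (width_texels / 4 + tile_width_blocks - 1) / tile_width_blocks;
    const std::uint32_t height_in_tiles =
        (height_texels / 4 + tile_height_blocks - 1) / tile_height_blocks;
    return TileCoords{
        static_cast<std::uint32_t>((tile_index % width_in_tiles) * tile_width_blocks * 4),
        static_cast<std::uint32_t>(((tile_index / width_in_tiles) % height_in_tiles) *
                                   tile_height_blocks * 4),
        static_cast<std::uint32_t>(tile_index / (width_in_tiles * height_in_tiles)),
    };
}

int main() {
    // Hypothetical 2048x1024 sparse texture with 64 KiB tiles; bind the sixth tile.
    const std::uint32_t sparse_tile_size = 65536;
    const std::uint64_t byte_offset = 5ull * sparse_tile_size;
    const TileCoords c = TileIndexToCoords(byte_offset / sparse_tile_size, 2048, 1024);
    std::printf("tile 5 -> texel origin (%u, %u, %u)\n",
                static_cast<unsigned>(c.x), static_cast<unsigned>(c.y),
                static_cast<unsigned>(c.z));
    return 0;
}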