diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index cb1c0fab83..be97f5ab05 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -123,12 +123,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr de [[maybe_unused]] const auto current_entry_type = GetEntry(current_gpu_addr); SetEntry(current_gpu_addr, entry_type); if (current_entry_type != entry_type) { - if constexpr (entry_type == EntryType::Mapped) { - const DAddr current_dev_addr = dev_addr + offset; - rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size, current_dev_addr); - } else { - rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size, 0u); - } + rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size); } if constexpr (entry_type == EntryType::Mapped) { const DAddr current_dev_addr = dev_addr + offset; @@ -151,12 +146,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr [[maybe_unused]] const auto current_entry_type = GetEntry(current_gpu_addr); SetEntry(current_gpu_addr, entry_type); if (current_entry_type != entry_type) { - if constexpr (entry_type == EntryType::Mapped) { - const DAddr current_dev_addr = dev_addr + offset; - rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size, current_dev_addr); - } else { - rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size, 0u); - } + rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size); } if constexpr (entry_type == EntryType::Mapped) { const DAddr current_dev_addr = dev_addr + offset; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 9b08f47ef3..d097a4740e 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -123,7 +123,7 @@ public: virtual void UnmapMemory(DAddr addr, u64 size) = 0; /// Remap GPU memory 
range. This means underneath backing memory changed - virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size, DAddr d_addr) = 0; + virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// and invalidated diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index 0f9d1a01d5..378e685315 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp @@ -65,7 +65,7 @@ VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(PAddr addr, u64 s } void RasterizerNull::InvalidateGPUCache() {} void RasterizerNull::UnmapMemory(DAddr addr, u64 size) {} -void RasterizerNull::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size, DAddr d_addr) {} +void RasterizerNull::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {} void RasterizerNull::SignalFence(std::function&& func) { func(); } diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index 905a22cef8..85b79a2137 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h @@ -61,7 +61,7 @@ public: VideoCore::RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) override; void InvalidateGPUCache() override; void UnmapMemory(DAddr addr, u64 size) override; - void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size, DAddr d_addr) override; + void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; void SignalFence(std::function&& func) override; void SyncOperation(std::function&& func) override; void SignalSyncPoint(u32 value) override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7bfd00f7bd..602509bfdb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ 
b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -600,10 +600,10 @@ void RasterizerOpenGL::UnmapMemory(DAddr addr, u64 size) { shader_cache.OnCacheInvalidation(addr, size); } -void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size, DAddr d_addr) { +void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { { std::scoped_lock lock{texture_cache.mutex}; - texture_cache.UnmapGPUMemory(as_id, addr, size, d_addr); + texture_cache.UnmapGPUMemory(as_id, addr, size); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index f41c64b002..f952a4f34c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -106,7 +106,7 @@ public: bool OnCPUWrite(PAddr addr, u64 size) override; void InvalidateGPUCache() override; void UnmapMemory(DAddr addr, u64 size) override; - void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size, DAddr d_addr) override; + void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; void SignalFence(std::function&& func) override; void SyncOperation(std::function&& func) override; void SignalSyncPoint(u32 value) override; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 521e19621d..101a884fd7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -748,10 +748,10 @@ void RasterizerVulkan::UnmapMemory(DAddr addr, u64 size) { pipeline_cache.OnCacheInvalidation(addr, size); } -void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size, DAddr d_addr) { +void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { { std::scoped_lock lock{texture_cache.mutex}; - texture_cache.UnmapGPUMemory(as_id, addr, size, d_addr); + texture_cache.UnmapGPUMemory(as_id, addr, size); } } diff --git 
a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index f076cbc42b..b689c6b660 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -108,7 +108,7 @@ public: bool OnCPUWrite(DAddr addr, u64 size) override; void InvalidateGPUCache() override; void UnmapMemory(DAddr addr, u64 size) override; - void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size, DAddr d_addr) override; + void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; void SignalFence(std::function&& func) override; void SyncOperation(std::function&& func) override; void SignalSyncPoint(u32 value) override; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 3aab4ba276..e19029812b 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -44,14 +44,14 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_) master_semaphore{std::make_unique(device)}, command_pool{std::make_unique(*master_semaphore, device)} { - // PRE-OPTIMIZATION: Warm up the pool to prevent mid-frame spikes + /*// PRE-OPTIMIZATION: Warm up the pool to prevent mid-frame spikes { std::scoped_lock rl{reserve_mutex}; chunk_reserve.reserve(2048); // Prevent vector resizing for (int i = 0; i < 1024; ++i) { chunk_reserve.push_back(std::make_unique()); } - } + }*/ AcquireNewChunk(); AllocateWorkerCommandBuffer(); diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index cb51ddc3e7..f0ae4f34a3 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -55,13 +55,6 @@ struct AliasedImage { ImageId id; }; -struct SparseBinding { - GPUVAddr gpu_addr; // Virtual GPU address of this tile - DAddr device_addr; // Physical device memory address - u64 tile_index; // Linear tile index in the 
texture - Extent3D tile_coord; // 3D coordinate of this tile -}; - struct NullImageParams {}; struct ImageBase { @@ -125,10 +118,6 @@ struct ImageBase { std::vector aliased_images; std::vector overlapping_images; ImageMapId map_view_id{}; - - boost::container::small_vector dirty_offsets; - std::unordered_map sparse_bindings; - u32 sparse_tile_size = 65536; }; struct ImageMapView { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c71d990ea6..425c8e23de 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -143,7 +143,8 @@ void TextureCache
<P>
::RunGarbageCollector() { } // Prioritize large sparse textures for cleanup - const bool is_large_sparse = image.info.is_sparse && + const bool is_large_sparse = lowmemorydevice && + image.info.is_sparse && image.guest_size_bytes >= 256_MiB; if (!aggressive_mode && !is_large_sparse && @@ -192,7 +193,8 @@ void TextureCache
<P>
::RunGarbageCollector() { lru_cache.ForEachItemBelow(frame_tick, [&](ImageId image_id) { auto& image = slot_images[image_id]; // Only target sparse textures that are old enough - if (image.info.is_sparse && + if (lowmemorydevice && + image.info.is_sparse && image.guest_size_bytes >= 256_MiB && image.allocation_tick < frame_tick - 3) { LOG_DEBUG(HW_GPU, "GC targeting old sparse texture at 0x{:X} ({} MiB, age: {} frames)", @@ -683,36 +685,7 @@ void TextureCache
<P>
::UnmapMemory(DAddr cpu_addr, size_t size) { } template <class P> -std::optional<SparseBinding> TextureCache
<P>
::CalculateSparseBinding( - const Image& image, GPUVAddr gpu_addr, DAddr dev_addr) { - - if (!image.info.is_sparse) { - return std::nullopt; - } - - const u64 offset = gpu_addr - image.gpu_addr; - const u64 tile_index = offset / image.sparse_tile_size; - - const u32 tile_width_blocks = 128; - const u32 tile_height_blocks = 32; - - const u32 width_in_tiles = (image.info.size.width / 4 + tile_width_blocks - 1) / tile_width_blocks; - const u32 height_in_tiles = (image.info.size.height / 4 + tile_height_blocks - 1) / tile_height_blocks; - - const u32 tile_x = static_cast<u32>((tile_index % width_in_tiles) * tile_width_blocks * 4); - const u32 tile_y = static_cast<u32>(((tile_index / width_in_tiles) % height_in_tiles) * tile_height_blocks * 4); - const u32 tile_z = static_cast<u32>(tile_index / (width_in_tiles * height_in_tiles)); - - return SparseBinding{ - .gpu_addr = gpu_addr, - .device_addr = dev_addr, - .tile_index = tile_index, - .tile_coord = {tile_x, tile_y, tile_z} - }; -} - -template <class P> -void TextureCache
<P>
::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size, DAddr dev_addr) { +void TextureCache
<P>
::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { boost::container::small_vector deleted_images; ForEachImageInRegionGPU(as_id, gpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); @@ -728,15 +701,6 @@ void TextureCache
<P>
::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz continue; } image.flags |= ImageFlagBits::Remapped; - - if (image.info.is_sparse && dev_addr != 0) { - // Calculate and store the binding - auto binding = CalculateSparseBinding(image, gpu_addr, dev_addr); - if (binding) { - image.sparse_bindings[gpu_addr] = *binding; - image.dirty_offsets.push_back(binding->tile_index); - } - } } } @@ -1587,7 +1551,7 @@ ImageId TextureCache
<P>
::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); // For large sparse textures, aggressively clean up old allocations at same address - if (info.is_sparse && CalculateGuestSizeInBytes(info) >= 256_MiB) { + if (lowmemorydevice && info.is_sparse && CalculateGuestSizeInBytes(info) >= 256_MiB) { const auto alloc_it = image_allocs_table.find(gpu_addr); if (alloc_it != image_allocs_table.end()) { const ImageAllocId alloc_id = alloc_it->second; @@ -1635,7 +1599,7 @@ ImageId TextureCache
<P>
::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA const size_t size_bytes = CalculateGuestSizeInBytes(new_info); // Proactive cleanup for large sparse texture allocations - if (new_info.is_sparse && size_bytes >= 256_MiB) { + if (lowmemorydevice && new_info.is_sparse && size_bytes >= 256_MiB) { const u64 estimated_alloc_size = size_bytes; if (total_used_memory + estimated_alloc_size >= critical_memory) { @@ -2690,7 +2654,6 @@ void TextureCache
<P>
::SynchronizeAliases(ImageId image_id) { template <class P> void TextureCache
<P>
::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { Image& image = slot_images[image_id]; - runtime.TransitionImageLayout(image); if (invalidate) { image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); if (False(image.flags & ImageFlagBits::Tracked)) { diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 5c8f420bfc..42f1a158d9 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -223,11 +223,7 @@ public: void UnmapMemory(DAddr cpu_addr, size_t size); /// Remove images in a region - void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size, DAddr dev_addr); - - /// Basic sparse binding - std::optional<SparseBinding> CalculateSparseBinding( - const Image& image, GPUVAddr gpu_addr, DAddr dev_addr); + void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); /// Blit an image with the given parameters bool BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,