diff --git a/src/common/slot_vector.h b/src/common/slot_vector.h index e464d3d948..2de5b1fa1f 100644 --- a/src/common/slot_vector.h +++ b/src/common/slot_vector.h @@ -130,6 +130,17 @@ public: ResetStorageBit(id.index); } + [[nodiscard]] bool Contains(SlotId id) const noexcept { + if (!id) { + return false; + } + const size_t word = id.index / 64; + if (word >= stored_bitset.size()) { + return false; + } + return ((stored_bitset[word] >> (id.index % 64)) & 1) != 0; + } + [[nodiscard]] Iterator begin() noexcept { const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; }); if (it == stored_bitset.end()) { diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 61e4da9609..d82784ae18 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -91,6 +91,10 @@ public: uncommitted_operations.clear(); } QueueFence(new_fence); + if (!new_fence->IsStubbed()) { + std::scoped_lock lock{texture_cache.mutex}; + texture_cache.CommitPendingGpuAccesses(new_fence->WaitTick()); + } fences.push(std::move(new_fence)); if (should_flush) { rasterizer.FlushCommands(); @@ -179,7 +183,7 @@ private: return; } } - PopAsyncFlushes(); + PopAsyncFlushes(current_fence->WaitTick()); auto operations = std::move(pending_operations.front()); pending_operations.pop_front(); for (auto& operation : operations) { @@ -214,7 +218,7 @@ private: if (!current_fence->IsStubbed()) { WaitFence(current_fence); } - PopAsyncFlushes(); + PopAsyncFlushes(current_fence->WaitTick()); for (auto& operation : current_operations) { operation(); } @@ -237,10 +241,11 @@ private: query_cache.HasUncommittedFlushes(); } - void PopAsyncFlushes() { + void PopAsyncFlushes(u64 completed_tick) { { std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; texture_cache.PopAsyncFlushes(); + texture_cache.CompleteGpuAccesses(completed_tick); buffer_cache.PopAsyncFlushes(); } query_cache.PopAsyncFlushes(); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 1b30e2c76c..786667343a 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -143,6 +143,8 @@ public: void TickFrame() {} + void WaitForGpuTick(u64) {} + StateTracker& GetStateTracker() { return state_tracker; } diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 3365735743..0ea9be276b 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -34,6 +34,10 @@ public: void Wait(); + [[nodiscard]] u64 WaitTick() const noexcept { + return wait_tick; + } + private: Scheduler& scheduler; u64 wait_tick = 0; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 8f499f6b79..4e3f657cb5 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -910,6 +910,13 @@ void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) { staging_buffer_pool.FreeDeferred(ref); } +void TextureCacheRuntime::WaitForGpuTick(u64 tick) { + if (tick == 0) { + return; + } + scheduler.Wait(tick); +} + bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { if (VideoCore::Surface::GetFormatType(dst.info.format) == VideoCore::Surface::SurfaceType::DepthStencil && diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index b4df3ccaac..0c62924070 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -6,6 +6,7 @@ #pragma once +#include #include #include "video_core/texture_cache/texture_cache_base.h" @@ -59,6 +60,8 @@ public: void TickFrame(); + void WaitForGpuTick(u64 tick); + u64 GetDeviceLocalMemory() const; u64 GetDeviceMemoryUsage() const; @@ -177,6 +180,30 @@ public: return (this->*current_image).UsageFlags(); } + void TrackGpuReadTick(u64 tick) noexcept { + TrackPendingReadTick(tick); + } + + void TrackGpuWriteTick(u64 tick) noexcept { + TrackPendingWriteTick(tick); + } + + void CompleteGpuReadTick(u64 completed_tick) noexcept { + ClearPendingReadTick(completed_tick); + } + + void CompleteGpuWriteTick(u64 completed_tick) noexcept { + ClearPendingWriteTick(completed_tick); + } + + [[nodiscard]] std::optional PendingGpuReadTick() const noexcept { + return PendingReadTick(); + } + + [[nodiscard]] std::optional PendingGpuWriteTick() const noexcept { + return PendingWriteTick(); + } + /// Returns true when the image is already initialized and mark it as initialized [[nodiscard]] bool ExchangeInitialization() noexcept { return std::exchange(initialized, true); diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 0587d7b724..e2f9f94d83 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -1,8 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once +#include #include #include #include @@ -58,6 +62,50 @@ struct ImageBase { explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); explicit ImageBase(const NullImageParams&); + void TrackPendingReadTick(u64 tick) noexcept { + if (pending_read_tick) { + *pending_read_tick = std::max(*pending_read_tick, tick); + } else { + pending_read_tick = tick; + } + } + + void TrackPendingWriteTick(u64 tick) noexcept { + if (pending_write_tick) { + *pending_write_tick = std::max(*pending_write_tick, tick); + } else { + pending_write_tick = tick; + } + } + + void ClearPendingReadTick(u64 completed_tick) noexcept { + if (pending_read_tick && completed_tick >= *pending_read_tick) { + pending_read_tick.reset(); + } + } + + void ClearPendingWriteTick(u64 completed_tick) noexcept { + if (pending_write_tick && completed_tick >= *pending_write_tick) { + pending_write_tick.reset(); + } + } + + [[nodiscard]] bool HasPendingReadTick() const noexcept { + return pending_read_tick.has_value(); + } + + [[nodiscard]] bool HasPendingWriteTick() const noexcept { + return pending_write_tick.has_value(); + } + + [[nodiscard]] std::optional PendingReadTick() const noexcept { + return pending_read_tick; + } + + [[nodiscard]] std::optional PendingWriteTick() const noexcept { + return pending_write_tick; + } + [[nodiscard]] std::optional TryFindBase(GPUVAddr other_addr) const noexcept; [[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept; @@ -115,6 +163,9 @@ struct ImageBase { std::vector aliased_images; std::vector overlapping_images; ImageMapId map_view_id{}; + + std::optional pending_read_tick; + std::optional pending_write_tick; }; struct ImageMapView { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 73c6034a1d..d8230a5f1a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -540,6 +540,7 @@ void TextureCache

::WriteMemory(DAddr cpu_addr, size_t size) { if (True(image.flags & ImageFlagBits::CpuModified)) { return; } + EnsureImageReady(image, ImageAccessType::Write); image.flags |= ImageFlagBits::CpuModified; if (True(image.flags & ImageFlagBits::Tracked)) { UntrackImage(image, image_id); @@ -550,11 +551,12 @@ void TextureCache

::WriteMemory(DAddr cpu_addr, size_t size) { template void TextureCache

::DownloadMemory(DAddr cpu_addr, size_t size) { boost::container::small_vector images; - ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { + ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { if (!image.IsSafeDownload()) { return; } image.flags &= ~ImageFlagBits::GpuModified; + EnsureImageReady(this->slot_images[image_id], ImageAccessType::Read); images.push_back(image_id); }); if (images.empty()) { @@ -606,6 +608,7 @@ void TextureCache

::UnmapMemory(DAddr cpu_addr, size_t size) { ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { Image& image = slot_images[id]; + EnsureImageReady(image, ImageAccessType::Write); if (True(image.flags & ImageFlagBits::Tracked)) { UntrackImage(image, id); } @@ -621,6 +624,7 @@ void TextureCache

::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { Image& image = slot_images[id]; + EnsureImageReady(image, ImageAccessType::Write); if (False(image.flags & ImageFlagBits::CpuModified)) { image.flags |= ImageFlagBits::CpuModified; if (True(image.flags & ImageFlagBits::Tracked)) { @@ -2423,6 +2427,8 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { template void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { Image& image = slot_images[image_id]; + EnsureImageReady(image, is_modification ? ImageAccessType::Write : ImageAccessType::Read); + TrackGpuImageAccess(image_id, is_modification ? ImageAccessType::Write : ImageAccessType::Read); runtime.TransitionImageLayout(image); if (invalidate) { image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); @@ -2439,6 +2445,85 @@ void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool lru_cache.Touch(image.lru_index, frame_tick); } +template +void TextureCache

::TrackGpuImageAccess(ImageId image_id, ImageAccessType access) { + staged_gpu_accesses.push_back({image_id, access}); +} + +template +void TextureCache

::CommitPendingGpuAccesses(u64 fence_value) { + if (staged_gpu_accesses.empty()) { + return; + } + if (fence_value == 0) { + return; + } + auto& batch = committed_gpu_accesses.emplace_back(std::move(staged_gpu_accesses)); + staged_gpu_accesses.clear(); + committed_gpu_ticks.push_back(fence_value); + for (const PendingImageAccess& access : batch) { + ImageBase& image = slot_images[access.image_id]; + if (access.access == ImageAccessType::Read) { + image.TrackPendingReadTick(fence_value); + } else { + image.TrackPendingWriteTick(fence_value); + } + } +} + +template +void TextureCache

::CompleteGpuAccesses(u64 completed_fence) { + if (completed_fence == 0) { + return; + } + while (!committed_gpu_ticks.empty() && committed_gpu_ticks.front() <= completed_fence) { + auto accesses = std::move(committed_gpu_accesses.front()); + committed_gpu_accesses.pop_front(); + committed_gpu_ticks.pop_front(); + for (const PendingImageAccess& access : accesses) { + if (!slot_images.Contains(access.image_id)) { + continue; + } + ImageBase& image = slot_images[access.image_id]; + if (access.access == ImageAccessType::Read) { + image.ClearPendingReadTick(completed_fence); + } else { + image.ClearPendingWriteTick(completed_fence); + } + } + } +} + +template +void TextureCache

::EnsureImageReady(ImageBase& image, ImageAccessType access) { + auto wait_tick = [this](std::optional tick) -> std::optional { + if (!tick) { + return std::nullopt; + } + runtime.WaitForGpuTick(*tick); + return tick; + }; + + if (access == ImageAccessType::Write) { + if (const auto tick = image.PendingReadTick()) { + if (const auto waited = wait_tick(tick)) { + image.ClearPendingReadTick(*waited); + } + } + if (const auto tick = image.PendingWriteTick()) { + if (const auto waited = wait_tick(tick)) { + image.ClearPendingWriteTick(*waited); + } + } + } else { + if (const auto tick = image.PendingWriteTick()) { + if (const auto waited = wait_tick(tick)) { + image.ClearPendingWriteTick(*waited); + } + } + } +} + template void TextureCache

::PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate) { @@ -2456,6 +2541,8 @@ template void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::vector copies) { Image& dst = slot_images[dst_id]; Image& src = slot_images[src_id]; + EnsureImageReady(dst, ImageAccessType::Write); + EnsureImageReady(src, ImageAccessType::Read); const bool is_rescaled = True(src.flags & ImageFlagBits::Rescaled); if (is_rescaled) { ASSERT(True(dst.flags & ImageFlagBits::Rescaled)); @@ -2472,20 +2559,30 @@ void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::vector::CopyImage(ImageId dst_id, ImageId src_id, std::vector diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index ee10ac2d23..20d444a69f 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -263,6 +263,15 @@ public: /// Prepare an image to be used void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); + /// Track that an image participates in upcoming GPU work with the given access type + void TrackGpuImageAccess(ImageId image_id, ImageAccessType access); + + /// Notify the cache that tracked GPU work has been submitted with the specified fence value + void CommitPendingGpuAccesses(u64 fence_value); + + /// Notify the cache that a fence value has completed so tracked accesses can be released + void CompleteGpuAccesses(u64 completed_fence); + std::recursive_mutex mutex; private: @@ -413,6 +422,8 @@ private: /// Execute copies from one image to the other, even if they are incompatible void CopyImage(ImageId dst_id, ImageId src_id, std::vector copies); + void EnsureImageReady(ImageBase& image, ImageAccessType access); + /// Bind an image view as render target, downloading resources preemtively if needed void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); @@ -470,6 +481,11 @@ private: Common::SlotId object_id; }; + struct PendingImageAccess { + ImageId image_id; + ImageAccessType access; + }; + Common::SlotVector slot_images; Common::SlotVector slot_map_views; Common::SlotVector slot_image_views; @@ -485,6 +501,9 @@ private: std::vector uncommitted_async_buffers; std::deque> async_buffers; std::deque async_buffers_death_ring; + std::vector staged_gpu_accesses; + std::deque> committed_gpu_accesses; + std::deque committed_gpu_ticks; struct LRUItemParams { using ObjectType = ImageId; diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index ecacd8c6bd..20c17e49a6 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -155,4 +155,9 @@ struct SwizzleParameters { s32 level; }; +enum class ImageAccessType : u8 { + Read, + Write, +}; + } // namespace VideoCommon