Browse Source

[gl, vk] Access Tracking Synchronization

eds-true-adreno-fixes
CamilleLaVey 3 weeks ago
committed by Caio Oliveira
parent
commit
3fc9f0efe3
No known key found for this signature in database GPG Key ID: AAAE6C7FD4186B0C
  1. 11
      src/common/slot_vector.h
  2. 11
      src/video_core/fence_manager.h
  3. 2
      src/video_core/renderer_opengl/gl_texture_cache.h
  4. 4
      src/video_core/renderer_vulkan/vk_fence_manager.h
  5. 7
      src/video_core/renderer_vulkan/vk_texture_cache.cpp
  6. 27
      src/video_core/renderer_vulkan/vk_texture_cache.h
  7. 51
      src/video_core/texture_cache/image_base.h
  8. 106
      src/video_core/texture_cache/texture_cache.h
  9. 19
      src/video_core/texture_cache/texture_cache_base.h
  10. 5
      src/video_core/texture_cache/types.h

11
src/common/slot_vector.h

@ -130,6 +130,17 @@ public:
ResetStorageBit(id.index); ResetStorageBit(id.index);
} }
/// Reports whether `id` refers to a slot that is currently marked as stored.
/// Returns false for a falsy id and for an index beyond the tracked bitset words.
[[nodiscard]] bool Contains(SlotId id) const noexcept {
    if (!id) {
        return false;
    }
    const size_t word_index = id.index / 64;
    if (word_index >= stored_bitset.size()) {
        // The bitset has no word covering this index.
        return false;
    }
    const u64 bit_mask = u64{1} << (id.index % 64);
    return (stored_bitset[word_index] & bit_mask) != 0;
}
[[nodiscard]] Iterator begin() noexcept { [[nodiscard]] Iterator begin() noexcept {
const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; }); const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; });
if (it == stored_bitset.end()) { if (it == stored_bitset.end()) {

11
src/video_core/fence_manager.h

@ -91,6 +91,10 @@ public:
uncommitted_operations.clear(); uncommitted_operations.clear();
} }
QueueFence(new_fence); QueueFence(new_fence);
if (!new_fence->IsStubbed()) {
std::scoped_lock lock{texture_cache.mutex};
texture_cache.CommitPendingGpuAccesses(new_fence->WaitTick());
}
fences.push(std::move(new_fence)); fences.push(std::move(new_fence));
if (should_flush) { if (should_flush) {
rasterizer.FlushCommands(); rasterizer.FlushCommands();
@ -179,7 +183,7 @@ private:
return; return;
} }
} }
PopAsyncFlushes();
PopAsyncFlushes(current_fence->WaitTick());
auto operations = std::move(pending_operations.front()); auto operations = std::move(pending_operations.front());
pending_operations.pop_front(); pending_operations.pop_front();
for (auto& operation : operations) { for (auto& operation : operations) {
@ -214,7 +218,7 @@ private:
if (!current_fence->IsStubbed()) { if (!current_fence->IsStubbed()) {
WaitFence(current_fence); WaitFence(current_fence);
} }
PopAsyncFlushes();
PopAsyncFlushes(current_fence->WaitTick());
for (auto& operation : current_operations) { for (auto& operation : current_operations) {
operation(); operation();
} }
@ -237,10 +241,11 @@ private:
query_cache.HasUncommittedFlushes(); query_cache.HasUncommittedFlushes();
} }
void PopAsyncFlushes() {
void PopAsyncFlushes(u64 completed_tick) {
{ {
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.PopAsyncFlushes(); texture_cache.PopAsyncFlushes();
texture_cache.CompleteGpuAccesses(completed_tick);
buffer_cache.PopAsyncFlushes(); buffer_cache.PopAsyncFlushes();
} }
query_cache.PopAsyncFlushes(); query_cache.PopAsyncFlushes();

2
src/video_core/renderer_opengl/gl_texture_cache.h

@ -143,6 +143,8 @@ public:
void TickFrame() {} void TickFrame() {}
/// Intentionally empty: the tick argument is ignored and no wait is performed here.
void WaitForGpuTick(u64) {}
StateTracker& GetStateTracker() { StateTracker& GetStateTracker() {
return state_tracker; return state_tracker;
} }

4
src/video_core/renderer_vulkan/vk_fence_manager.h

@ -34,6 +34,10 @@ public:
void Wait(); void Wait();
/// Returns the value stored in `wait_tick` (zero-initialized by default).
/// NOTE(review): presumably the scheduler tick this fence waits on — confirm where it is set.
[[nodiscard]] u64 WaitTick() const noexcept {
return wait_tick;
}
private: private:
Scheduler& scheduler; Scheduler& scheduler;
u64 wait_tick = 0; u64 wait_tick = 0;

7
src/video_core/renderer_vulkan/vk_texture_cache.cpp

@ -910,6 +910,13 @@ void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) {
staging_buffer_pool.FreeDeferred(ref); staging_buffer_pool.FreeDeferred(ref);
} }
/// Waits on the scheduler for `tick`. A tick of zero is treated as "nothing to wait for".
void TextureCacheRuntime::WaitForGpuTick(u64 tick) {
    if (tick != 0) {
        scheduler.Wait(tick);
    }
}
bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {
if (VideoCore::Surface::GetFormatType(dst.info.format) == if (VideoCore::Surface::GetFormatType(dst.info.format) ==
VideoCore::Surface::SurfaceType::DepthStencil && VideoCore::Surface::SurfaceType::DepthStencil &&

27
src/video_core/renderer_vulkan/vk_texture_cache.h

@ -6,6 +6,7 @@
#pragma once #pragma once
#include <optional>
#include <span> #include <span>
#include "video_core/texture_cache/texture_cache_base.h" #include "video_core/texture_cache/texture_cache_base.h"
@ -59,6 +60,8 @@ public:
void TickFrame(); void TickFrame();
/// Wait for the given scheduler tick; a tick of zero returns immediately.
void WaitForGpuTick(u64 tick);
u64 GetDeviceLocalMemory() const; u64 GetDeviceLocalMemory() const;
u64 GetDeviceMemoryUsage() const; u64 GetDeviceMemoryUsage() const;
@ -177,6 +180,30 @@ public:
return (this->*current_image).UsageFlags(); return (this->*current_image).UsageFlags();
} }
/// Forwards `tick` to TrackPendingReadTick.
/// NOTE(review): presumably the helper inherited from ImageBase — confirm.
void TrackGpuReadTick(u64 tick) noexcept {
TrackPendingReadTick(tick);
}
/// Forwards `tick` to TrackPendingWriteTick.
/// NOTE(review): presumably the helper inherited from ImageBase — confirm.
void TrackGpuWriteTick(u64 tick) noexcept {
TrackPendingWriteTick(tick);
}
/// Forwards `completed_tick` to ClearPendingReadTick.
/// NOTE(review): presumably the helper inherited from ImageBase — confirm.
void CompleteGpuReadTick(u64 completed_tick) noexcept {
ClearPendingReadTick(completed_tick);
}
/// Forwards `completed_tick` to ClearPendingWriteTick.
/// NOTE(review): presumably the helper inherited from ImageBase — confirm.
void CompleteGpuWriteTick(u64 completed_tick) noexcept {
ClearPendingWriteTick(completed_tick);
}
/// Returns whatever PendingReadTick() reports (an optional tick value).
[[nodiscard]] std::optional<u64> PendingGpuReadTick() const noexcept {
return PendingReadTick();
}
/// Returns whatever PendingWriteTick() reports (an optional tick value).
[[nodiscard]] std::optional<u64> PendingGpuWriteTick() const noexcept {
return PendingWriteTick();
}
/// Returns true when the image is already initialized and mark it as initialized /// Returns true when the image is already initialized and mark it as initialized
[[nodiscard]] bool ExchangeInitialization() noexcept { [[nodiscard]] bool ExchangeInitialization() noexcept {
return std::exchange(initialized, true); return std::exchange(initialized, true);

51
src/video_core/texture_cache/image_base.h

@ -1,8 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <algorithm>
#include <array> #include <array>
#include <optional> #include <optional>
#include <vector> #include <vector>
@ -58,6 +62,50 @@ struct ImageBase {
explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
explicit ImageBase(const NullImageParams&); explicit ImageBase(const NullImageParams&);
/// Records `tick` as the pending read tick, keeping the largest value recorded so far.
void TrackPendingReadTick(u64 tick) noexcept {
    // When nothing is pending, value_or yields `tick`, so the maximum is `tick` itself.
    pending_read_tick = std::max(pending_read_tick.value_or(tick), tick);
}
/// Records `tick` as the pending write tick, keeping the largest value recorded so far.
void TrackPendingWriteTick(u64 tick) noexcept {
    // When nothing is pending, value_or yields `tick`, so the maximum is `tick` itself.
    pending_write_tick = std::max(pending_write_tick.value_or(tick), tick);
}
/// Drops the pending read tick once `completed_tick` has reached or passed it.
/// Does nothing when no read tick is pending or the pending tick is still ahead.
void ClearPendingReadTick(u64 completed_tick) noexcept {
    if (!pending_read_tick) {
        return;
    }
    if (*pending_read_tick <= completed_tick) {
        pending_read_tick.reset();
    }
}
/// Drops the pending write tick once `completed_tick` has reached or passed it.
/// Does nothing when no write tick is pending or the pending tick is still ahead.
void ClearPendingWriteTick(u64 completed_tick) noexcept {
    if (!pending_write_tick) {
        return;
    }
    if (*pending_write_tick <= completed_tick) {
        pending_write_tick.reset();
    }
}
/// True while a read tick is recorded and has not been cleared.
[[nodiscard]] bool HasPendingReadTick() const noexcept {
    return pending_read_tick != std::nullopt;
}
/// True while a write tick is recorded and has not been cleared.
[[nodiscard]] bool HasPendingWriteTick() const noexcept {
    return pending_write_tick != std::nullopt;
}
/// Returns a copy of the pending read tick; empty when no read tick is recorded.
[[nodiscard]] std::optional<u64> PendingReadTick() const noexcept {
return pending_read_tick;
}
/// Returns a copy of the pending write tick; empty when no write tick is recorded.
[[nodiscard]] std::optional<u64> PendingWriteTick() const noexcept {
return pending_write_tick;
}
[[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept; [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;
[[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept; [[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept;
@ -115,6 +163,9 @@ struct ImageBase {
std::vector<AliasedImage> aliased_images; std::vector<AliasedImage> aliased_images;
std::vector<ImageId> overlapping_images; std::vector<ImageId> overlapping_images;
ImageMapId map_view_id{}; ImageMapId map_view_id{};
/// Largest tick passed to TrackPendingReadTick since the last clear; empty when none is pending.
std::optional<u64> pending_read_tick;
/// Largest tick passed to TrackPendingWriteTick since the last clear; empty when none is pending.
std::optional<u64> pending_write_tick;
}; };
struct ImageMapView { struct ImageMapView {

106
src/video_core/texture_cache/texture_cache.h

@ -540,6 +540,7 @@ void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) {
if (True(image.flags & ImageFlagBits::CpuModified)) { if (True(image.flags & ImageFlagBits::CpuModified)) {
return; return;
} }
EnsureImageReady(image, ImageAccessType::Write);
image.flags |= ImageFlagBits::CpuModified; image.flags |= ImageFlagBits::CpuModified;
if (True(image.flags & ImageFlagBits::Tracked)) { if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id); UntrackImage(image, image_id);
@ -550,11 +551,12 @@ void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) {
template <class P> template <class P>
void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) { void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) {
boost::container::small_vector<ImageId, 16> images; boost::container::small_vector<ImageId, 16> images;
ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
if (!image.IsSafeDownload()) { if (!image.IsSafeDownload()) {
return; return;
} }
image.flags &= ~ImageFlagBits::GpuModified; image.flags &= ~ImageFlagBits::GpuModified;
EnsureImageReady(this->slot_images[image_id], ImageAccessType::Read);
images.push_back(image_id); images.push_back(image_id);
}); });
if (images.empty()) { if (images.empty()) {
@ -606,6 +608,7 @@ void TextureCache<P>::UnmapMemory(DAddr cpu_addr, size_t size) {
ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) { for (const ImageId id : deleted_images) {
Image& image = slot_images[id]; Image& image = slot_images[id];
EnsureImageReady(image, ImageAccessType::Write);
if (True(image.flags & ImageFlagBits::Tracked)) { if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, id); UntrackImage(image, id);
} }
@ -621,6 +624,7 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
[&](ImageId id, Image&) { deleted_images.push_back(id); }); [&](ImageId id, Image&) { deleted_images.push_back(id); });
for (const ImageId id : deleted_images) { for (const ImageId id : deleted_images) {
Image& image = slot_images[id]; Image& image = slot_images[id];
EnsureImageReady(image, ImageAccessType::Write);
if (False(image.flags & ImageFlagBits::CpuModified)) { if (False(image.flags & ImageFlagBits::CpuModified)) {
image.flags |= ImageFlagBits::CpuModified; image.flags |= ImageFlagBits::CpuModified;
if (True(image.flags & ImageFlagBits::Tracked)) { if (True(image.flags & ImageFlagBits::Tracked)) {
@ -2423,6 +2427,8 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
template <class P> template <class P>
void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
EnsureImageReady(image, is_modification ? ImageAccessType::Write : ImageAccessType::Read);
TrackGpuImageAccess(image_id, is_modification ? ImageAccessType::Write : ImageAccessType::Read);
runtime.TransitionImageLayout(image); runtime.TransitionImageLayout(image);
if (invalidate) { if (invalidate) {
image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
@ -2439,6 +2445,85 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
lru_cache.Touch(image.lru_index, frame_tick); lru_cache.Touch(image.lru_index, frame_tick);
} }
/// Appends an (image id, access type) record to the staged list; nothing else is touched here.
template <class P>
void TextureCache<P>::TrackGpuImageAccess(ImageId image_id, ImageAccessType access) {
    const PendingImageAccess entry{image_id, access};
    staged_gpu_accesses.push_back(entry);
}
/// Binds every staged GPU access to `fence_value`.
///
/// The staged list is moved into a new committed batch tagged with `fence_value`, and every
/// image of that batch that still exists records the value as its pending read or write tick.
/// Nothing happens when there are no staged accesses or when `fence_value` is zero; in the
/// latter case the staged accesses are left in place for a later call.
template <class P>
void TextureCache<P>::CommitPendingGpuAccesses(u64 fence_value) {
    if (staged_gpu_accesses.empty() || fence_value == 0) {
        return;
    }
    auto& batch = committed_gpu_accesses.emplace_back(std::move(staged_gpu_accesses));
    // A moved-from vector is only guaranteed to be valid, so reset it explicitly.
    staged_gpu_accesses.clear();
    committed_gpu_ticks.push_back(fence_value);
    for (const PendingImageAccess& access : batch) {
        // The image may have been removed after its access was staged. Indexing a slot that
        // is no longer stored is invalid, so skip it exactly like CompleteGpuAccesses does.
        if (!slot_images.Contains(access.image_id)) {
            continue;
        }
        ImageBase& image = slot_images[access.image_id];
        if (access.access == ImageAccessType::Read) {
            image.TrackPendingReadTick(fence_value);
        } else {
            image.TrackPendingWriteTick(fence_value);
        }
    }
}
/// Releases every committed batch whose tick is not greater than `completed_fence`.
///
/// Batches are consumed from the front of the queue. For each recorded access whose image is
/// still stored, the matching pending tick is cleared if `completed_fence` has reached it.
/// A `completed_fence` of zero is ignored.
template <class P>
void TextureCache<P>::CompleteGpuAccesses(u64 completed_fence) {
    if (completed_fence == 0) {
        return;
    }
    for (;;) {
        if (committed_gpu_ticks.empty() || committed_gpu_ticks.front() > completed_fence) {
            break;
        }
        const auto finished_batch = std::move(committed_gpu_accesses.front());
        committed_gpu_accesses.pop_front();
        committed_gpu_ticks.pop_front();
        for (const PendingImageAccess& entry : finished_batch) {
            // Skip records whose image slot is no longer stored.
            if (slot_images.Contains(entry.image_id)) {
                ImageBase& image = slot_images[entry.image_id];
                if (entry.access != ImageAccessType::Read) {
                    image.ClearPendingWriteTick(completed_fence);
                } else {
                    image.ClearPendingReadTick(completed_fence);
                }
            }
        }
    }
}
/// Waits for pending GPU work on `image` that conflicts with the requested `access`.
///
/// A write access first waits for the pending read tick and then for the pending write tick;
/// a read access waits for the pending write tick only. Each tick that was waited on is
/// cleared from the image afterwards.
template <class P>
void TextureCache<P>::EnsureImageReady(ImageBase& image, ImageAccessType access) {
    if (access == ImageAccessType::Write) {
        // Only writers need to wait for outstanding reads.
        if (const std::optional<u64> read_tick = image.PendingReadTick()) {
            runtime.WaitForGpuTick(*read_tick);
            image.ClearPendingReadTick(*read_tick);
        }
    }
    // Both readers and writers wait for an outstanding write.
    if (const std::optional<u64> write_tick = image.PendingWriteTick()) {
        runtime.WaitForGpuTick(*write_tick);
        image.ClearPendingWriteTick(*write_tick);
    }
}
template <class P> template <class P>
void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification, void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification,
bool invalidate) { bool invalidate) {
@ -2456,6 +2541,8 @@ template <class P>
void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies) { void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies) {
Image& dst = slot_images[dst_id]; Image& dst = slot_images[dst_id];
Image& src = slot_images[src_id]; Image& src = slot_images[src_id];
EnsureImageReady(dst, ImageAccessType::Write);
EnsureImageReady(src, ImageAccessType::Read);
const bool is_rescaled = True(src.flags & ImageFlagBits::Rescaled); const bool is_rescaled = True(src.flags & ImageFlagBits::Rescaled);
if (is_rescaled) { if (is_rescaled) {
ASSERT(True(dst.flags & ImageFlagBits::Rescaled)); ASSERT(True(dst.flags & ImageFlagBits::Rescaled));
@ -2472,20 +2559,30 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag
} }
} }
} }
const auto TrackCopyAccesses = [this, dst_id, src_id]() {
TrackGpuImageAccess(dst_id, ImageAccessType::Write);
TrackGpuImageAccess(src_id, ImageAccessType::Read);
};
const auto dst_format_type = GetFormatType(dst.info.format); const auto dst_format_type = GetFormatType(dst.info.format);
const auto src_format_type = GetFormatType(src.info.format); const auto src_format_type = GetFormatType(src.info.format);
if (src_format_type == dst_format_type) { if (src_format_type == dst_format_type) {
if constexpr (HAS_EMULATED_COPIES) { if constexpr (HAS_EMULATED_COPIES) {
if (!runtime.CanImageBeCopied(dst, src)) { if (!runtime.CanImageBeCopied(dst, src)) {
return runtime.EmulateCopyImage(dst, src, copies);
runtime.EmulateCopyImage(dst, src, copies);
TrackCopyAccesses();
return;
} }
} }
return runtime.CopyImage(dst, src, copies);
runtime.CopyImage(dst, src, copies);
TrackCopyAccesses();
return;
} }
UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
if (runtime.ShouldReinterpret(dst, src)) { if (runtime.ShouldReinterpret(dst, src)) {
return runtime.ReinterpretImage(dst, src, copies);
runtime.ReinterpretImage(dst, src, copies);
TrackCopyAccesses();
return;
} }
for (const ImageCopy& copy : copies) { for (const ImageCopy& copy : copies) {
UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
@ -2538,6 +2635,7 @@ void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::vector<Imag
runtime.ConvertImage(dst_framebuffer, dst_view, src_view); runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
} }
TrackCopyAccesses();
} }
template <class P> template <class P>

19
src/video_core/texture_cache/texture_cache_base.h

@ -263,6 +263,15 @@ public:
/// Prepare an image to be used /// Prepare an image to be used
void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
/// Track that an image participates in upcoming GPU work with the given access type
void TrackGpuImageAccess(ImageId image_id, ImageAccessType access);
/// Notify the cache that tracked GPU work has been submitted with the specified fence value
void CommitPendingGpuAccesses(u64 fence_value);
/// Notify the cache that a fence value has completed so tracked accesses can be released
void CompleteGpuAccesses(u64 completed_fence);
std::recursive_mutex mutex; std::recursive_mutex mutex;
private: private:
@ -413,6 +422,8 @@ private:
/// Execute copies from one image to the other, even if they are incompatible /// Execute copies from one image to the other, even if they are incompatible
void CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies); void CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies);
/// Wait for pending GPU ticks on the image that conflict with the requested access type
void EnsureImageReady(ImageBase& image, ImageAccessType access);
/// Bind an image view as render target, downloading resources preemptively if needed /// Bind an image view as render target, downloading resources preemptively if needed
void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
@ -470,6 +481,11 @@ private:
Common::SlotId object_id; Common::SlotId object_id;
}; };
/// One recorded GPU access: which image was touched and whether it was read or written.
struct PendingImageAccess {
ImageId image_id; // Slot id of the accessed image
ImageAccessType access; // Read or Write
};
Common::SlotVector<Image> slot_images; Common::SlotVector<Image> slot_images;
Common::SlotVector<ImageMapView> slot_map_views; Common::SlotVector<ImageMapView> slot_map_views;
Common::SlotVector<ImageView> slot_image_views; Common::SlotVector<ImageView> slot_image_views;
@ -485,6 +501,9 @@ private:
std::vector<AsyncBuffer> uncommitted_async_buffers; std::vector<AsyncBuffer> uncommitted_async_buffers;
std::deque<std::vector<AsyncBuffer>> async_buffers; std::deque<std::vector<AsyncBuffer>> async_buffers;
std::deque<AsyncBuffer> async_buffers_death_ring; std::deque<AsyncBuffer> async_buffers_death_ring;
/// Accesses recorded by TrackGpuImageAccess that CommitPendingGpuAccesses has not committed yet
std::vector<PendingImageAccess> staged_gpu_accesses;
/// Committed batches, oldest first; each entry pairs with the same position in committed_gpu_ticks
std::deque<std::vector<PendingImageAccess>> committed_gpu_accesses;
/// Fence value each committed batch was tagged with, in the same order as committed_gpu_accesses
std::deque<u64> committed_gpu_ticks;
struct LRUItemParams { struct LRUItemParams {
using ObjectType = ImageId; using ObjectType = ImageId;

5
src/video_core/texture_cache/types.h

@ -155,4 +155,9 @@ struct SwizzleParameters {
s32 level; s32 level;
}; };
/// Kind of access performed on an image, used to choose between read and write tick tracking.
enum class ImageAccessType : u8 {
Read,
Write,
};
} // namespace VideoCommon } // namespace VideoCommon
Loading…
Cancel
Save