Browse Source
Merge pull request #10942 from FernandoS27/android-is-a-pain-in-the-a--
Merge pull request #10942 from FernandoS27/android-is-a-pain-in-the-a--
Memory Tracking: Add mechanism to register small writes when gpu page is contested by the GPU
committed by
GitHub
20 changed files with 329 additions and 41 deletions
-
32src/core/core.cpp
-
11src/core/core.h
-
122src/core/gpu_dirty_memory_manager.h
-
40src/core/memory.cpp
-
6src/core/memory.h
-
39src/video_core/buffer_cache/buffer_cache.h
-
5src/video_core/buffer_cache/buffer_cache_base.h
-
2src/video_core/fence_manager.h
-
12src/video_core/gpu.cpp
-
4src/video_core/gpu.h
-
6src/video_core/gpu_thread.cpp
-
4src/video_core/rasterizer_interface.h
-
5src/video_core/renderer_null/null_rasterizer.cpp
-
3src/video_core/renderer_null/null_rasterizer.h
-
35src/video_core/renderer_opengl/gl_rasterizer.cpp
-
3src/video_core/renderer_opengl/gl_rasterizer.h
-
34src/video_core/renderer_vulkan/vk_rasterizer.cpp
-
3src/video_core/renderer_vulkan/vk_rasterizer.h
-
2src/video_core/shader_cache.cpp
-
2src/video_core/shader_cache.h
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#pragma once

#include <atomic>
#include <bit>
#include <functional>
#include <mutex>
#include <utility>
#include <vector>

#include "core/memory.h"
namespace Core {

// Records which regions of guest memory the CPU has dirtied so the GPU side
// can later re-inspect them. Writers call Collect() (lock-free fast path on a
// single packed atomic slot); a consumer periodically calls Gather() to drain
// all recorded ranges through a callback. Dirty state is tracked per
// half-page (see page_bits) as a bitmask of 64-byte chunks.
class GPUDirtyMemoryManager {
public:
    GPUDirtyMemoryManager() : current{default_transform} {
        // Pre-size both buffers so the common case never reallocates while
        // the guard mutex is held.
        back_buffer.reserve(256);
        front_buffer.reserve(256);
    }

    ~GPUDirtyMemoryManager() = default;

    // Marks [address, address + size) as dirty. Fast path: merge the new
    // chunk mask into `current` with a CAS loop. Slow path (write lands on a
    // different page than `current` holds): flush the old entry to
    // back_buffer under the lock and install the new one.
    void Collect(VAddr address, size_t size) {
        TransformAddress t = BuildTransform(address, size);
        // `original` keeps the expected value for the CAS; `tmp` is the
        // mutated candidate.
        TransformAddress tmp, original;
        do {
            tmp = current.load(std::memory_order_acquire);
            original = tmp;
            if (tmp.address != t.address) {
                // The slot holds a different page.
                // NOTE(review): tmp.address is already page-shifted (u32), so
                // comparing it against the 39-bit VAddr limit in IsValid()
                // passes for every u32 value, including the ~0U sentinel of
                // default_transform. Harmless (the sentinel's mask is 0, so
                // Gather emits nothing for it), but presumably the intent was
                // to skip flushing the sentinel — confirm.
                if (IsValid(tmp.address)) {
                    std::scoped_lock lk(guard);
                    back_buffer.emplace_back(tmp);
                    // Publish the new page's mask directly; any concurrent
                    // CAS on the old value will fail and retry.
                    current.exchange(t, std::memory_order_relaxed);
                    return;
                }
                // Slot held the (treated-as-invalid) sentinel: start a fresh
                // entry for the new page.
                tmp.address = t.address;
                tmp.mask = 0;
            }
            if ((tmp.mask | t.mask) == tmp.mask) {
                // All chunks already marked dirty — nothing to publish.
                return;
            }
            tmp.mask |= t.mask;
        } while (!current.compare_exchange_weak(original, tmp, std::memory_order_release,
                                                std::memory_order_relaxed));
    }

    // Drains every recorded dirty range, invoking callback(start_vaddr,
    // size_in_bytes) for each contiguous run of dirty 64-byte chunks.
    void Gather(std::function<void(VAddr, size_t)>& callback) {
        {
            // Steal the in-flight entry and swap the buffers under the same
            // lock Collect() uses for back_buffer, so no entry is lost.
            std::scoped_lock lk(guard);
            TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed);
            front_buffer.swap(back_buffer);
            // NOTE(review): IsValid() also returns true for the ~0U sentinel
            // (see Collect); its zero mask keeps the decode loop below from
            // emitting anything, so this is benign.
            if (IsValid(t.address)) {
                front_buffer.emplace_back(t);
            }
        }
        // Decode each mask into runs: skip trailing zeros, then report the
        // following run of ones as one contiguous range.
        for (auto& transform : front_buffer) {
            size_t offset = 0;
            u64 mask = transform.mask;
            while (mask != 0) {
                const size_t empty_bits = std::countr_zero(mask);
                offset += empty_bits << align_bits;
                mask = mask >> empty_bits;

                const size_t continuous_bits = std::countr_one(mask);
                callback((static_cast<VAddr>(transform.address) << page_bits) + offset,
                         continuous_bits << align_bits);
                // Guard against shifting a u64 by >= 64 (UB). With a 32-bit
                // source mask continuous_bits <= 32, so this is defensive.
                mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
                offset += continuous_bits << align_bits;
            }
        }
        front_buffer.clear();
    }

private:
    // Packed record: one tracked (half-)page plus a bitmask of dirty 64-byte
    // chunks within it. alignas(8) so std::atomic<TransformAddress> fits an
    // 8-byte atomic — presumably lock-free on the supported targets; confirm.
    struct alignas(8) TransformAddress {
        u32 address; // VAddr >> page_bits
        u32 mask;    // bit i set => chunk [i*64, i*64+64) within the page is dirty
    };

    // Track at half the guest page granularity: 2^(YUZU_PAGEBITS-1) bytes per
    // entry, which divides into exactly 32 chunks of 64 bytes — matching the
    // 32-bit mask.
    constexpr static size_t page_bits = Memory::YUZU_PAGEBITS - 1;
    constexpr static size_t page_size = 1ULL << page_bits;
    constexpr static size_t page_mask = page_size - 1;

    // Chunk granularity: 64 bytes per mask bit.
    constexpr static size_t align_bits = 6U;
    constexpr static size_t align_size = 1U << align_bits;
    constexpr static size_t align_mask = align_size - 1;
    // Sentinel marking the atomic slot as empty.
    constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U};

    // NOTE(review): callers pass the stored page-shifted u32 address here, so
    // the 39-bit check never fails — see the notes in Collect()/Gather().
    bool IsValid(VAddr address) {
        return address < (1ULL << 39);
    }

    // Builds a mask with bits [minor_bit, top_bit) set.
    template <typename T>
    T CreateMask(size_t top_bit, size_t minor_bit) {
        T mask = ~T(0);
        // Clear bits at and above top_bit.
        mask <<= (sizeof(T) * 8 - top_bit);
        mask >>= (sizeof(T) * 8 - top_bit);
        // Clear bits below minor_bit.
        mask >>= minor_bit;
        mask <<= minor_bit;
        return mask;
    }

    // Converts [address, address + size) into a page index plus a chunk
    // bitmask covering the touched 64-byte chunks (end rounded up).
    TransformAddress BuildTransform(VAddr address, size_t size) {
        const size_t minor_address = address & page_mask;
        const size_t minor_bit = minor_address >> align_bits;
        const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
        TransformAddress result{};
        result.address = static_cast<u32>(address >> page_bits);
        result.mask = CreateMask<u32>(top_bit, minor_bit);
        return result;
    }

    std::atomic<TransformAddress> current{}; // in-flight entry (lock-free fast path)
    std::mutex guard;                        // protects back_buffer / front_buffer swap
    std::vector<TransformAddress> back_buffer;  // entries flushed by Collect, pending Gather
    std::vector<TransformAddress> front_buffer; // drained by Gather outside the lock
};

} // namespace Core
Write
Preview
Loading…
Cancel
Save
Reference in new issue