Browse Source
Merge pull request #3986 from ReinUsesLisp/shader-cache
Merge pull request #3986 from ReinUsesLisp/shader-cache
shader_cache: Implement a generic runtime shader cache
committed by
GitHub
14 changed files with 364 additions and 417 deletions
-
3src/video_core/CMakeLists.txt
-
7src/video_core/rasterizer_cache.cpp
-
253src/video_core/rasterizer_cache.h
-
1src/video_core/renderer_opengl/gl_buffer_cache.h
-
19src/video_core/renderer_opengl/gl_rasterizer.cpp
-
17src/video_core/renderer_opengl/gl_rasterizer.h
-
87src/video_core/renderer_opengl/gl_shader_cache.cpp
-
51src/video_core/renderer_opengl/gl_shader_cache.h
-
1src/video_core/renderer_vulkan/vk_buffer_cache.h
-
72src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
-
33src/video_core/renderer_vulkan/vk_pipeline_cache.h
-
7src/video_core/renderer_vulkan/vk_rasterizer.cpp
-
2src/video_core/renderer_vulkan/vk_rasterizer.h
-
228src/video_core/shader_cache.h
@ -1,7 +0,0 @@ |
|||
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "video_core/rasterizer_cache.h"

// Out-of-line defaulted destructor for the polymorphic base class declared in
// rasterizer_cache.h; defining it in this translation unit gives the class a
// single home for its virtual table instead of emitting it in every includer.
RasterizerCacheObject::~RasterizerCacheObject() = default;
|||
@ -1,253 +0,0 @@ |
|||
// Copyright 2018 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <algorithm>
#include <list>
#include <mutex>
#include <set>
#include <unordered_map>
#include <vector>

#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range_core.hpp>

#include "common/common_types.h"
#include "core/settings.h"
#include "video_core/gpu.h"
#include "video_core/rasterizer_interface.h"
|||
|
|||
class RasterizerCacheObject { |
|||
public: |
|||
explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {} |
|||
|
|||
virtual ~RasterizerCacheObject(); |
|||
|
|||
VAddr GetCpuAddr() const { |
|||
return cpu_addr; |
|||
} |
|||
|
|||
/// Gets the size of the shader in guest memory, required for cache management |
|||
virtual std::size_t GetSizeInBytes() const = 0; |
|||
|
|||
/// Sets whether the cached object should be considered registered |
|||
void SetIsRegistered(bool registered) { |
|||
is_registered = registered; |
|||
} |
|||
|
|||
/// Returns true if the cached object is registered |
|||
bool IsRegistered() const { |
|||
return is_registered; |
|||
} |
|||
|
|||
/// Returns true if the cached object is dirty |
|||
bool IsDirty() const { |
|||
return is_dirty; |
|||
} |
|||
|
|||
/// Returns ticks from when this cached object was last modified |
|||
u64 GetLastModifiedTicks() const { |
|||
return last_modified_ticks; |
|||
} |
|||
|
|||
/// Marks an object as recently modified, used to specify whether it is clean or dirty |
|||
template <class T> |
|||
void MarkAsModified(bool dirty, T& cache) { |
|||
is_dirty = dirty; |
|||
last_modified_ticks = cache.GetModifiedTicks(); |
|||
} |
|||
|
|||
void SetMemoryMarked(bool is_memory_marked_) { |
|||
is_memory_marked = is_memory_marked_; |
|||
} |
|||
|
|||
bool IsMemoryMarked() const { |
|||
return is_memory_marked; |
|||
} |
|||
|
|||
void SetSyncPending(bool is_sync_pending_) { |
|||
is_sync_pending = is_sync_pending_; |
|||
} |
|||
|
|||
bool IsSyncPending() const { |
|||
return is_sync_pending; |
|||
} |
|||
|
|||
private: |
|||
bool is_registered{}; ///< Whether the object is currently registered with the cache |
|||
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) |
|||
bool is_memory_marked{}; ///< Whether the object is marking rasterizer memory. |
|||
bool is_sync_pending{}; ///< Whether the object is pending deletion. |
|||
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing |
|||
VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space |
|||
}; |
|||
|
|||
template <class T> |
|||
class RasterizerCache : NonCopyable { |
|||
friend class RasterizerCacheObject; |
|||
|
|||
public: |
|||
explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} |
|||
|
|||
/// Write any cached resources overlapping the specified region back to memory |
|||
void FlushRegion(VAddr addr, std::size_t size) { |
|||
std::lock_guard lock{mutex}; |
|||
|
|||
const auto& objects{GetSortedObjectsFromRegion(addr, size)}; |
|||
for (auto& object : objects) { |
|||
FlushObject(object); |
|||
} |
|||
} |
|||
|
|||
/// Mark the specified region as being invalidated |
|||
void InvalidateRegion(VAddr addr, u64 size) { |
|||
std::lock_guard lock{mutex}; |
|||
|
|||
const auto& objects{GetSortedObjectsFromRegion(addr, size)}; |
|||
for (auto& object : objects) { |
|||
if (!object->IsRegistered()) { |
|||
// Skip duplicates |
|||
continue; |
|||
} |
|||
Unregister(object); |
|||
} |
|||
} |
|||
|
|||
void OnCPUWrite(VAddr addr, std::size_t size) { |
|||
std::lock_guard lock{mutex}; |
|||
|
|||
for (const auto& object : GetSortedObjectsFromRegion(addr, size)) { |
|||
if (object->IsRegistered()) { |
|||
UnmarkMemory(object); |
|||
object->SetSyncPending(true); |
|||
marked_for_unregister.emplace_back(object); |
|||
} |
|||
} |
|||
} |
|||
|
|||
void SyncGuestHost() { |
|||
std::lock_guard lock{mutex}; |
|||
|
|||
for (const auto& object : marked_for_unregister) { |
|||
if (object->IsRegistered()) { |
|||
object->SetSyncPending(false); |
|||
Unregister(object); |
|||
} |
|||
} |
|||
marked_for_unregister.clear(); |
|||
} |
|||
|
|||
/// Invalidates everything in the cache |
|||
void InvalidateAll() { |
|||
std::lock_guard lock{mutex}; |
|||
|
|||
while (interval_cache.begin() != interval_cache.end()) { |
|||
Unregister(*interval_cache.begin()->second.begin()); |
|||
} |
|||
} |
|||
|
|||
protected: |
|||
/// Tries to get an object from the cache with the specified cache address |
|||
T TryGet(VAddr addr) const { |
|||
const auto iter = map_cache.find(addr); |
|||
if (iter != map_cache.end()) |
|||
return iter->second; |
|||
return nullptr; |
|||
} |
|||
|
|||
/// Register an object into the cache |
|||
virtual void Register(const T& object) { |
|||
std::lock_guard lock{mutex}; |
|||
|
|||
object->SetIsRegistered(true); |
|||
interval_cache.add({GetInterval(object), ObjectSet{object}}); |
|||
map_cache.insert({object->GetCpuAddr(), object}); |
|||
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); |
|||
object->SetMemoryMarked(true); |
|||
} |
|||
|
|||
/// Unregisters an object from the cache |
|||
virtual void Unregister(const T& object) { |
|||
std::lock_guard lock{mutex}; |
|||
|
|||
UnmarkMemory(object); |
|||
object->SetIsRegistered(false); |
|||
if (object->IsSyncPending()) { |
|||
marked_for_unregister.remove(object); |
|||
object->SetSyncPending(false); |
|||
} |
|||
const VAddr addr = object->GetCpuAddr(); |
|||
interval_cache.subtract({GetInterval(object), ObjectSet{object}}); |
|||
map_cache.erase(addr); |
|||
} |
|||
|
|||
void UnmarkMemory(const T& object) { |
|||
if (!object->IsMemoryMarked()) { |
|||
return; |
|||
} |
|||
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); |
|||
object->SetMemoryMarked(false); |
|||
} |
|||
|
|||
/// Returns a ticks counter used for tracking when cached objects were last modified |
|||
u64 GetModifiedTicks() { |
|||
std::lock_guard lock{mutex}; |
|||
|
|||
return ++modified_ticks; |
|||
} |
|||
|
|||
virtual void FlushObjectInner(const T& object) = 0; |
|||
|
|||
/// Flushes the specified object, updating appropriate cache state as needed |
|||
void FlushObject(const T& object) { |
|||
std::lock_guard lock{mutex}; |
|||
|
|||
if (!object->IsDirty()) { |
|||
return; |
|||
} |
|||
FlushObjectInner(object); |
|||
object->MarkAsModified(false, *this); |
|||
} |
|||
|
|||
std::recursive_mutex mutex; |
|||
|
|||
private: |
|||
/// Returns a list of cached objects from the specified memory region, ordered by access time |
|||
std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { |
|||
if (size == 0) { |
|||
return {}; |
|||
} |
|||
|
|||
std::vector<T> objects; |
|||
const ObjectInterval interval{addr, addr + size}; |
|||
for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) { |
|||
for (auto& cached_object : pair.second) { |
|||
if (!cached_object) { |
|||
continue; |
|||
} |
|||
objects.push_back(cached_object); |
|||
} |
|||
} |
|||
|
|||
std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool { |
|||
return a->GetLastModifiedTicks() < b->GetLastModifiedTicks(); |
|||
}); |
|||
|
|||
return objects; |
|||
} |
|||
|
|||
using ObjectSet = std::set<T>; |
|||
using ObjectCache = std::unordered_map<VAddr, T>; |
|||
using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; |
|||
using ObjectInterval = typename IntervalCache::interval_type; |
|||
|
|||
static auto GetInterval(const T& object) { |
|||
return ObjectInterval::right_open(object->GetCpuAddr(), |
|||
object->GetCpuAddr() + object->GetSizeInBytes()); |
|||
} |
|||
|
|||
ObjectCache map_cache; |
|||
IntervalCache interval_cache; ///< Cache of objects |
|||
u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing |
|||
VideoCore::RasterizerInterface& rasterizer; |
|||
std::list<T> marked_for_unregister; |
|||
}; |
|||
@ -0,0 +1,228 @@ |
|||
// Copyright 2020 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <algorithm> |
|||
#include <memory> |
|||
#include <mutex> |
|||
#include <unordered_map> |
|||
#include <utility> |
|||
#include <vector> |
|||
|
|||
#include "common/assert.h" |
|||
#include "common/common_types.h" |
|||
#include "video_core/rasterizer_interface.h" |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
template <class T> |
|||
class ShaderCache { |
|||
static constexpr u64 PAGE_SHIFT = 14; |
|||
|
|||
struct Entry { |
|||
VAddr addr_start; |
|||
VAddr addr_end; |
|||
T* data; |
|||
|
|||
bool is_memory_marked = true; |
|||
|
|||
constexpr bool Overlaps(VAddr start, VAddr end) const noexcept { |
|||
return start < addr_end && addr_start < end; |
|||
} |
|||
}; |
|||
|
|||
public: |
|||
virtual ~ShaderCache() = default; |
|||
|
|||
/// @brief Removes shaders inside a given region |
|||
/// @note Checks for ranges |
|||
/// @param addr Start address of the invalidation |
|||
/// @param size Number of bytes of the invalidation |
|||
void InvalidateRegion(VAddr addr, std::size_t size) { |
|||
std::scoped_lock lock{invalidation_mutex}; |
|||
InvalidatePagesInRegion(addr, size); |
|||
RemovePendingShaders(); |
|||
} |
|||
|
|||
/// @brief Unmarks a memory region as cached and marks it for removal |
|||
/// @param addr Start address of the CPU write operation |
|||
/// @param size Number of bytes of the CPU write operation |
|||
void OnCPUWrite(VAddr addr, std::size_t size) { |
|||
std::lock_guard lock{invalidation_mutex}; |
|||
InvalidatePagesInRegion(addr, size); |
|||
} |
|||
|
|||
/// @brief Flushes delayed removal operations |
|||
void SyncGuestHost() { |
|||
std::scoped_lock lock{invalidation_mutex}; |
|||
RemovePendingShaders(); |
|||
} |
|||
|
|||
/// @brief Tries to obtain a cached shader starting in a given address |
|||
/// @note Doesn't check for ranges, the given address has to be the start of the shader |
|||
/// @param addr Start address of the shader, this doesn't cache for region |
|||
/// @return Pointer to a valid shader, nullptr when nothing is found |
|||
T* TryGet(VAddr addr) const { |
|||
std::scoped_lock lock{lookup_mutex}; |
|||
|
|||
const auto it = lookup_cache.find(addr); |
|||
if (it == lookup_cache.end()) { |
|||
return nullptr; |
|||
} |
|||
return it->second->data; |
|||
} |
|||
|
|||
protected: |
|||
explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} |
|||
|
|||
/// @brief Register in the cache a given entry |
|||
/// @param data Shader to store in the cache |
|||
/// @param addr Start address of the shader that will be registered |
|||
/// @param size Size in bytes of the shader |
|||
void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) { |
|||
std::scoped_lock lock{invalidation_mutex, lookup_mutex}; |
|||
|
|||
const VAddr addr_end = addr + size; |
|||
Entry* const entry = NewEntry(addr, addr_end, data.get()); |
|||
|
|||
const u64 page_end = addr_end >> PAGE_SHIFT; |
|||
for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) { |
|||
invalidation_cache[page].push_back(entry); |
|||
} |
|||
|
|||
storage.push_back(std::move(data)); |
|||
|
|||
rasterizer.UpdatePagesCachedCount(addr, size, 1); |
|||
} |
|||
|
|||
/// @brief Called when a shader is going to be removed |
|||
/// @param shader Shader that will be removed |
|||
/// @pre invalidation_cache is locked |
|||
/// @pre lookup_mutex is locked |
|||
virtual void OnShaderRemoval([[maybe_unused]] T* shader) {} |
|||
|
|||
private: |
|||
/// @brief Invalidate pages in a given region |
|||
/// @pre invalidation_mutex is locked |
|||
void InvalidatePagesInRegion(VAddr addr, std::size_t size) { |
|||
const VAddr addr_end = addr + size; |
|||
const u64 page_end = addr_end >> PAGE_SHIFT; |
|||
for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) { |
|||
const auto it = invalidation_cache.find(page); |
|||
if (it == invalidation_cache.end()) { |
|||
continue; |
|||
} |
|||
|
|||
std::vector<Entry*>& entries = it->second; |
|||
InvalidatePageEntries(entries, addr, addr_end); |
|||
|
|||
// If there's nothing else in this page, remove it to avoid overpopulating the hash map. |
|||
if (entries.empty()) { |
|||
invalidation_cache.erase(it); |
|||
} |
|||
} |
|||
} |
|||
|
|||
/// @brief Remove shaders marked for deletion |
|||
/// @pre invalidation_mutex is locked |
|||
void RemovePendingShaders() { |
|||
if (marked_for_removal.empty()) { |
|||
return; |
|||
} |
|||
std::scoped_lock lock{lookup_mutex}; |
|||
|
|||
std::vector<T*> removed_shaders; |
|||
removed_shaders.reserve(marked_for_removal.size()); |
|||
|
|||
for (Entry* const entry : marked_for_removal) { |
|||
if (lookup_cache.erase(entry->addr_start) > 0) { |
|||
removed_shaders.push_back(entry->data); |
|||
} |
|||
} |
|||
marked_for_removal.clear(); |
|||
|
|||
if (!removed_shaders.empty()) { |
|||
RemoveShadersFromStorage(std::move(removed_shaders)); |
|||
} |
|||
} |
|||
|
|||
/// @brief Invalidates entries in a given range for the passed page |
|||
/// @param entries Vector of entries in the page, it will be modified on overlaps |
|||
/// @param addr Start address of the invalidation |
|||
/// @param addr_end Non-inclusive end address of the invalidation |
|||
/// @pre invalidation_mutex is locked |
|||
void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) { |
|||
auto it = entries.begin(); |
|||
while (it != entries.end()) { |
|||
Entry* const entry = *it; |
|||
if (!entry->Overlaps(addr, addr_end)) { |
|||
++it; |
|||
continue; |
|||
} |
|||
UnmarkMemory(entry); |
|||
marked_for_removal.push_back(entry); |
|||
|
|||
it = entries.erase(it); |
|||
} |
|||
} |
|||
|
|||
/// @brief Unmarks an entry from the rasterizer cache |
|||
/// @param entry Entry to unmark from memory |
|||
void UnmarkMemory(Entry* entry) { |
|||
if (!entry->is_memory_marked) { |
|||
return; |
|||
} |
|||
entry->is_memory_marked = false; |
|||
|
|||
const VAddr addr = entry->addr_start; |
|||
const std::size_t size = entry->addr_end - addr; |
|||
rasterizer.UpdatePagesCachedCount(addr, size, -1); |
|||
} |
|||
|
|||
/// @brief Removes a vector of shaders from a list |
|||
/// @param removed_shaders Shaders to be removed from the storage, it can contain duplicates |
|||
/// @pre invalidation_mutex is locked |
|||
/// @pre lookup_mutex is locked |
|||
void RemoveShadersFromStorage(std::vector<T*> removed_shaders) { |
|||
// Remove duplicates |
|||
std::sort(removed_shaders.begin(), removed_shaders.end()); |
|||
removed_shaders.erase(std::unique(removed_shaders.begin(), removed_shaders.end()), |
|||
removed_shaders.end()); |
|||
|
|||
// Now that there are no duplicates, we can notify removals |
|||
for (T* const shader : removed_shaders) { |
|||
OnShaderRemoval(shader); |
|||
} |
|||
|
|||
// Remove them from the cache |
|||
const auto is_removed = [&removed_shaders](std::unique_ptr<T>& shader) { |
|||
return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) != |
|||
removed_shaders.end(); |
|||
}; |
|||
storage.erase(std::remove_if(storage.begin(), storage.end(), is_removed), storage.end()); |
|||
} |
|||
|
|||
/// @brief Creates a new entry in the lookup cache and returns its pointer |
|||
/// @pre lookup_mutex is locked |
|||
Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) { |
|||
auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data}); |
|||
Entry* const entry_pointer = entry.get(); |
|||
|
|||
lookup_cache.emplace(addr, std::move(entry)); |
|||
return entry_pointer; |
|||
} |
|||
|
|||
VideoCore::RasterizerInterface& rasterizer; |
|||
|
|||
mutable std::mutex lookup_mutex; |
|||
std::mutex invalidation_mutex; |
|||
|
|||
std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache; |
|||
std::unordered_map<u64, std::vector<Entry*>> invalidation_cache; |
|||
std::vector<std::unique_ptr<T>> storage; |
|||
std::vector<Entry*> marked_for_removal; |
|||
}; |
|||
|
|||
} // namespace VideoCommon |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue