Browse Source
Merge pull request #2783 from FernandoS27/new-buffer-cache
Merge pull request #2783 from FernandoS27/new-buffer-cache
Implement a New LLE Buffer Cache
committed by
GitHub
9 changed files with 684 additions and 330 deletions
-
4src/video_core/CMakeLists.txt
-
299src/video_core/buffer_cache.h
-
77src/video_core/buffer_cache/buffer_block.h
-
449src/video_core/buffer_cache/buffer_cache.h
-
89src/video_core/buffer_cache/map_interval.h
-
4src/video_core/gpu.h
-
51src/video_core/renderer_opengl/gl_buffer_cache.cpp
-
39src/video_core/renderer_opengl/gl_buffer_cache.h
-
2src/video_core/renderer_opengl/gl_rasterizer.cpp
@ -1,299 +0,0 @@ |
|||||
// Copyright 2019 yuzu Emulator Project |
|
||||
// Licensed under GPLv2 or any later version |
|
||||
// Refer to the license.txt file included. |
|
||||
|
|
||||
#pragma once |
|
||||
|
|
||||
#include <array> |
|
||||
#include <memory> |
|
||||
#include <mutex> |
|
||||
#include <unordered_map> |
|
||||
#include <unordered_set> |
|
||||
#include <utility> |
|
||||
#include <vector> |
|
||||
|
|
||||
#include "common/alignment.h" |
|
||||
#include "common/common_types.h" |
|
||||
#include "core/core.h" |
|
||||
#include "video_core/memory_manager.h" |
|
||||
#include "video_core/rasterizer_cache.h" |
|
||||
|
|
||||
namespace VideoCore { |
|
||||
class RasterizerInterface; |
|
||||
} |
|
||||
|
|
||||
namespace VideoCommon { |
|
||||
|
|
||||
template <typename BufferStorageType> |
|
||||
class CachedBuffer final : public RasterizerCacheObject { |
|
||||
public: |
|
||||
explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr) |
|
||||
: RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {} |
|
||||
~CachedBuffer() override = default; |
|
||||
|
|
||||
VAddr GetCpuAddr() const override { |
|
||||
return cpu_addr; |
|
||||
} |
|
||||
|
|
||||
std::size_t GetSizeInBytes() const override { |
|
||||
return size; |
|
||||
} |
|
||||
|
|
||||
u8* GetWritableHostPtr() const { |
|
||||
return host_ptr; |
|
||||
} |
|
||||
|
|
||||
std::size_t GetSize() const { |
|
||||
return size; |
|
||||
} |
|
||||
|
|
||||
std::size_t GetCapacity() const { |
|
||||
return capacity; |
|
||||
} |
|
||||
|
|
||||
bool IsInternalized() const { |
|
||||
return is_internal; |
|
||||
} |
|
||||
|
|
||||
const BufferStorageType& GetBuffer() const { |
|
||||
return buffer; |
|
||||
} |
|
||||
|
|
||||
void SetSize(std::size_t new_size) { |
|
||||
size = new_size; |
|
||||
} |
|
||||
|
|
||||
void SetInternalState(bool is_internal_) { |
|
||||
is_internal = is_internal_; |
|
||||
} |
|
||||
|
|
||||
BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) { |
|
||||
capacity = new_capacity; |
|
||||
std::swap(buffer, buffer_); |
|
||||
return buffer_; |
|
||||
} |
|
||||
|
|
||||
private: |
|
||||
u8* host_ptr{}; |
|
||||
VAddr cpu_addr{}; |
|
||||
std::size_t size{}; |
|
||||
std::size_t capacity{}; |
|
||||
bool is_internal{}; |
|
||||
BufferStorageType buffer; |
|
||||
}; |
|
||||
|
|
||||
template <typename BufferStorageType, typename BufferType, typename StreamBuffer> |
|
||||
class BufferCache : public RasterizerCache<std::shared_ptr<CachedBuffer<BufferStorageType>>> { |
|
||||
public: |
|
||||
using Buffer = std::shared_ptr<CachedBuffer<BufferStorageType>>; |
|
||||
using BufferInfo = std::pair<const BufferType*, u64>; |
|
||||
|
|
||||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, |
|
||||
std::unique_ptr<StreamBuffer> stream_buffer) |
|
||||
: RasterizerCache<Buffer>{rasterizer}, system{system}, |
|
||||
stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{ |
|
||||
this->stream_buffer->GetHandle()} {} |
|
||||
~BufferCache() = default; |
|
||||
|
|
||||
void Unregister(const Buffer& entry) override { |
|
||||
std::lock_guard lock{RasterizerCache<Buffer>::mutex}; |
|
||||
if (entry->IsInternalized()) { |
|
||||
internalized_entries.erase(entry->GetCacheAddr()); |
|
||||
} |
|
||||
ReserveBuffer(entry); |
|
||||
RasterizerCache<Buffer>::Unregister(entry); |
|
||||
} |
|
||||
|
|
||||
void TickFrame() { |
|
||||
marked_for_destruction_index = |
|
||||
(marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size(); |
|
||||
MarkedForDestruction().clear(); |
|
||||
} |
|
||||
|
|
||||
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, |
|
||||
bool internalize = false, bool is_written = false) { |
|
||||
std::lock_guard lock{RasterizerCache<Buffer>::mutex}; |
|
||||
|
|
||||
auto& memory_manager = system.GPU().MemoryManager(); |
|
||||
const auto host_ptr = memory_manager.GetPointer(gpu_addr); |
|
||||
if (!host_ptr) { |
|
||||
return {GetEmptyBuffer(size), 0}; |
|
||||
} |
|
||||
const auto cache_addr = ToCacheAddr(host_ptr); |
|
||||
|
|
||||
// Cache management is a big overhead, so only cache entries with a given size. |
|
||||
// TODO: Figure out which size is the best for given games. |
|
||||
constexpr std::size_t max_stream_size = 0x800; |
|
||||
if (!internalize && size < max_stream_size && |
|
||||
internalized_entries.find(cache_addr) == internalized_entries.end()) { |
|
||||
return StreamBufferUpload(host_ptr, size, alignment); |
|
||||
} |
|
||||
|
|
||||
auto entry = RasterizerCache<Buffer>::TryGet(cache_addr); |
|
||||
if (!entry) { |
|
||||
return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written); |
|
||||
} |
|
||||
|
|
||||
if (entry->GetSize() < size) { |
|
||||
IncreaseBufferSize(entry, size); |
|
||||
} |
|
||||
if (is_written) { |
|
||||
entry->MarkAsModified(true, *this); |
|
||||
} |
|
||||
return {ToHandle(entry->GetBuffer()), 0}; |
|
||||
} |
|
||||
|
|
||||
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. |
|
||||
BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, |
|
||||
std::size_t alignment = 4) { |
|
||||
std::lock_guard lock{RasterizerCache<Buffer>::mutex}; |
|
||||
return StreamBufferUpload(raw_pointer, size, alignment); |
|
||||
} |
|
||||
|
|
||||
void Map(std::size_t max_size) { |
|
||||
std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); |
|
||||
buffer_offset = buffer_offset_base; |
|
||||
} |
|
||||
|
|
||||
/// Finishes the upload stream, returns true on bindings invalidation. |
|
||||
bool Unmap() { |
|
||||
stream_buffer->Unmap(buffer_offset - buffer_offset_base); |
|
||||
return std::exchange(invalidated, false); |
|
||||
} |
|
||||
|
|
||||
virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0; |
|
||||
|
|
||||
protected: |
|
||||
void FlushObjectInner(const Buffer& entry) override { |
|
||||
DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr()); |
|
||||
} |
|
||||
|
|
||||
virtual BufferStorageType CreateBuffer(std::size_t size) = 0; |
|
||||
|
|
||||
virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0; |
|
||||
|
|
||||
virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset, |
|
||||
std::size_t size, const u8* data) = 0; |
|
||||
|
|
||||
virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset, |
|
||||
std::size_t size, u8* data) = 0; |
|
||||
|
|
||||
virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst, |
|
||||
std::size_t src_offset, std::size_t dst_offset, |
|
||||
std::size_t size) = 0; |
|
||||
|
|
||||
private: |
|
||||
BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, |
|
||||
std::size_t alignment) { |
|
||||
AlignBuffer(alignment); |
|
||||
const std::size_t uploaded_offset = buffer_offset; |
|
||||
std::memcpy(buffer_ptr, raw_pointer, size); |
|
||||
|
|
||||
buffer_ptr += size; |
|
||||
buffer_offset += size; |
|
||||
return {&stream_buffer_handle, uploaded_offset}; |
|
||||
} |
|
||||
|
|
||||
BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size, |
|
||||
bool internalize, bool is_written) { |
|
||||
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); |
|
||||
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); |
|
||||
ASSERT(cpu_addr); |
|
||||
|
|
||||
auto entry = GetUncachedBuffer(*cpu_addr, host_ptr); |
|
||||
entry->SetSize(size); |
|
||||
entry->SetInternalState(internalize); |
|
||||
RasterizerCache<Buffer>::Register(entry); |
|
||||
|
|
||||
if (internalize) { |
|
||||
internalized_entries.emplace(ToCacheAddr(host_ptr)); |
|
||||
} |
|
||||
if (is_written) { |
|
||||
entry->MarkAsModified(true, *this); |
|
||||
} |
|
||||
|
|
||||
if (entry->GetCapacity() < size) { |
|
||||
MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size)); |
|
||||
} |
|
||||
|
|
||||
UploadBufferData(entry->GetBuffer(), 0, size, host_ptr); |
|
||||
return {ToHandle(entry->GetBuffer()), 0}; |
|
||||
} |
|
||||
|
|
||||
void IncreaseBufferSize(Buffer& entry, std::size_t new_size) { |
|
||||
const std::size_t old_size = entry->GetSize(); |
|
||||
if (entry->GetCapacity() < new_size) { |
|
||||
const auto& old_buffer = entry->GetBuffer(); |
|
||||
auto new_buffer = CreateBuffer(new_size); |
|
||||
|
|
||||
// Copy bits from the old buffer to the new buffer. |
|
||||
CopyBufferData(old_buffer, new_buffer, 0, 0, old_size); |
|
||||
MarkedForDestruction().push_back( |
|
||||
entry->ExchangeBuffer(std::move(new_buffer), new_size)); |
|
||||
|
|
||||
// This buffer could have been used |
|
||||
invalidated = true; |
|
||||
} |
|
||||
// Upload the new bits. |
|
||||
const std::size_t size_diff = new_size - old_size; |
|
||||
UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size); |
|
||||
|
|
||||
// Update entry's size in the object and in the cache. |
|
||||
Unregister(entry); |
|
||||
|
|
||||
entry->SetSize(new_size); |
|
||||
RasterizerCache<Buffer>::Register(entry); |
|
||||
} |
|
||||
|
|
||||
Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) { |
|
||||
if (auto entry = TryGetReservedBuffer(host_ptr)) { |
|
||||
return entry; |
|
||||
} |
|
||||
return std::make_shared<CachedBuffer<BufferStorageType>>(cpu_addr, host_ptr); |
|
||||
} |
|
||||
|
|
||||
Buffer TryGetReservedBuffer(u8* host_ptr) { |
|
||||
const auto it = buffer_reserve.find(ToCacheAddr(host_ptr)); |
|
||||
if (it == buffer_reserve.end()) { |
|
||||
return {}; |
|
||||
} |
|
||||
auto& reserve = it->second; |
|
||||
auto entry = reserve.back(); |
|
||||
reserve.pop_back(); |
|
||||
return entry; |
|
||||
} |
|
||||
|
|
||||
void ReserveBuffer(Buffer entry) { |
|
||||
buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry)); |
|
||||
} |
|
||||
|
|
||||
void AlignBuffer(std::size_t alignment) { |
|
||||
// Align the offset, not the mapped pointer |
|
||||
const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); |
|
||||
buffer_ptr += offset_aligned - buffer_offset; |
|
||||
buffer_offset = offset_aligned; |
|
||||
} |
|
||||
|
|
||||
std::vector<BufferStorageType>& MarkedForDestruction() { |
|
||||
return marked_for_destruction_ring_buffer[marked_for_destruction_index]; |
|
||||
} |
|
||||
|
|
||||
Core::System& system; |
|
||||
|
|
||||
std::unique_ptr<StreamBuffer> stream_buffer; |
|
||||
BufferType stream_buffer_handle{}; |
|
||||
|
|
||||
bool invalidated = false; |
|
||||
|
|
||||
u8* buffer_ptr = nullptr; |
|
||||
u64 buffer_offset = 0; |
|
||||
u64 buffer_offset_base = 0; |
|
||||
|
|
||||
std::size_t marked_for_destruction_index = 0; |
|
||||
std::array<std::vector<BufferStorageType>, 4> marked_for_destruction_ring_buffer; |
|
||||
|
|
||||
std::unordered_set<CacheAddr> internalized_entries; |
|
||||
std::unordered_map<CacheAddr, std::vector<Buffer>> buffer_reserve; |
|
||||
}; |
|
||||
|
|
||||
} // namespace VideoCommon |
|
||||
@ -0,0 +1,77 @@ |
|||||
|
// Copyright 2019 yuzu Emulator Project |
||||
|
// Licensed under GPLv2 or any later version |
||||
|
// Refer to the license.txt file included. |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include <unordered_set> |
||||
|
#include <utility> |
||||
|
|
||||
|
#include "common/alignment.h" |
||||
|
#include "common/common_types.h" |
||||
|
#include "video_core/gpu.h" |
||||
|
|
||||
|
namespace VideoCommon { |
||||
|
|
||||
|
class BufferBlock { |
||||
|
public: |
||||
|
bool Overlaps(const CacheAddr start, const CacheAddr end) const { |
||||
|
return (cache_addr < end) && (cache_addr_end > start); |
||||
|
} |
||||
|
|
||||
|
bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { |
||||
|
return cache_addr <= other_start && other_end <= cache_addr_end; |
||||
|
} |
||||
|
|
||||
|
u8* GetWritableHostPtr() const { |
||||
|
return FromCacheAddr(cache_addr); |
||||
|
} |
||||
|
|
||||
|
u8* GetWritableHostPtr(std::size_t offset) const { |
||||
|
return FromCacheAddr(cache_addr + offset); |
||||
|
} |
||||
|
|
||||
|
std::size_t GetOffset(const CacheAddr in_addr) { |
||||
|
return static_cast<std::size_t>(in_addr - cache_addr); |
||||
|
} |
||||
|
|
||||
|
CacheAddr GetCacheAddr() const { |
||||
|
return cache_addr; |
||||
|
} |
||||
|
|
||||
|
CacheAddr GetCacheAddrEnd() const { |
||||
|
return cache_addr_end; |
||||
|
} |
||||
|
|
||||
|
void SetCacheAddr(const CacheAddr new_addr) { |
||||
|
cache_addr = new_addr; |
||||
|
cache_addr_end = new_addr + size; |
||||
|
} |
||||
|
|
||||
|
std::size_t GetSize() const { |
||||
|
return size; |
||||
|
} |
||||
|
|
||||
|
void SetEpoch(u64 new_epoch) { |
||||
|
epoch = new_epoch; |
||||
|
} |
||||
|
|
||||
|
u64 GetEpoch() { |
||||
|
return epoch; |
||||
|
} |
||||
|
|
||||
|
protected: |
||||
|
explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} { |
||||
|
SetCacheAddr(cache_addr); |
||||
|
} |
||||
|
~BufferBlock() = default; |
||||
|
|
||||
|
private: |
||||
|
CacheAddr cache_addr{}; |
||||
|
CacheAddr cache_addr_end{}; |
||||
|
u64 pages{}; |
||||
|
std::size_t size{}; |
||||
|
u64 epoch{}; |
||||
|
}; |
||||
|
|
||||
|
} // namespace VideoCommon |
||||
@ -0,0 +1,449 @@ |
|||||
|
// Copyright 2019 yuzu Emulator Project |
||||
|
// Licensed under GPLv2 or any later version |
||||
|
// Refer to the license.txt file included. |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include <array> |
||||
|
#include <memory> |
||||
|
#include <mutex> |
||||
|
#include <unordered_map> |
||||
|
#include <unordered_set> |
||||
|
#include <utility> |
||||
|
#include <vector> |
||||
|
|
||||
|
#include "common/alignment.h" |
||||
|
#include "common/common_types.h" |
||||
|
#include "core/core.h" |
||||
|
#include "video_core/buffer_cache/buffer_block.h" |
||||
|
#include "video_core/buffer_cache/map_interval.h" |
||||
|
#include "video_core/memory_manager.h" |
||||
|
|
||||
|
namespace VideoCore { |
||||
|
class RasterizerInterface; |
||||
|
} |
||||
|
|
||||
|
namespace VideoCommon { |
||||
|
|
||||
|
using MapInterval = std::shared_ptr<MapIntervalBase>; |
||||
|
|
||||
|
template <typename TBuffer, typename TBufferType, typename StreamBuffer> |
||||
|
class BufferCache { |
||||
|
public: |
||||
|
using BufferInfo = std::pair<const TBufferType*, u64>; |
||||
|
|
||||
|
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, |
||||
|
bool is_written = false) { |
||||
|
std::lock_guard lock{mutex}; |
||||
|
|
||||
|
auto& memory_manager = system.GPU().MemoryManager(); |
||||
|
const auto host_ptr = memory_manager.GetPointer(gpu_addr); |
||||
|
if (!host_ptr) { |
||||
|
return {GetEmptyBuffer(size), 0}; |
||||
|
} |
||||
|
const auto cache_addr = ToCacheAddr(host_ptr); |
||||
|
|
||||
|
// Cache management is a big overhead, so only cache entries with a given size. |
||||
|
// TODO: Figure out which size is the best for given games. |
||||
|
constexpr std::size_t max_stream_size = 0x800; |
||||
|
if (size < max_stream_size) { |
||||
|
if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { |
||||
|
return StreamBufferUpload(host_ptr, size, alignment); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
auto block = GetBlock(cache_addr, size); |
||||
|
auto map = MapAddress(block, gpu_addr, cache_addr, size); |
||||
|
if (is_written) { |
||||
|
map->MarkAsModified(true, GetModifiedTicks()); |
||||
|
if (!map->IsWritten()) { |
||||
|
map->MarkAsWritten(true); |
||||
|
MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); |
||||
|
} |
||||
|
} else { |
||||
|
if (map->IsWritten()) { |
||||
|
WriteBarrier(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
const u64 offset = static_cast<u64>(block->GetOffset(cache_addr)); |
||||
|
|
||||
|
return {ToHandle(block), offset}; |
||||
|
} |
||||
|
|
||||
|
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. |
||||
|
BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, |
||||
|
std::size_t alignment = 4) { |
||||
|
std::lock_guard lock{mutex}; |
||||
|
return StreamBufferUpload(raw_pointer, size, alignment); |
||||
|
} |
||||
|
|
||||
|
void Map(std::size_t max_size) { |
||||
|
std::lock_guard lock{mutex}; |
||||
|
|
||||
|
std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); |
||||
|
buffer_offset = buffer_offset_base; |
||||
|
} |
||||
|
|
||||
|
/// Finishes the upload stream, returns true on bindings invalidation. |
||||
|
bool Unmap() { |
||||
|
std::lock_guard lock{mutex}; |
||||
|
|
||||
|
stream_buffer->Unmap(buffer_offset - buffer_offset_base); |
||||
|
return std::exchange(invalidated, false); |
||||
|
} |
||||
|
|
||||
|
void TickFrame() { |
||||
|
++epoch; |
||||
|
while (!pending_destruction.empty()) { |
||||
|
if (pending_destruction.front()->GetEpoch() + 1 > epoch) { |
||||
|
break; |
||||
|
} |
||||
|
pending_destruction.pop_front(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/// Write any cached resources overlapping the specified region back to memory |
||||
|
void FlushRegion(CacheAddr addr, std::size_t size) { |
||||
|
std::lock_guard lock{mutex}; |
||||
|
|
||||
|
std::vector<MapInterval> objects = GetMapsInRange(addr, size); |
||||
|
std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) { |
||||
|
return a->GetModificationTick() < b->GetModificationTick(); |
||||
|
}); |
||||
|
for (auto& object : objects) { |
||||
|
if (object->IsModified() && object->IsRegistered()) { |
||||
|
FlushMap(object); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/// Mark the specified region as being invalidated |
||||
|
void InvalidateRegion(CacheAddr addr, u64 size) { |
||||
|
std::lock_guard lock{mutex}; |
||||
|
|
||||
|
std::vector<MapInterval> objects = GetMapsInRange(addr, size); |
||||
|
for (auto& object : objects) { |
||||
|
if (object->IsRegistered()) { |
||||
|
Unregister(object); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0; |
||||
|
|
||||
|
protected: |
||||
|
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, |
||||
|
std::unique_ptr<StreamBuffer> stream_buffer) |
||||
|
: rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)}, |
||||
|
stream_buffer_handle{this->stream_buffer->GetHandle()} {} |
||||
|
|
||||
|
~BufferCache() = default; |
||||
|
|
||||
|
virtual const TBufferType* ToHandle(const TBuffer& storage) = 0; |
||||
|
|
||||
|
virtual void WriteBarrier() = 0; |
||||
|
|
||||
|
virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0; |
||||
|
|
||||
|
virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, |
||||
|
const u8* data) = 0; |
||||
|
|
||||
|
virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, |
||||
|
u8* data) = 0; |
||||
|
|
||||
|
virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset, |
||||
|
std::size_t dst_offset, std::size_t size) = 0; |
||||
|
|
||||
|
/// Register an object into the cache |
||||
|
void Register(const MapInterval& new_map, bool inherit_written = false) { |
||||
|
const CacheAddr cache_ptr = new_map->GetStart(); |
||||
|
const std::optional<VAddr> cpu_addr = |
||||
|
system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress()); |
||||
|
if (!cache_ptr || !cpu_addr) { |
||||
|
LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", |
||||
|
new_map->GetGpuAddress()); |
||||
|
return; |
||||
|
} |
||||
|
const std::size_t size = new_map->GetEnd() - new_map->GetStart(); |
||||
|
new_map->SetCpuAddress(*cpu_addr); |
||||
|
new_map->MarkAsRegistered(true); |
||||
|
const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; |
||||
|
mapped_addresses.insert({interval, new_map}); |
||||
|
rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); |
||||
|
if (inherit_written) { |
||||
|
MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); |
||||
|
new_map->MarkAsWritten(true); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/// Unregisters an object from the cache |
||||
|
void Unregister(MapInterval& map) { |
||||
|
const std::size_t size = map->GetEnd() - map->GetStart(); |
||||
|
rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1); |
||||
|
map->MarkAsRegistered(false); |
||||
|
if (map->IsWritten()) { |
||||
|
UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); |
||||
|
} |
||||
|
const IntervalType delete_interval{map->GetStart(), map->GetEnd()}; |
||||
|
mapped_addresses.erase(delete_interval); |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) { |
||||
|
return std::make_shared<MapIntervalBase>(start, end, gpu_addr); |
||||
|
} |
||||
|
|
||||
|
MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, |
||||
|
const CacheAddr cache_addr, const std::size_t size) { |
||||
|
|
||||
|
std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size); |
||||
|
if (overlaps.empty()) { |
||||
|
const CacheAddr cache_addr_end = cache_addr + size; |
||||
|
MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr); |
||||
|
u8* host_ptr = FromCacheAddr(cache_addr); |
||||
|
UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); |
||||
|
Register(new_map); |
||||
|
return new_map; |
||||
|
} |
||||
|
|
||||
|
const CacheAddr cache_addr_end = cache_addr + size; |
||||
|
if (overlaps.size() == 1) { |
||||
|
MapInterval& current_map = overlaps[0]; |
||||
|
if (current_map->IsInside(cache_addr, cache_addr_end)) { |
||||
|
return current_map; |
||||
|
} |
||||
|
} |
||||
|
CacheAddr new_start = cache_addr; |
||||
|
CacheAddr new_end = cache_addr_end; |
||||
|
bool write_inheritance = false; |
||||
|
bool modified_inheritance = false; |
||||
|
// Calculate new buffer parameters |
||||
|
for (auto& overlap : overlaps) { |
||||
|
new_start = std::min(overlap->GetStart(), new_start); |
||||
|
new_end = std::max(overlap->GetEnd(), new_end); |
||||
|
write_inheritance |= overlap->IsWritten(); |
||||
|
modified_inheritance |= overlap->IsModified(); |
||||
|
} |
||||
|
GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; |
||||
|
for (auto& overlap : overlaps) { |
||||
|
Unregister(overlap); |
||||
|
} |
||||
|
UpdateBlock(block, new_start, new_end, overlaps); |
||||
|
MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); |
||||
|
if (modified_inheritance) { |
||||
|
new_map->MarkAsModified(true, GetModifiedTicks()); |
||||
|
} |
||||
|
Register(new_map, write_inheritance); |
||||
|
return new_map; |
||||
|
} |
||||
|
|
||||
|
void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end, |
||||
|
std::vector<MapInterval>& overlaps) { |
||||
|
const IntervalType base_interval{start, end}; |
||||
|
IntervalSet interval_set{}; |
||||
|
interval_set.add(base_interval); |
||||
|
for (auto& overlap : overlaps) { |
||||
|
const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()}; |
||||
|
interval_set.subtract(subtract); |
||||
|
} |
||||
|
for (auto& interval : interval_set) { |
||||
|
std::size_t size = interval.upper() - interval.lower(); |
||||
|
if (size > 0) { |
||||
|
u8* host_ptr = FromCacheAddr(interval.lower()); |
||||
|
UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) { |
||||
|
if (size == 0) { |
||||
|
return {}; |
||||
|
} |
||||
|
|
||||
|
std::vector<MapInterval> objects{}; |
||||
|
const IntervalType interval{addr, addr + size}; |
||||
|
for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) { |
||||
|
objects.push_back(pair.second); |
||||
|
} |
||||
|
|
||||
|
return objects; |
||||
|
} |
||||
|
|
||||
|
/// Returns a ticks counter used for tracking when cached objects were last modified |
||||
|
u64 GetModifiedTicks() { |
||||
|
return ++modified_ticks; |
||||
|
} |
||||
|
|
||||
|
void FlushMap(MapInterval map) { |
||||
|
std::size_t size = map->GetEnd() - map->GetStart(); |
||||
|
TBuffer block = blocks[map->GetStart() >> block_page_bits]; |
||||
|
u8* host_ptr = FromCacheAddr(map->GetStart()); |
||||
|
DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr); |
||||
|
map->MarkAsModified(false, 0); |
||||
|
} |
||||
|
|
||||
|
BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, |
||||
|
std::size_t alignment) { |
||||
|
AlignBuffer(alignment); |
||||
|
const std::size_t uploaded_offset = buffer_offset; |
||||
|
std::memcpy(buffer_ptr, raw_pointer, size); |
||||
|
|
||||
|
buffer_ptr += size; |
||||
|
buffer_offset += size; |
||||
|
return {&stream_buffer_handle, uploaded_offset}; |
||||
|
} |
||||
|
|
||||
|
void AlignBuffer(std::size_t alignment) { |
||||
|
// Align the offset, not the mapped pointer |
||||
|
const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); |
||||
|
buffer_ptr += offset_aligned - buffer_offset; |
||||
|
buffer_offset = offset_aligned; |
||||
|
} |
||||
|
|
||||
|
TBuffer EnlargeBlock(TBuffer buffer) { |
||||
|
const std::size_t old_size = buffer->GetSize(); |
||||
|
const std::size_t new_size = old_size + block_page_size; |
||||
|
const CacheAddr cache_addr = buffer->GetCacheAddr(); |
||||
|
TBuffer new_buffer = CreateBlock(cache_addr, new_size); |
||||
|
CopyBlock(buffer, new_buffer, 0, 0, old_size); |
||||
|
buffer->SetEpoch(epoch); |
||||
|
pending_destruction.push_back(buffer); |
||||
|
const CacheAddr cache_addr_end = cache_addr + new_size - 1; |
||||
|
u64 page_start = cache_addr >> block_page_bits; |
||||
|
const u64 page_end = cache_addr_end >> block_page_bits; |
||||
|
while (page_start <= page_end) { |
||||
|
blocks[page_start] = new_buffer; |
||||
|
++page_start; |
||||
|
} |
||||
|
return new_buffer; |
||||
|
} |
||||
|
|
||||
|
TBuffer MergeBlocks(TBuffer first, TBuffer second) { |
||||
|
const std::size_t size_1 = first->GetSize(); |
||||
|
const std::size_t size_2 = second->GetSize(); |
||||
|
const CacheAddr first_addr = first->GetCacheAddr(); |
||||
|
const CacheAddr second_addr = second->GetCacheAddr(); |
||||
|
const CacheAddr new_addr = std::min(first_addr, second_addr); |
||||
|
const std::size_t new_size = size_1 + size_2; |
||||
|
TBuffer new_buffer = CreateBlock(new_addr, new_size); |
||||
|
CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); |
||||
|
CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2); |
||||
|
first->SetEpoch(epoch); |
||||
|
second->SetEpoch(epoch); |
||||
|
pending_destruction.push_back(first); |
||||
|
pending_destruction.push_back(second); |
||||
|
const CacheAddr cache_addr_end = new_addr + new_size - 1; |
||||
|
u64 page_start = new_addr >> block_page_bits; |
||||
|
const u64 page_end = cache_addr_end >> block_page_bits; |
||||
|
while (page_start <= page_end) { |
||||
|
blocks[page_start] = new_buffer; |
||||
|
++page_start; |
||||
|
} |
||||
|
return new_buffer; |
||||
|
} |
||||
|
|
||||
|
TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) { |
||||
|
TBuffer found{}; |
||||
|
const CacheAddr cache_addr_end = cache_addr + size - 1; |
||||
|
u64 page_start = cache_addr >> block_page_bits; |
||||
|
const u64 page_end = cache_addr_end >> block_page_bits; |
||||
|
const u64 num_pages = page_end - page_start + 1; |
||||
|
while (page_start <= page_end) { |
||||
|
auto it = blocks.find(page_start); |
||||
|
if (it == blocks.end()) { |
||||
|
if (found) { |
||||
|
found = EnlargeBlock(found); |
||||
|
} else { |
||||
|
const CacheAddr start_addr = (page_start << block_page_bits); |
||||
|
found = CreateBlock(start_addr, block_page_size); |
||||
|
blocks[page_start] = found; |
||||
|
} |
||||
|
} else { |
||||
|
if (found) { |
||||
|
if (found == it->second) { |
||||
|
++page_start; |
||||
|
continue; |
||||
|
} |
||||
|
found = MergeBlocks(found, it->second); |
||||
|
} else { |
||||
|
found = it->second; |
||||
|
} |
||||
|
} |
||||
|
++page_start; |
||||
|
} |
||||
|
return found; |
||||
|
} |
||||
|
|
||||
|
void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { |
||||
|
u64 page_start = start >> write_page_bit; |
||||
|
const u64 page_end = end >> write_page_bit; |
||||
|
while (page_start <= page_end) { |
||||
|
auto it = written_pages.find(page_start); |
||||
|
if (it != written_pages.end()) { |
||||
|
it->second = it->second + 1; |
||||
|
} else { |
||||
|
written_pages[page_start] = 1; |
||||
|
} |
||||
|
page_start++; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { |
||||
|
u64 page_start = start >> write_page_bit; |
||||
|
const u64 page_end = end >> write_page_bit; |
||||
|
while (page_start <= page_end) { |
||||
|
auto it = written_pages.find(page_start); |
||||
|
if (it != written_pages.end()) { |
||||
|
if (it->second > 1) { |
||||
|
it->second = it->second - 1; |
||||
|
} else { |
||||
|
written_pages.erase(it); |
||||
|
} |
||||
|
} |
||||
|
page_start++; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const { |
||||
|
u64 page_start = start >> write_page_bit; |
||||
|
const u64 page_end = end >> write_page_bit; |
||||
|
while (page_start <= page_end) { |
||||
|
if (written_pages.count(page_start) > 0) { |
||||
|
return true; |
||||
|
} |
||||
|
page_start++; |
||||
|
} |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
std::unique_ptr<StreamBuffer> stream_buffer; |
||||
|
TBufferType stream_buffer_handle{}; |
||||
|
|
||||
|
bool invalidated = false; |
||||
|
|
||||
|
u8* buffer_ptr = nullptr; |
||||
|
u64 buffer_offset = 0; |
||||
|
u64 buffer_offset_base = 0; |
||||
|
|
||||
|
using IntervalSet = boost::icl::interval_set<CacheAddr>; |
||||
|
using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>; |
||||
|
using IntervalType = typename IntervalCache::interval_type; |
||||
|
IntervalCache mapped_addresses{}; |
||||
|
|
||||
|
static constexpr u64 write_page_bit{11}; |
||||
|
std::unordered_map<u64, u32> written_pages{}; |
||||
|
|
||||
|
static constexpr u64 block_page_bits{21}; |
||||
|
static constexpr u64 block_page_size{1 << block_page_bits}; |
||||
|
std::unordered_map<u64, TBuffer> blocks{}; |
||||
|
|
||||
|
std::list<TBuffer> pending_destruction{}; |
||||
|
u64 epoch{}; |
||||
|
u64 modified_ticks{}; |
||||
|
VideoCore::RasterizerInterface& rasterizer; |
||||
|
Core::System& system; |
||||
|
std::recursive_mutex mutex; |
||||
|
}; |
||||
|
|
||||
|
} // namespace VideoCommon |
||||
@ -0,0 +1,89 @@ |
|||||
|
// Copyright 2019 yuzu Emulator Project |
||||
|
// Licensed under GPLv2 or any later version |
||||
|
// Refer to the license.txt file included. |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include "common/common_types.h" |
||||
|
#include "video_core/gpu.h" |
||||
|
|
||||
|
namespace VideoCommon { |
||||
|
|
||||
|
class MapIntervalBase { |
||||
|
public: |
||||
|
MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) |
||||
|
: start{start}, end{end}, gpu_addr{gpu_addr} {} |
||||
|
|
||||
|
void SetCpuAddress(VAddr new_cpu_addr) { |
||||
|
cpu_addr = new_cpu_addr; |
||||
|
} |
||||
|
|
||||
|
VAddr GetCpuAddress() const { |
||||
|
return cpu_addr; |
||||
|
} |
||||
|
|
||||
|
GPUVAddr GetGpuAddress() const { |
||||
|
return gpu_addr; |
||||
|
} |
||||
|
|
||||
|
bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { |
||||
|
return (start <= other_start && other_end <= end); |
||||
|
} |
||||
|
|
||||
|
bool operator==(const MapIntervalBase& rhs) const { |
||||
|
return std::tie(start, end) == std::tie(rhs.start, rhs.end); |
||||
|
} |
||||
|
|
||||
|
bool operator!=(const MapIntervalBase& rhs) const { |
||||
|
return !operator==(rhs); |
||||
|
} |
||||
|
|
||||
|
void MarkAsRegistered(const bool registered) { |
||||
|
is_registered = registered; |
||||
|
} |
||||
|
|
||||
|
bool IsRegistered() const { |
||||
|
return is_registered; |
||||
|
} |
||||
|
|
||||
|
CacheAddr GetStart() const { |
||||
|
return start; |
||||
|
} |
||||
|
|
||||
|
CacheAddr GetEnd() const { |
||||
|
return end; |
||||
|
} |
||||
|
|
||||
|
void MarkAsModified(const bool is_modified_, const u64 tick) { |
||||
|
is_modified = is_modified_; |
||||
|
ticks = tick; |
||||
|
} |
||||
|
|
||||
|
bool IsModified() const { |
||||
|
return is_modified; |
||||
|
} |
||||
|
|
||||
|
u64 GetModificationTick() const { |
||||
|
return ticks; |
||||
|
} |
||||
|
|
||||
|
void MarkAsWritten(const bool is_written_) { |
||||
|
is_written = is_written_; |
||||
|
} |
||||
|
|
||||
|
bool IsWritten() const { |
||||
|
return is_written; |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
CacheAddr start; |
||||
|
CacheAddr end; |
||||
|
GPUVAddr gpu_addr; |
||||
|
VAddr cpu_addr{}; |
||||
|
bool is_written{}; |
||||
|
bool is_modified{}; |
||||
|
bool is_registered{}; |
||||
|
u64 ticks{}; |
||||
|
}; |
||||
|
|
||||
|
} // namespace VideoCommon |
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue