Browse Source
Merge pull request #2783 from FernandoS27/new-buffer-cache
Merge pull request #2783 from FernandoS27/new-buffer-cache
Implement a New LLE Buffer Cachence_cpp
committed by
GitHub
9 changed files with 684 additions and 330 deletions
-
4src/video_core/CMakeLists.txt
-
299src/video_core/buffer_cache.h
-
77src/video_core/buffer_cache/buffer_block.h
-
449src/video_core/buffer_cache/buffer_cache.h
-
89src/video_core/buffer_cache/map_interval.h
-
4src/video_core/gpu.h
-
51src/video_core/renderer_opengl/gl_buffer_cache.cpp
-
39src/video_core/renderer_opengl/gl_buffer_cache.h
-
2src/video_core/renderer_opengl/gl_rasterizer.cpp
@ -1,299 +0,0 @@ |
|||
// Copyright 2019 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <array> |
|||
#include <memory> |
|||
#include <mutex> |
|||
#include <unordered_map> |
|||
#include <unordered_set> |
|||
#include <utility> |
|||
#include <vector> |
|||
|
|||
#include "common/alignment.h" |
|||
#include "common/common_types.h" |
|||
#include "core/core.h" |
|||
#include "video_core/memory_manager.h" |
|||
#include "video_core/rasterizer_cache.h" |
|||
|
|||
namespace VideoCore { |
|||
class RasterizerInterface; |
|||
} |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
template <typename BufferStorageType> |
|||
class CachedBuffer final : public RasterizerCacheObject { |
|||
public: |
|||
explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr) |
|||
: RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {} |
|||
~CachedBuffer() override = default; |
|||
|
|||
VAddr GetCpuAddr() const override { |
|||
return cpu_addr; |
|||
} |
|||
|
|||
std::size_t GetSizeInBytes() const override { |
|||
return size; |
|||
} |
|||
|
|||
u8* GetWritableHostPtr() const { |
|||
return host_ptr; |
|||
} |
|||
|
|||
std::size_t GetSize() const { |
|||
return size; |
|||
} |
|||
|
|||
std::size_t GetCapacity() const { |
|||
return capacity; |
|||
} |
|||
|
|||
bool IsInternalized() const { |
|||
return is_internal; |
|||
} |
|||
|
|||
const BufferStorageType& GetBuffer() const { |
|||
return buffer; |
|||
} |
|||
|
|||
void SetSize(std::size_t new_size) { |
|||
size = new_size; |
|||
} |
|||
|
|||
void SetInternalState(bool is_internal_) { |
|||
is_internal = is_internal_; |
|||
} |
|||
|
|||
BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) { |
|||
capacity = new_capacity; |
|||
std::swap(buffer, buffer_); |
|||
return buffer_; |
|||
} |
|||
|
|||
private: |
|||
u8* host_ptr{}; |
|||
VAddr cpu_addr{}; |
|||
std::size_t size{}; |
|||
std::size_t capacity{}; |
|||
bool is_internal{}; |
|||
BufferStorageType buffer; |
|||
}; |
|||
|
|||
template <typename BufferStorageType, typename BufferType, typename StreamBuffer> |
|||
class BufferCache : public RasterizerCache<std::shared_ptr<CachedBuffer<BufferStorageType>>> { |
|||
public: |
|||
using Buffer = std::shared_ptr<CachedBuffer<BufferStorageType>>; |
|||
using BufferInfo = std::pair<const BufferType*, u64>; |
|||
|
|||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, |
|||
std::unique_ptr<StreamBuffer> stream_buffer) |
|||
: RasterizerCache<Buffer>{rasterizer}, system{system}, |
|||
stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{ |
|||
this->stream_buffer->GetHandle()} {} |
|||
~BufferCache() = default; |
|||
|
|||
void Unregister(const Buffer& entry) override { |
|||
std::lock_guard lock{RasterizerCache<Buffer>::mutex}; |
|||
if (entry->IsInternalized()) { |
|||
internalized_entries.erase(entry->GetCacheAddr()); |
|||
} |
|||
ReserveBuffer(entry); |
|||
RasterizerCache<Buffer>::Unregister(entry); |
|||
} |
|||
|
|||
void TickFrame() { |
|||
marked_for_destruction_index = |
|||
(marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size(); |
|||
MarkedForDestruction().clear(); |
|||
} |
|||
|
|||
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, |
|||
bool internalize = false, bool is_written = false) { |
|||
std::lock_guard lock{RasterizerCache<Buffer>::mutex}; |
|||
|
|||
auto& memory_manager = system.GPU().MemoryManager(); |
|||
const auto host_ptr = memory_manager.GetPointer(gpu_addr); |
|||
if (!host_ptr) { |
|||
return {GetEmptyBuffer(size), 0}; |
|||
} |
|||
const auto cache_addr = ToCacheAddr(host_ptr); |
|||
|
|||
// Cache management is a big overhead, so only cache entries with a given size. |
|||
// TODO: Figure out which size is the best for given games. |
|||
constexpr std::size_t max_stream_size = 0x800; |
|||
if (!internalize && size < max_stream_size && |
|||
internalized_entries.find(cache_addr) == internalized_entries.end()) { |
|||
return StreamBufferUpload(host_ptr, size, alignment); |
|||
} |
|||
|
|||
auto entry = RasterizerCache<Buffer>::TryGet(cache_addr); |
|||
if (!entry) { |
|||
return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written); |
|||
} |
|||
|
|||
if (entry->GetSize() < size) { |
|||
IncreaseBufferSize(entry, size); |
|||
} |
|||
if (is_written) { |
|||
entry->MarkAsModified(true, *this); |
|||
} |
|||
return {ToHandle(entry->GetBuffer()), 0}; |
|||
} |
|||
|
|||
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. |
|||
BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, |
|||
std::size_t alignment = 4) { |
|||
std::lock_guard lock{RasterizerCache<Buffer>::mutex}; |
|||
return StreamBufferUpload(raw_pointer, size, alignment); |
|||
} |
|||
|
|||
void Map(std::size_t max_size) { |
|||
std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); |
|||
buffer_offset = buffer_offset_base; |
|||
} |
|||
|
|||
/// Finishes the upload stream, returns true on bindings invalidation. |
|||
bool Unmap() { |
|||
stream_buffer->Unmap(buffer_offset - buffer_offset_base); |
|||
return std::exchange(invalidated, false); |
|||
} |
|||
|
|||
virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0; |
|||
|
|||
protected: |
|||
void FlushObjectInner(const Buffer& entry) override { |
|||
DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr()); |
|||
} |
|||
|
|||
virtual BufferStorageType CreateBuffer(std::size_t size) = 0; |
|||
|
|||
virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0; |
|||
|
|||
virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset, |
|||
std::size_t size, const u8* data) = 0; |
|||
|
|||
virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset, |
|||
std::size_t size, u8* data) = 0; |
|||
|
|||
virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst, |
|||
std::size_t src_offset, std::size_t dst_offset, |
|||
std::size_t size) = 0; |
|||
|
|||
private: |
|||
BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, |
|||
std::size_t alignment) { |
|||
AlignBuffer(alignment); |
|||
const std::size_t uploaded_offset = buffer_offset; |
|||
std::memcpy(buffer_ptr, raw_pointer, size); |
|||
|
|||
buffer_ptr += size; |
|||
buffer_offset += size; |
|||
return {&stream_buffer_handle, uploaded_offset}; |
|||
} |
|||
|
|||
BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size, |
|||
bool internalize, bool is_written) { |
|||
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); |
|||
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); |
|||
ASSERT(cpu_addr); |
|||
|
|||
auto entry = GetUncachedBuffer(*cpu_addr, host_ptr); |
|||
entry->SetSize(size); |
|||
entry->SetInternalState(internalize); |
|||
RasterizerCache<Buffer>::Register(entry); |
|||
|
|||
if (internalize) { |
|||
internalized_entries.emplace(ToCacheAddr(host_ptr)); |
|||
} |
|||
if (is_written) { |
|||
entry->MarkAsModified(true, *this); |
|||
} |
|||
|
|||
if (entry->GetCapacity() < size) { |
|||
MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size)); |
|||
} |
|||
|
|||
UploadBufferData(entry->GetBuffer(), 0, size, host_ptr); |
|||
return {ToHandle(entry->GetBuffer()), 0}; |
|||
} |
|||
|
|||
void IncreaseBufferSize(Buffer& entry, std::size_t new_size) { |
|||
const std::size_t old_size = entry->GetSize(); |
|||
if (entry->GetCapacity() < new_size) { |
|||
const auto& old_buffer = entry->GetBuffer(); |
|||
auto new_buffer = CreateBuffer(new_size); |
|||
|
|||
// Copy bits from the old buffer to the new buffer. |
|||
CopyBufferData(old_buffer, new_buffer, 0, 0, old_size); |
|||
MarkedForDestruction().push_back( |
|||
entry->ExchangeBuffer(std::move(new_buffer), new_size)); |
|||
|
|||
// This buffer could have been used |
|||
invalidated = true; |
|||
} |
|||
// Upload the new bits. |
|||
const std::size_t size_diff = new_size - old_size; |
|||
UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size); |
|||
|
|||
// Update entry's size in the object and in the cache. |
|||
Unregister(entry); |
|||
|
|||
entry->SetSize(new_size); |
|||
RasterizerCache<Buffer>::Register(entry); |
|||
} |
|||
|
|||
Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) { |
|||
if (auto entry = TryGetReservedBuffer(host_ptr)) { |
|||
return entry; |
|||
} |
|||
return std::make_shared<CachedBuffer<BufferStorageType>>(cpu_addr, host_ptr); |
|||
} |
|||
|
|||
Buffer TryGetReservedBuffer(u8* host_ptr) { |
|||
const auto it = buffer_reserve.find(ToCacheAddr(host_ptr)); |
|||
if (it == buffer_reserve.end()) { |
|||
return {}; |
|||
} |
|||
auto& reserve = it->second; |
|||
auto entry = reserve.back(); |
|||
reserve.pop_back(); |
|||
return entry; |
|||
} |
|||
|
|||
void ReserveBuffer(Buffer entry) { |
|||
buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry)); |
|||
} |
|||
|
|||
void AlignBuffer(std::size_t alignment) { |
|||
// Align the offset, not the mapped pointer |
|||
const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); |
|||
buffer_ptr += offset_aligned - buffer_offset; |
|||
buffer_offset = offset_aligned; |
|||
} |
|||
|
|||
std::vector<BufferStorageType>& MarkedForDestruction() { |
|||
return marked_for_destruction_ring_buffer[marked_for_destruction_index]; |
|||
} |
|||
|
|||
Core::System& system; |
|||
|
|||
std::unique_ptr<StreamBuffer> stream_buffer; |
|||
BufferType stream_buffer_handle{}; |
|||
|
|||
bool invalidated = false; |
|||
|
|||
u8* buffer_ptr = nullptr; |
|||
u64 buffer_offset = 0; |
|||
u64 buffer_offset_base = 0; |
|||
|
|||
std::size_t marked_for_destruction_index = 0; |
|||
std::array<std::vector<BufferStorageType>, 4> marked_for_destruction_ring_buffer; |
|||
|
|||
std::unordered_set<CacheAddr> internalized_entries; |
|||
std::unordered_map<CacheAddr, std::vector<Buffer>> buffer_reserve; |
|||
}; |
|||
|
|||
} // namespace VideoCommon |
|||
@ -0,0 +1,77 @@ |
|||
// Copyright 2019 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <unordered_set> |
|||
#include <utility> |
|||
|
|||
#include "common/alignment.h" |
|||
#include "common/common_types.h" |
|||
#include "video_core/gpu.h" |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
class BufferBlock { |
|||
public: |
|||
bool Overlaps(const CacheAddr start, const CacheAddr end) const { |
|||
return (cache_addr < end) && (cache_addr_end > start); |
|||
} |
|||
|
|||
bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { |
|||
return cache_addr <= other_start && other_end <= cache_addr_end; |
|||
} |
|||
|
|||
u8* GetWritableHostPtr() const { |
|||
return FromCacheAddr(cache_addr); |
|||
} |
|||
|
|||
u8* GetWritableHostPtr(std::size_t offset) const { |
|||
return FromCacheAddr(cache_addr + offset); |
|||
} |
|||
|
|||
std::size_t GetOffset(const CacheAddr in_addr) { |
|||
return static_cast<std::size_t>(in_addr - cache_addr); |
|||
} |
|||
|
|||
CacheAddr GetCacheAddr() const { |
|||
return cache_addr; |
|||
} |
|||
|
|||
CacheAddr GetCacheAddrEnd() const { |
|||
return cache_addr_end; |
|||
} |
|||
|
|||
void SetCacheAddr(const CacheAddr new_addr) { |
|||
cache_addr = new_addr; |
|||
cache_addr_end = new_addr + size; |
|||
} |
|||
|
|||
std::size_t GetSize() const { |
|||
return size; |
|||
} |
|||
|
|||
void SetEpoch(u64 new_epoch) { |
|||
epoch = new_epoch; |
|||
} |
|||
|
|||
u64 GetEpoch() { |
|||
return epoch; |
|||
} |
|||
|
|||
protected: |
|||
explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} { |
|||
SetCacheAddr(cache_addr); |
|||
} |
|||
~BufferBlock() = default; |
|||
|
|||
private: |
|||
CacheAddr cache_addr{}; |
|||
CacheAddr cache_addr_end{}; |
|||
u64 pages{}; |
|||
std::size_t size{}; |
|||
u64 epoch{}; |
|||
}; |
|||
|
|||
} // namespace VideoCommon |
|||
@ -0,0 +1,449 @@ |
|||
// Copyright 2019 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <array> |
|||
#include <memory> |
|||
#include <mutex> |
|||
#include <unordered_map> |
|||
#include <unordered_set> |
|||
#include <utility> |
|||
#include <vector> |
|||
|
|||
#include "common/alignment.h" |
|||
#include "common/common_types.h" |
|||
#include "core/core.h" |
|||
#include "video_core/buffer_cache/buffer_block.h" |
|||
#include "video_core/buffer_cache/map_interval.h" |
|||
#include "video_core/memory_manager.h" |
|||
|
|||
namespace VideoCore { |
|||
class RasterizerInterface; |
|||
} |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
using MapInterval = std::shared_ptr<MapIntervalBase>; |
|||
|
|||
template <typename TBuffer, typename TBufferType, typename StreamBuffer> |
|||
class BufferCache { |
|||
public: |
|||
using BufferInfo = std::pair<const TBufferType*, u64>; |
|||
|
|||
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, |
|||
bool is_written = false) { |
|||
std::lock_guard lock{mutex}; |
|||
|
|||
auto& memory_manager = system.GPU().MemoryManager(); |
|||
const auto host_ptr = memory_manager.GetPointer(gpu_addr); |
|||
if (!host_ptr) { |
|||
return {GetEmptyBuffer(size), 0}; |
|||
} |
|||
const auto cache_addr = ToCacheAddr(host_ptr); |
|||
|
|||
// Cache management is a big overhead, so only cache entries with a given size. |
|||
// TODO: Figure out which size is the best for given games. |
|||
constexpr std::size_t max_stream_size = 0x800; |
|||
if (size < max_stream_size) { |
|||
if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { |
|||
return StreamBufferUpload(host_ptr, size, alignment); |
|||
} |
|||
} |
|||
|
|||
auto block = GetBlock(cache_addr, size); |
|||
auto map = MapAddress(block, gpu_addr, cache_addr, size); |
|||
if (is_written) { |
|||
map->MarkAsModified(true, GetModifiedTicks()); |
|||
if (!map->IsWritten()) { |
|||
map->MarkAsWritten(true); |
|||
MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); |
|||
} |
|||
} else { |
|||
if (map->IsWritten()) { |
|||
WriteBarrier(); |
|||
} |
|||
} |
|||
|
|||
const u64 offset = static_cast<u64>(block->GetOffset(cache_addr)); |
|||
|
|||
return {ToHandle(block), offset}; |
|||
} |
|||
|
|||
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. |
|||
BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, |
|||
std::size_t alignment = 4) { |
|||
std::lock_guard lock{mutex}; |
|||
return StreamBufferUpload(raw_pointer, size, alignment); |
|||
} |
|||
|
|||
void Map(std::size_t max_size) { |
|||
std::lock_guard lock{mutex}; |
|||
|
|||
std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); |
|||
buffer_offset = buffer_offset_base; |
|||
} |
|||
|
|||
/// Finishes the upload stream, returns true on bindings invalidation. |
|||
bool Unmap() { |
|||
std::lock_guard lock{mutex}; |
|||
|
|||
stream_buffer->Unmap(buffer_offset - buffer_offset_base); |
|||
return std::exchange(invalidated, false); |
|||
} |
|||
|
|||
void TickFrame() { |
|||
++epoch; |
|||
while (!pending_destruction.empty()) { |
|||
if (pending_destruction.front()->GetEpoch() + 1 > epoch) { |
|||
break; |
|||
} |
|||
pending_destruction.pop_front(); |
|||
} |
|||
} |
|||
|
|||
/// Write any cached resources overlapping the specified region back to memory |
|||
void FlushRegion(CacheAddr addr, std::size_t size) { |
|||
std::lock_guard lock{mutex}; |
|||
|
|||
std::vector<MapInterval> objects = GetMapsInRange(addr, size); |
|||
std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) { |
|||
return a->GetModificationTick() < b->GetModificationTick(); |
|||
}); |
|||
for (auto& object : objects) { |
|||
if (object->IsModified() && object->IsRegistered()) { |
|||
FlushMap(object); |
|||
} |
|||
} |
|||
} |
|||
|
|||
/// Mark the specified region as being invalidated |
|||
void InvalidateRegion(CacheAddr addr, u64 size) { |
|||
std::lock_guard lock{mutex}; |
|||
|
|||
std::vector<MapInterval> objects = GetMapsInRange(addr, size); |
|||
for (auto& object : objects) { |
|||
if (object->IsRegistered()) { |
|||
Unregister(object); |
|||
} |
|||
} |
|||
} |
|||
|
|||
virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0; |
|||
|
|||
protected: |
|||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, |
|||
std::unique_ptr<StreamBuffer> stream_buffer) |
|||
: rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)}, |
|||
stream_buffer_handle{this->stream_buffer->GetHandle()} {} |
|||
|
|||
~BufferCache() = default; |
|||
|
|||
virtual const TBufferType* ToHandle(const TBuffer& storage) = 0; |
|||
|
|||
virtual void WriteBarrier() = 0; |
|||
|
|||
virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0; |
|||
|
|||
virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, |
|||
const u8* data) = 0; |
|||
|
|||
virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, |
|||
u8* data) = 0; |
|||
|
|||
virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset, |
|||
std::size_t dst_offset, std::size_t size) = 0; |
|||
|
|||
/// Register an object into the cache |
|||
void Register(const MapInterval& new_map, bool inherit_written = false) { |
|||
const CacheAddr cache_ptr = new_map->GetStart(); |
|||
const std::optional<VAddr> cpu_addr = |
|||
system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress()); |
|||
if (!cache_ptr || !cpu_addr) { |
|||
LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", |
|||
new_map->GetGpuAddress()); |
|||
return; |
|||
} |
|||
const std::size_t size = new_map->GetEnd() - new_map->GetStart(); |
|||
new_map->SetCpuAddress(*cpu_addr); |
|||
new_map->MarkAsRegistered(true); |
|||
const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; |
|||
mapped_addresses.insert({interval, new_map}); |
|||
rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); |
|||
if (inherit_written) { |
|||
MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); |
|||
new_map->MarkAsWritten(true); |
|||
} |
|||
} |
|||
|
|||
/// Unregisters an object from the cache |
|||
void Unregister(MapInterval& map) { |
|||
const std::size_t size = map->GetEnd() - map->GetStart(); |
|||
rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1); |
|||
map->MarkAsRegistered(false); |
|||
if (map->IsWritten()) { |
|||
UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); |
|||
} |
|||
const IntervalType delete_interval{map->GetStart(), map->GetEnd()}; |
|||
mapped_addresses.erase(delete_interval); |
|||
} |
|||
|
|||
private: |
|||
MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) { |
|||
return std::make_shared<MapIntervalBase>(start, end, gpu_addr); |
|||
} |
|||
|
|||
MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, |
|||
const CacheAddr cache_addr, const std::size_t size) { |
|||
|
|||
std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size); |
|||
if (overlaps.empty()) { |
|||
const CacheAddr cache_addr_end = cache_addr + size; |
|||
MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr); |
|||
u8* host_ptr = FromCacheAddr(cache_addr); |
|||
UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); |
|||
Register(new_map); |
|||
return new_map; |
|||
} |
|||
|
|||
const CacheAddr cache_addr_end = cache_addr + size; |
|||
if (overlaps.size() == 1) { |
|||
MapInterval& current_map = overlaps[0]; |
|||
if (current_map->IsInside(cache_addr, cache_addr_end)) { |
|||
return current_map; |
|||
} |
|||
} |
|||
CacheAddr new_start = cache_addr; |
|||
CacheAddr new_end = cache_addr_end; |
|||
bool write_inheritance = false; |
|||
bool modified_inheritance = false; |
|||
// Calculate new buffer parameters |
|||
for (auto& overlap : overlaps) { |
|||
new_start = std::min(overlap->GetStart(), new_start); |
|||
new_end = std::max(overlap->GetEnd(), new_end); |
|||
write_inheritance |= overlap->IsWritten(); |
|||
modified_inheritance |= overlap->IsModified(); |
|||
} |
|||
GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; |
|||
for (auto& overlap : overlaps) { |
|||
Unregister(overlap); |
|||
} |
|||
UpdateBlock(block, new_start, new_end, overlaps); |
|||
MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); |
|||
if (modified_inheritance) { |
|||
new_map->MarkAsModified(true, GetModifiedTicks()); |
|||
} |
|||
Register(new_map, write_inheritance); |
|||
return new_map; |
|||
} |
|||
|
|||
void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end, |
|||
std::vector<MapInterval>& overlaps) { |
|||
const IntervalType base_interval{start, end}; |
|||
IntervalSet interval_set{}; |
|||
interval_set.add(base_interval); |
|||
for (auto& overlap : overlaps) { |
|||
const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()}; |
|||
interval_set.subtract(subtract); |
|||
} |
|||
for (auto& interval : interval_set) { |
|||
std::size_t size = interval.upper() - interval.lower(); |
|||
if (size > 0) { |
|||
u8* host_ptr = FromCacheAddr(interval.lower()); |
|||
UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr); |
|||
} |
|||
} |
|||
} |
|||
|
|||
std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) { |
|||
if (size == 0) { |
|||
return {}; |
|||
} |
|||
|
|||
std::vector<MapInterval> objects{}; |
|||
const IntervalType interval{addr, addr + size}; |
|||
for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) { |
|||
objects.push_back(pair.second); |
|||
} |
|||
|
|||
return objects; |
|||
} |
|||
|
|||
/// Returns a ticks counter used for tracking when cached objects were last modified |
|||
u64 GetModifiedTicks() { |
|||
return ++modified_ticks; |
|||
} |
|||
|
|||
void FlushMap(MapInterval map) { |
|||
std::size_t size = map->GetEnd() - map->GetStart(); |
|||
TBuffer block = blocks[map->GetStart() >> block_page_bits]; |
|||
u8* host_ptr = FromCacheAddr(map->GetStart()); |
|||
DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr); |
|||
map->MarkAsModified(false, 0); |
|||
} |
|||
|
|||
BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, |
|||
std::size_t alignment) { |
|||
AlignBuffer(alignment); |
|||
const std::size_t uploaded_offset = buffer_offset; |
|||
std::memcpy(buffer_ptr, raw_pointer, size); |
|||
|
|||
buffer_ptr += size; |
|||
buffer_offset += size; |
|||
return {&stream_buffer_handle, uploaded_offset}; |
|||
} |
|||
|
|||
void AlignBuffer(std::size_t alignment) { |
|||
// Align the offset, not the mapped pointer |
|||
const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); |
|||
buffer_ptr += offset_aligned - buffer_offset; |
|||
buffer_offset = offset_aligned; |
|||
} |
|||
|
|||
TBuffer EnlargeBlock(TBuffer buffer) { |
|||
const std::size_t old_size = buffer->GetSize(); |
|||
const std::size_t new_size = old_size + block_page_size; |
|||
const CacheAddr cache_addr = buffer->GetCacheAddr(); |
|||
TBuffer new_buffer = CreateBlock(cache_addr, new_size); |
|||
CopyBlock(buffer, new_buffer, 0, 0, old_size); |
|||
buffer->SetEpoch(epoch); |
|||
pending_destruction.push_back(buffer); |
|||
const CacheAddr cache_addr_end = cache_addr + new_size - 1; |
|||
u64 page_start = cache_addr >> block_page_bits; |
|||
const u64 page_end = cache_addr_end >> block_page_bits; |
|||
while (page_start <= page_end) { |
|||
blocks[page_start] = new_buffer; |
|||
++page_start; |
|||
} |
|||
return new_buffer; |
|||
} |
|||
|
|||
TBuffer MergeBlocks(TBuffer first, TBuffer second) { |
|||
const std::size_t size_1 = first->GetSize(); |
|||
const std::size_t size_2 = second->GetSize(); |
|||
const CacheAddr first_addr = first->GetCacheAddr(); |
|||
const CacheAddr second_addr = second->GetCacheAddr(); |
|||
const CacheAddr new_addr = std::min(first_addr, second_addr); |
|||
const std::size_t new_size = size_1 + size_2; |
|||
TBuffer new_buffer = CreateBlock(new_addr, new_size); |
|||
CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); |
|||
CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2); |
|||
first->SetEpoch(epoch); |
|||
second->SetEpoch(epoch); |
|||
pending_destruction.push_back(first); |
|||
pending_destruction.push_back(second); |
|||
const CacheAddr cache_addr_end = new_addr + new_size - 1; |
|||
u64 page_start = new_addr >> block_page_bits; |
|||
const u64 page_end = cache_addr_end >> block_page_bits; |
|||
while (page_start <= page_end) { |
|||
blocks[page_start] = new_buffer; |
|||
++page_start; |
|||
} |
|||
return new_buffer; |
|||
} |
|||
|
|||
TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) { |
|||
TBuffer found{}; |
|||
const CacheAddr cache_addr_end = cache_addr + size - 1; |
|||
u64 page_start = cache_addr >> block_page_bits; |
|||
const u64 page_end = cache_addr_end >> block_page_bits; |
|||
const u64 num_pages = page_end - page_start + 1; |
|||
while (page_start <= page_end) { |
|||
auto it = blocks.find(page_start); |
|||
if (it == blocks.end()) { |
|||
if (found) { |
|||
found = EnlargeBlock(found); |
|||
} else { |
|||
const CacheAddr start_addr = (page_start << block_page_bits); |
|||
found = CreateBlock(start_addr, block_page_size); |
|||
blocks[page_start] = found; |
|||
} |
|||
} else { |
|||
if (found) { |
|||
if (found == it->second) { |
|||
++page_start; |
|||
continue; |
|||
} |
|||
found = MergeBlocks(found, it->second); |
|||
} else { |
|||
found = it->second; |
|||
} |
|||
} |
|||
++page_start; |
|||
} |
|||
return found; |
|||
} |
|||
|
|||
void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { |
|||
u64 page_start = start >> write_page_bit; |
|||
const u64 page_end = end >> write_page_bit; |
|||
while (page_start <= page_end) { |
|||
auto it = written_pages.find(page_start); |
|||
if (it != written_pages.end()) { |
|||
it->second = it->second + 1; |
|||
} else { |
|||
written_pages[page_start] = 1; |
|||
} |
|||
page_start++; |
|||
} |
|||
} |
|||
|
|||
void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { |
|||
u64 page_start = start >> write_page_bit; |
|||
const u64 page_end = end >> write_page_bit; |
|||
while (page_start <= page_end) { |
|||
auto it = written_pages.find(page_start); |
|||
if (it != written_pages.end()) { |
|||
if (it->second > 1) { |
|||
it->second = it->second - 1; |
|||
} else { |
|||
written_pages.erase(it); |
|||
} |
|||
} |
|||
page_start++; |
|||
} |
|||
} |
|||
|
|||
bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const { |
|||
u64 page_start = start >> write_page_bit; |
|||
const u64 page_end = end >> write_page_bit; |
|||
while (page_start <= page_end) { |
|||
if (written_pages.count(page_start) > 0) { |
|||
return true; |
|||
} |
|||
page_start++; |
|||
} |
|||
return false; |
|||
} |
|||
|
|||
std::unique_ptr<StreamBuffer> stream_buffer; |
|||
TBufferType stream_buffer_handle{}; |
|||
|
|||
bool invalidated = false; |
|||
|
|||
u8* buffer_ptr = nullptr; |
|||
u64 buffer_offset = 0; |
|||
u64 buffer_offset_base = 0; |
|||
|
|||
using IntervalSet = boost::icl::interval_set<CacheAddr>; |
|||
using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>; |
|||
using IntervalType = typename IntervalCache::interval_type; |
|||
IntervalCache mapped_addresses{}; |
|||
|
|||
static constexpr u64 write_page_bit{11}; |
|||
std::unordered_map<u64, u32> written_pages{}; |
|||
|
|||
static constexpr u64 block_page_bits{21}; |
|||
static constexpr u64 block_page_size{1 << block_page_bits}; |
|||
std::unordered_map<u64, TBuffer> blocks{}; |
|||
|
|||
std::list<TBuffer> pending_destruction{}; |
|||
u64 epoch{}; |
|||
u64 modified_ticks{}; |
|||
VideoCore::RasterizerInterface& rasterizer; |
|||
Core::System& system; |
|||
std::recursive_mutex mutex; |
|||
}; |
|||
|
|||
} // namespace VideoCommon |
|||
@ -0,0 +1,89 @@ |
|||
// Copyright 2019 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include "common/common_types.h" |
|||
#include "video_core/gpu.h" |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
class MapIntervalBase { |
|||
public: |
|||
MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) |
|||
: start{start}, end{end}, gpu_addr{gpu_addr} {} |
|||
|
|||
void SetCpuAddress(VAddr new_cpu_addr) { |
|||
cpu_addr = new_cpu_addr; |
|||
} |
|||
|
|||
VAddr GetCpuAddress() const { |
|||
return cpu_addr; |
|||
} |
|||
|
|||
GPUVAddr GetGpuAddress() const { |
|||
return gpu_addr; |
|||
} |
|||
|
|||
bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { |
|||
return (start <= other_start && other_end <= end); |
|||
} |
|||
|
|||
bool operator==(const MapIntervalBase& rhs) const { |
|||
return std::tie(start, end) == std::tie(rhs.start, rhs.end); |
|||
} |
|||
|
|||
bool operator!=(const MapIntervalBase& rhs) const { |
|||
return !operator==(rhs); |
|||
} |
|||
|
|||
void MarkAsRegistered(const bool registered) { |
|||
is_registered = registered; |
|||
} |
|||
|
|||
bool IsRegistered() const { |
|||
return is_registered; |
|||
} |
|||
|
|||
CacheAddr GetStart() const { |
|||
return start; |
|||
} |
|||
|
|||
CacheAddr GetEnd() const { |
|||
return end; |
|||
} |
|||
|
|||
void MarkAsModified(const bool is_modified_, const u64 tick) { |
|||
is_modified = is_modified_; |
|||
ticks = tick; |
|||
} |
|||
|
|||
bool IsModified() const { |
|||
return is_modified; |
|||
} |
|||
|
|||
u64 GetModificationTick() const { |
|||
return ticks; |
|||
} |
|||
|
|||
void MarkAsWritten(const bool is_written_) { |
|||
is_written = is_written_; |
|||
} |
|||
|
|||
bool IsWritten() const { |
|||
return is_written; |
|||
} |
|||
|
|||
private: |
|||
CacheAddr start; |
|||
CacheAddr end; |
|||
GPUVAddr gpu_addr; |
|||
VAddr cpu_addr{}; |
|||
bool is_written{}; |
|||
bool is_modified{}; |
|||
bool is_registered{}; |
|||
u64 ticks{}; |
|||
}; |
|||
|
|||
} // namespace VideoCommon |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue