Browse Source
Merge pull request #2675 from ReinUsesLisp/opengl-buffer-cache
Merge pull request #2675 from ReinUsesLisp/opengl-buffer-cache
buffer_cache: Implement a generic buffer cache and its OpenGL backend
committed by
GitHub
16 changed files with 537 additions and 407 deletions
-
3src/video_core/CMakeLists.txt
-
299src/video_core/buffer_cache.h
-
1src/video_core/engines/maxwell_3d.h
-
3src/video_core/rasterizer_interface.h
-
110src/video_core/renderer_opengl/gl_buffer_cache.cpp
-
76src/video_core/renderer_opengl/gl_buffer_cache.h
-
1src/video_core/renderer_opengl/gl_device.cpp
-
5src/video_core/renderer_opengl/gl_device.h
-
102src/video_core/renderer_opengl/gl_global_cache.cpp
-
82src/video_core/renderer_opengl/gl_global_cache.h
-
152src/video_core/renderer_opengl/gl_rasterizer.cpp
-
13src/video_core/renderer_opengl/gl_rasterizer.h
-
2src/video_core/renderer_opengl/gl_shader_decompiler.cpp
-
4src/video_core/renderer_opengl/renderer_opengl.cpp
-
48src/video_core/renderer_opengl/utils.cpp
-
41src/video_core/renderer_opengl/utils.h
@ -0,0 +1,299 @@ |
|||
// Copyright 2019 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <array> |
|||
#include <memory> |
|||
#include <mutex> |
|||
#include <unordered_map> |
|||
#include <unordered_set> |
|||
#include <utility> |
|||
#include <vector> |
|||
|
|||
#include "common/alignment.h" |
|||
#include "common/common_types.h" |
|||
#include "core/core.h" |
|||
#include "video_core/memory_manager.h" |
|||
#include "video_core/rasterizer_cache.h" |
|||
|
|||
namespace VideoCore { |
|||
class RasterizerInterface; |
|||
} |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
template <typename BufferStorageType> |
|||
class CachedBuffer final : public RasterizerCacheObject { |
|||
public: |
|||
explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr) |
|||
: RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {} |
|||
~CachedBuffer() override = default; |
|||
|
|||
VAddr GetCpuAddr() const override { |
|||
return cpu_addr; |
|||
} |
|||
|
|||
std::size_t GetSizeInBytes() const override { |
|||
return size; |
|||
} |
|||
|
|||
u8* GetWritableHostPtr() const { |
|||
return host_ptr; |
|||
} |
|||
|
|||
std::size_t GetSize() const { |
|||
return size; |
|||
} |
|||
|
|||
std::size_t GetCapacity() const { |
|||
return capacity; |
|||
} |
|||
|
|||
bool IsInternalized() const { |
|||
return is_internal; |
|||
} |
|||
|
|||
const BufferStorageType& GetBuffer() const { |
|||
return buffer; |
|||
} |
|||
|
|||
void SetSize(std::size_t new_size) { |
|||
size = new_size; |
|||
} |
|||
|
|||
void SetInternalState(bool is_internal_) { |
|||
is_internal = is_internal_; |
|||
} |
|||
|
|||
BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) { |
|||
capacity = new_capacity; |
|||
std::swap(buffer, buffer_); |
|||
return buffer_; |
|||
} |
|||
|
|||
private: |
|||
u8* host_ptr{}; |
|||
VAddr cpu_addr{}; |
|||
std::size_t size{}; |
|||
std::size_t capacity{}; |
|||
bool is_internal{}; |
|||
BufferStorageType buffer; |
|||
}; |
|||
|
|||
template <typename BufferStorageType, typename BufferType, typename StreamBuffer> |
|||
class BufferCache : public RasterizerCache<std::shared_ptr<CachedBuffer<BufferStorageType>>> { |
|||
public: |
|||
using Buffer = std::shared_ptr<CachedBuffer<BufferStorageType>>; |
|||
using BufferInfo = std::pair<const BufferType*, u64>; |
|||
|
|||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, |
|||
std::unique_ptr<StreamBuffer> stream_buffer) |
|||
: RasterizerCache<Buffer>{rasterizer}, system{system}, |
|||
stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{ |
|||
this->stream_buffer->GetHandle()} {} |
|||
~BufferCache() = default; |
|||
|
|||
void Unregister(const Buffer& entry) override { |
|||
std::lock_guard lock{RasterizerCache<Buffer>::mutex}; |
|||
if (entry->IsInternalized()) { |
|||
internalized_entries.erase(entry->GetCacheAddr()); |
|||
} |
|||
ReserveBuffer(entry); |
|||
RasterizerCache<Buffer>::Unregister(entry); |
|||
} |
|||
|
|||
void TickFrame() { |
|||
marked_for_destruction_index = |
|||
(marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size(); |
|||
MarkedForDestruction().clear(); |
|||
} |
|||
|
|||
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, |
|||
bool internalize = false, bool is_written = false) { |
|||
std::lock_guard lock{RasterizerCache<Buffer>::mutex}; |
|||
|
|||
auto& memory_manager = system.GPU().MemoryManager(); |
|||
const auto host_ptr = memory_manager.GetPointer(gpu_addr); |
|||
if (!host_ptr) { |
|||
return {GetEmptyBuffer(size), 0}; |
|||
} |
|||
const auto cache_addr = ToCacheAddr(host_ptr); |
|||
|
|||
// Cache management is a big overhead, so only cache entries with a given size. |
|||
// TODO: Figure out which size is the best for given games. |
|||
constexpr std::size_t max_stream_size = 0x800; |
|||
if (!internalize && size < max_stream_size && |
|||
internalized_entries.find(cache_addr) == internalized_entries.end()) { |
|||
return StreamBufferUpload(host_ptr, size, alignment); |
|||
} |
|||
|
|||
auto entry = RasterizerCache<Buffer>::TryGet(cache_addr); |
|||
if (!entry) { |
|||
return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written); |
|||
} |
|||
|
|||
if (entry->GetSize() < size) { |
|||
IncreaseBufferSize(entry, size); |
|||
} |
|||
if (is_written) { |
|||
entry->MarkAsModified(true, *this); |
|||
} |
|||
return {ToHandle(entry->GetBuffer()), 0}; |
|||
} |
|||
|
|||
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. |
|||
BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, |
|||
std::size_t alignment = 4) { |
|||
std::lock_guard lock{RasterizerCache<Buffer>::mutex}; |
|||
return StreamBufferUpload(raw_pointer, size, alignment); |
|||
} |
|||
|
|||
void Map(std::size_t max_size) { |
|||
std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); |
|||
buffer_offset = buffer_offset_base; |
|||
} |
|||
|
|||
/// Finishes the upload stream, returns true on bindings invalidation. |
|||
bool Unmap() { |
|||
stream_buffer->Unmap(buffer_offset - buffer_offset_base); |
|||
return std::exchange(invalidated, false); |
|||
} |
|||
|
|||
virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0; |
|||
|
|||
protected: |
|||
void FlushObjectInner(const Buffer& entry) override { |
|||
DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr()); |
|||
} |
|||
|
|||
virtual BufferStorageType CreateBuffer(std::size_t size) = 0; |
|||
|
|||
virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0; |
|||
|
|||
virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset, |
|||
std::size_t size, const u8* data) = 0; |
|||
|
|||
virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset, |
|||
std::size_t size, u8* data) = 0; |
|||
|
|||
virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst, |
|||
std::size_t src_offset, std::size_t dst_offset, |
|||
std::size_t size) = 0; |
|||
|
|||
private: |
|||
BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, |
|||
std::size_t alignment) { |
|||
AlignBuffer(alignment); |
|||
const std::size_t uploaded_offset = buffer_offset; |
|||
std::memcpy(buffer_ptr, raw_pointer, size); |
|||
|
|||
buffer_ptr += size; |
|||
buffer_offset += size; |
|||
return {&stream_buffer_handle, uploaded_offset}; |
|||
} |
|||
|
|||
BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size, |
|||
bool internalize, bool is_written) { |
|||
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); |
|||
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); |
|||
ASSERT(cpu_addr); |
|||
|
|||
auto entry = GetUncachedBuffer(*cpu_addr, host_ptr); |
|||
entry->SetSize(size); |
|||
entry->SetInternalState(internalize); |
|||
RasterizerCache<Buffer>::Register(entry); |
|||
|
|||
if (internalize) { |
|||
internalized_entries.emplace(ToCacheAddr(host_ptr)); |
|||
} |
|||
if (is_written) { |
|||
entry->MarkAsModified(true, *this); |
|||
} |
|||
|
|||
if (entry->GetCapacity() < size) { |
|||
MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size)); |
|||
} |
|||
|
|||
UploadBufferData(entry->GetBuffer(), 0, size, host_ptr); |
|||
return {ToHandle(entry->GetBuffer()), 0}; |
|||
} |
|||
|
|||
void IncreaseBufferSize(Buffer& entry, std::size_t new_size) { |
|||
const std::size_t old_size = entry->GetSize(); |
|||
if (entry->GetCapacity() < new_size) { |
|||
const auto& old_buffer = entry->GetBuffer(); |
|||
auto new_buffer = CreateBuffer(new_size); |
|||
|
|||
// Copy bits from the old buffer to the new buffer. |
|||
CopyBufferData(old_buffer, new_buffer, 0, 0, old_size); |
|||
MarkedForDestruction().push_back( |
|||
entry->ExchangeBuffer(std::move(new_buffer), new_size)); |
|||
|
|||
// This buffer could have been used |
|||
invalidated = true; |
|||
} |
|||
// Upload the new bits. |
|||
const std::size_t size_diff = new_size - old_size; |
|||
UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size); |
|||
|
|||
// Update entry's size in the object and in the cache. |
|||
Unregister(entry); |
|||
|
|||
entry->SetSize(new_size); |
|||
RasterizerCache<Buffer>::Register(entry); |
|||
} |
|||
|
|||
Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) { |
|||
if (auto entry = TryGetReservedBuffer(host_ptr)) { |
|||
return entry; |
|||
} |
|||
return std::make_shared<CachedBuffer<BufferStorageType>>(cpu_addr, host_ptr); |
|||
} |
|||
|
|||
Buffer TryGetReservedBuffer(u8* host_ptr) { |
|||
const auto it = buffer_reserve.find(ToCacheAddr(host_ptr)); |
|||
if (it == buffer_reserve.end()) { |
|||
return {}; |
|||
} |
|||
auto& reserve = it->second; |
|||
auto entry = reserve.back(); |
|||
reserve.pop_back(); |
|||
return entry; |
|||
} |
|||
|
|||
void ReserveBuffer(Buffer entry) { |
|||
buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry)); |
|||
} |
|||
|
|||
void AlignBuffer(std::size_t alignment) { |
|||
// Align the offset, not the mapped pointer |
|||
const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); |
|||
buffer_ptr += offset_aligned - buffer_offset; |
|||
buffer_offset = offset_aligned; |
|||
} |
|||
|
|||
std::vector<BufferStorageType>& MarkedForDestruction() { |
|||
return marked_for_destruction_ring_buffer[marked_for_destruction_index]; |
|||
} |
|||
|
|||
Core::System& system; |
|||
|
|||
std::unique_ptr<StreamBuffer> stream_buffer; |
|||
BufferType stream_buffer_handle{}; |
|||
|
|||
bool invalidated = false; |
|||
|
|||
u8* buffer_ptr = nullptr; |
|||
u64 buffer_offset = 0; |
|||
u64 buffer_offset_base = 0; |
|||
|
|||
std::size_t marked_for_destruction_index = 0; |
|||
std::array<std::vector<BufferStorageType>, 4> marked_for_destruction_ring_buffer; |
|||
|
|||
std::unordered_set<CacheAddr> internalized_entries; |
|||
std::unordered_map<CacheAddr, std::vector<Buffer>> buffer_reserve; |
|||
}; |
|||
|
|||
} // namespace VideoCommon |
|||
@ -1,102 +0,0 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include <glad/glad.h>
|
|||
|
|||
#include "common/logging/log.h"
|
|||
#include "core/core.h"
|
|||
#include "video_core/memory_manager.h"
|
|||
#include "video_core/renderer_opengl/gl_global_cache.h"
|
|||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
|||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
|||
#include "video_core/renderer_opengl/utils.h"
|
|||
|
|||
namespace OpenGL { |
|||
|
|||
/// Creates the GL buffer object backing a global memory region and labels it with the guest
/// CPU address. No data is uploaded here; that happens in Reload().
CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size)
    : RasterizerCacheObject(host_ptr), cpu_addr(cpu_addr), host_ptr(host_ptr), size(size),
      max_size(max_size) {
    buffer.Create();

    const GLuint gl_handle = buffer.handle;
    LabelGLObject(GL_BUFFER, gl_handle, cpu_addr, "GlobalMemory");
}

/// Nothing to release by hand; members clean up after themselves.
CachedGlobalRegion::~CachedGlobalRegion() = default;
|||
|
|||
/// Re-uploads the region from host memory, using `size_` bytes clamped to `max_size`.
/// A critical log entry is emitted when clamping was necessary.
void CachedGlobalRegion::Reload(u32 size_) {
    const bool over_limit = size_ > max_size;
    if (over_limit) {
        LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_,
                     max_size);
    }
    size = over_limit ? max_size : size_;

    // Re-specifies the whole data store from the host pointer.
    glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW);
}
|||
|
|||
/// Reads the first `size` bytes of the GL buffer back into the region's host memory.
void CachedGlobalRegion::Flush() {
    // The address is printed as zero-padded hexadecimal. The previous "{:16}" spec emitted a
    // space-padded decimal number after the "0x" prefix.
    LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:016X}", size,
              cpu_addr);
    glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr);
}
|||
|
|||
/// Looks up a previously reserved region by cache address.
/// Returns a null GlobalRegion when nothing is reserved for `addr`. `size` is not consulted.
GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
    if (const auto it = reserve.find(addr); it != reserve.end()) {
        return it->second;
    }
    return {};
}
|||
|
|||
/// Produces a region for `host_ptr` that is not currently registered in the cache: a reserved
/// one is reused when available, otherwise a new region is created and reserved. In both
/// cases the region is reloaded with `size` bytes before being returned.
GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr,
                                                              u32 size) {
    GlobalRegion region = TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size);
    if (region == nullptr) {
        // Nothing reserved for this address: build a fresh region and keep it in the reserve.
        auto& gpu_memory = Core::System::GetInstance().GPU().MemoryManager();
        const auto cpu_addr = gpu_memory.GpuToCpuAddress(addr);
        ASSERT(cpu_addr);

        region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size);
        ReserveGlobalRegion(region);
    }

    region->Reload(size);
    return region;
}
|||
|
|||
/// Stores `region` in the reserve under its cache address, replacing any region already
/// reserved for that address.
void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
    // Read the key before the pointer is handed over to the map.
    const auto key = region->GetCacheAddr();
    reserve.insert_or_assign(key, std::move(region));
}
|||
|
|||
/// Constructs the cache and queries the driver's maximum shader storage block size, which is
/// later used as the upper bound for every global region.
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
    : RasterizerCache{rasterizer} {
    // Zero-initialized so a failed query cannot leave an indeterminate value behind.
    GLint max_ssbo_size_ = 0;
    glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_);
    max_ssbo_size = static_cast<u32>(max_ssbo_size_);
}
|||
|
|||
/// Resolves the global memory region referenced by a shader's global memory entry.
///
/// The entry names a const buffer index and an offset for the given shader stage. At that
/// location this function reads a 64-bit GPU address followed by a 32-bit size, then returns
/// the cached region for that address, creating and registering one when none is cached.
///
/// @param global_region Entry describing where the region descriptor lives.
/// @param stage         Shader stage whose const buffers are consulted.
/// @return The cached (or newly registered) region.
GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
    const GLShader::GlobalMemoryEntry& global_region,
    Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
    std::lock_guard lock{mutex};

    auto& gpu{Core::System::GetInstance().GPU()};
    auto& memory_manager{gpu.MemoryManager()};
    // Bound by reference: only one const buffer address is needed, so there is no reason to
    // copy the whole per-stage state.
    const auto& cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
    const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
                    global_region.GetCbufOffset()};
    const auto actual_addr{memory_manager.Read<u64>(addr)};
    const auto size{memory_manager.Read<u32>(addr + 8)};

    // Look up global region in the cache based on address. The pointer is held by value
    // rather than as a reference to a temporary.
    const auto host_ptr{memory_manager.GetPointer(actual_addr)};
    GlobalRegion region{TryGet(host_ptr)};

    if (!region) {
        // No global region found - create a new one
        region = GetUncachedGlobalRegion(actual_addr, host_ptr, size);
        Register(region);
    }

    return region;
}
|||
|
|||
} // namespace OpenGL
|
|||
@ -1,82 +0,0 @@ |
|||
// Copyright 2018 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <memory> |
|||
#include <unordered_map> |
|||
|
|||
#include <glad/glad.h> |
|||
|
|||
#include "common/assert.h" |
|||
#include "common/common_types.h" |
|||
#include "video_core/engines/maxwell_3d.h" |
|||
#include "video_core/rasterizer_cache.h" |
|||
#include "video_core/renderer_opengl/gl_resource_manager.h" |
|||
|
|||
namespace OpenGL { |
|||
|
|||
namespace GLShader { |
|||
class GlobalMemoryEntry; |
|||
} |
|||
|
|||
class RasterizerOpenGL; |
|||
class CachedGlobalRegion; |
|||
using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; |
|||
|
|||
class CachedGlobalRegion final : public RasterizerCacheObject { |
|||
public: |
|||
explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size); |
|||
~CachedGlobalRegion(); |
|||
|
|||
VAddr GetCpuAddr() const override { |
|||
return cpu_addr; |
|||
} |
|||
|
|||
std::size_t GetSizeInBytes() const override { |
|||
return size; |
|||
} |
|||
|
|||
/// Gets the GL program handle for the buffer |
|||
GLuint GetBufferHandle() const { |
|||
return buffer.handle; |
|||
} |
|||
|
|||
/// Reloads the global region from guest memory |
|||
void Reload(u32 size_); |
|||
|
|||
void Flush(); |
|||
|
|||
private: |
|||
VAddr cpu_addr{}; |
|||
u8* host_ptr{}; |
|||
u32 size{}; |
|||
u32 max_size{}; |
|||
|
|||
OGLBuffer buffer; |
|||
}; |
|||
|
|||
/// Rasterizer cache holding the global memory regions used by shaders.
class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
public:
    explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);

    /// Returns the global region referenced by `descriptor` for the given shader stage,
    /// registering a new one when it is not cached yet.
    GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
                                 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);

protected:
    /// Flushing a cached region is delegated to the region itself.
    void FlushObjectInner(const GlobalRegion& object) override {
        object->Flush();
    }

private:
    /// Returns the region reserved for `addr`, or null when there is none.
    GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;

    /// Returns a reloaded region for `host_ptr`, reusing a reserved one when possible.
    GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);

    /// Keeps `region` in the reserve, keyed by its cache address.
    void ReserveGlobalRegion(GlobalRegion region);

    /// Regions kept for reuse, keyed by cache address.
    std::unordered_map<CacheAddr, GlobalRegion> reserve;

    /// Value of GL_MAX_SHADER_STORAGE_BLOCK_SIZE queried at construction.
    u32 max_ssbo_size{};
};
|||
|
|||
} // namespace OpenGL |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue