diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 08513d1534..e9489b710c 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -5,6 +5,8 @@ // SPDX-License-Identifier: GPL-3.0-or-later #include +#include +#include #include #include @@ -29,6 +31,8 @@ using namespace Common::Literals; constexpr VkDeviceSize MAX_ALIGNMENT = 256; // Stream buffer size in bytes constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB; +// Mobile mega buffer size (per chunk) +constexpr VkDeviceSize MOBILE_MEGABUFFER_SIZE = 32_MiB; size_t GetStreamBufferSize(const Device& device) { VkDeviceSize size{0}; @@ -49,6 +53,118 @@ size_t GetStreamBufferSize(const Device& device) { } } // Anonymous namespace +class MobileMegaBuffer { +public: + MobileMegaBuffer(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler_) + : scheduler{scheduler_} { + VkBufferCreateInfo buffer_ci = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = MOBILE_MEGABUFFER_SIZE, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; + buffer = allocator.CreateBuffer(buffer_ci, MemoryUsage::Upload); + if (device.HasDebuggingToolAttached()) { + buffer.SetObjectNameEXT("Mobile MegaBuffer"); + } + data = buffer.Mapped(); + ASSERT_MSG(!data.empty(), "Mobile MegaBuffer must be host visible"); + buffer_size = static_cast(data.size()); + } + + std::optional Allocate(size_t size) { + if (size == 0) { + return std::nullopt; + } + const VkDeviceSize requested = static_cast(size); + if (requested > buffer_size) { + return std::nullopt; + } + const VkDeviceSize 
aligned_size = + static_cast(Common::AlignUp(requested, MAX_ALIGNMENT)); + if (aligned_size > buffer_size) { + return std::nullopt; + } + Reclaim(); + const std::optional offset = Reserve(aligned_size); + if (!offset) { + return std::nullopt; + } + regions.push_back(Region{ + .tick = scheduler.CurrentTick(), + .offset = *offset, + .size = aligned_size, + }); + return StagingBufferRef{ + .buffer = *buffer, + .offset = *offset, + .mapped_span = data.subspan(static_cast(*offset), + static_cast(aligned_size)), + .usage = MemoryUsage::Upload, + .log2_level = 0, + .index = ++unique_id, + }; + } + + void Tick() { + Reclaim(); + } + +private: + struct Region { + u64 tick; + VkDeviceSize offset; + VkDeviceSize size; + }; + + void Reclaim() { + while (!regions.empty() && scheduler.IsFree(regions.front().tick)) { + regions.pop_front(); + if (regions.empty()) { + write_offset = 0; + } + } + } + + std::optional Reserve(VkDeviceSize size) { /* Ring reservation: head is the oldest live region's offset; write_offset < head means the writer has wrapped around. */ + const VkDeviceSize head = regions.empty() ? write_offset : regions.front().offset; + if (write_offset >= head) { + const VkDeviceSize space_at_end = buffer_size - write_offset; + if (space_at_end >= size) { + const VkDeviceSize offset = write_offset; + write_offset += size; + return offset; + } + if (head > size) { /* Strict: wrapping to write_offset == head would look like the unwrapped state, and the next call would overwrite regions still in flight. */ + write_offset = size; + return 0; + } + return std::nullopt; + } + const VkDeviceSize available = head - write_offset; + if (available > size) { /* Strict for the same reason: write_offset must never catch up to head while regions are live. */ + const VkDeviceSize offset = write_offset; + write_offset += size; + return offset; + } + return std::nullopt; + } + + vk::Buffer buffer; + std::span data; + VkDeviceSize buffer_size{}; + VkDeviceSize write_offset{}; + std::deque regions; + Scheduler& scheduler; + u64 unique_id{}; +}; + StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_) : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, @@ -74,13 +190,24 @@ StagingBufferPool::StagingBufferPool(const Device& device_, 
MemoryAllocator& mem } stream_pointer = stream_buffer.Mapped(); ASSERT_MSG(!stream_pointer.empty(), "Stream buffer must be host visible!"); + + if (device.ShouldUseMobileMegaBuffer()) { + mobile_megabuffer = std::make_unique(device, memory_allocator, scheduler); + } } StagingBufferPool::~StagingBufferPool() = default; StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) { - if (!deferred && usage == MemoryUsage::Upload && size <= region_size) { - return GetStreamBuffer(size); + if (!deferred && usage == MemoryUsage::Upload) { + if (mobile_megabuffer) { + if (const std::optional ref = mobile_megabuffer->Allocate(size)) { + return *ref; + } + } + if (size <= region_size) { + return GetStreamBuffer(size); + } } return GetStagingBuffer(size, usage, deferred); } @@ -100,6 +227,10 @@ void StagingBufferPool::FreeDeferred(StagingBufferRef& ref) { void StagingBufferPool::TickFrame() { current_delete_level = (current_delete_level + 1) % NUM_LEVELS; + if (mobile_megabuffer) { + mobile_megabuffer->Tick(); + } + ReleaseCache(MemoryUsage::DeviceLocal); ReleaseCache(MemoryUsage::Upload); ReleaseCache(MemoryUsage::Download); diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index f63a203272..bed47253c2 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -1,9 +1,14 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #pragma once #include +#include +#include #include #include "common/common_types.h" @@ -15,6 +20,7 @@ namespace Vulkan { class Device; class Scheduler; +class MobileMegaBuffer; struct StagingBufferRef { VkBuffer buffer; @@ -116,6 +122,8 @@ private: StagingBuffersCache upload_cache; StagingBuffersCache 
download_cache; + std::unique_ptr mobile_megabuffer; + size_t current_delete_level = 0; u64 buffer_index = 0; u64 unique_ids{}; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index af2eab9700..384183d31f 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -577,11 +577,39 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } } + const bool needs_mobile_alignment_clamp = is_qualcomm || is_arm; + use_mobile_megabuffer = needs_mobile_alignment_clamp; + if (is_qualcomm) { const u32 version = (properties.properties.driverVersion << 3) >> 3; if (version < VK_MAKE_API_VERSION(0, 255, 615, 512)) { has_broken_parallel_compiling = true; } + } + + if (needs_mobile_alignment_clamp) { + const char* driver_label = is_qualcomm ? "Qualcomm" : "ARM"; + + constexpr VkDeviceSize MIN_UNIFORM_ALIGNMENT = 256; + const VkDeviceSize reported_uniform_alignment = + properties.properties.limits.minUniformBufferOffsetAlignment; + if (reported_uniform_alignment < MIN_UNIFORM_ALIGNMENT) { + uniform_buffer_alignment_minimum = MIN_UNIFORM_ALIGNMENT; + LOG_WARNING(Render_Vulkan, + "{} driver reports {}-byte minUniformBufferOffsetAlignment; clamping to {}", + driver_label, reported_uniform_alignment, uniform_buffer_alignment_minimum); + } + + constexpr VkDeviceSize MIN_STORAGE_ALIGNMENT = 64; + const VkDeviceSize reported_storage_alignment = + properties.properties.limits.minStorageBufferOffsetAlignment; + if (reported_storage_alignment < MIN_STORAGE_ALIGNMENT) { + storage_buffer_alignment_minimum = MIN_STORAGE_ALIGNMENT; + LOG_WARNING(Render_Vulkan, + "{} driver reports {}-byte minStorageBufferOffsetAlignment; clamping to {}", + driver_label, reported_storage_alignment, storage_buffer_alignment_minimum); + } + const size_t sampler_limit = properties.properties.limits.maxSamplerAllocationCount; if (sampler_limit > 0) { constexpr size_t 
MIN_SAMPLER_BUDGET = 1024U; @@ -590,9 +618,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR (std::max)(MIN_SAMPLER_BUDGET, sampler_limit - reserved); sampler_heap_budget = derived_budget; LOG_WARNING(Render_Vulkan, - "Qualcomm driver reports max {} samplers; reserving {} (25%) and " + "{} driver reports max {} samplers; reserving {} (25%) and " "allowing Eden to use {} (75%) to avoid heap exhaustion", - sampler_limit, reserved, sampler_heap_budget); + driver_label, sampler_limit, reserved, sampler_heap_budget); } } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 2926f78aac..4f0880904f 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -6,6 +6,7 @@ #pragma once +#include #include #include #include @@ -315,12 +316,14 @@ public: /// Returns uniform buffer alignment requirement. VkDeviceSize GetUniformBufferAlignment() const { - return properties.properties.limits.minUniformBufferOffsetAlignment; + return (std::max)(properties.properties.limits.minUniformBufferOffsetAlignment, + uniform_buffer_alignment_minimum); } /// Returns storage alignment requirement. VkDeviceSize GetStorageBufferAlignment() const { - return properties.properties.limits.minStorageBufferOffsetAlignment; + return (std::max)(properties.properties.limits.minStorageBufferOffsetAlignment, + storage_buffer_alignment_minimum); } /// Returns the maximum range for storage buffers. @@ -373,6 +376,11 @@ public: return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY; } + /// Returns true when the driver should use the mobile mega buffer allocator. + bool ShouldUseMobileMegaBuffer() const { + return use_mobile_megabuffer; + } + /// Returns true if the device supports float64 natively. 
bool IsFloat64Supported() const { return features.features.shaderFloat64; @@ -1076,6 +1084,7 @@ private: bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. bool must_emulate_scaled_formats{}; ///< Requires scaled vertex format emulation bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. + bool use_mobile_megabuffer{}; ///< Use the Android mega buffer path. bool dynamic_state3_blending{}; ///< Has blending features of dynamic_state3. bool dynamic_state3_enables{}; ///< Has at least one enable feature of dynamic_state3. bool dynamic_state3_depth_clamp_enable{}; @@ -1087,6 +1096,8 @@ private: bool dynamic_state3_alpha_to_one{}; bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow. size_t sampler_heap_budget{}; ///< Sampler budget for buggy drivers (0 = unlimited). + VkDeviceSize uniform_buffer_alignment_minimum{}; ///< Minimum enforced UBO alignment. + VkDeviceSize storage_buffer_alignment_minimum{}; ///< Minimum enforced SSBO alignment. u64 device_access_memory{}; ///< Total size of device local memory in bytes. u32 sets_per_pool{}; ///< Sets per Description Pool NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};