Browse Source

[vk, vendor, mobile] Improve mobile staging buffer allocation (ring-buffer mega buffer)

pull/3115/head
CamilleLaVey 1 month ago
parent
commit
741e59ab87
  1. 135
      src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
  2. 8
      src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
  3. 32
      src/video_core/vulkan_common/vulkan_device.cpp
  4. 15
      src/video_core/vulkan_common/vulkan_device.h

135
src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp

@ -5,6 +5,8 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include <algorithm>
#include <deque>
#include <optional>
#include <utility>
#include <vector>
@ -29,6 +31,8 @@ using namespace Common::Literals;
constexpr VkDeviceSize MAX_ALIGNMENT = 256;
// Stream buffer size in bytes
constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB;
// Mobile mega buffer size (per chunk)
constexpr VkDeviceSize MOBILE_MEGABUFFER_SIZE = 32_MiB;
size_t GetStreamBufferSize(const Device& device) {
VkDeviceSize size{0};
@ -49,6 +53,118 @@ size_t GetStreamBufferSize(const Device& device) {
}
} // Anonymous namespace
class MobileMegaBuffer {
public:
MobileMegaBuffer(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler_)
: scheduler{scheduler_} {
VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = MOBILE_MEGABUFFER_SIZE,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
buffer = allocator.CreateBuffer(buffer_ci, MemoryUsage::Upload);
if (device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT("Mobile MegaBuffer");
}
data = buffer.Mapped();
ASSERT_MSG(!data.empty(), "Mobile MegaBuffer must be host visible");
buffer_size = static_cast<VkDeviceSize>(data.size());
}
std::optional<StagingBufferRef> Allocate(size_t size) {
if (size == 0) {
return std::nullopt;
}
const VkDeviceSize requested = static_cast<VkDeviceSize>(size);
if (requested > buffer_size) {
return std::nullopt;
}
const VkDeviceSize aligned_size =
static_cast<VkDeviceSize>(Common::AlignUp(requested, MAX_ALIGNMENT));
if (aligned_size > buffer_size) {
return std::nullopt;
}
Reclaim();
const std::optional<VkDeviceSize> offset = Reserve(aligned_size);
if (!offset) {
return std::nullopt;
}
regions.push_back(Region{
.tick = scheduler.CurrentTick(),
.offset = *offset,
.size = aligned_size,
});
return StagingBufferRef{
.buffer = *buffer,
.offset = *offset,
.mapped_span = data.subspan(static_cast<size_t>(*offset),
static_cast<size_t>(aligned_size)),
.usage = MemoryUsage::Upload,
.log2_level = 0,
.index = ++unique_id,
};
}
void Tick() {
Reclaim();
}
private:
struct Region {
u64 tick;
VkDeviceSize offset;
VkDeviceSize size;
};
void Reclaim() {
while (!regions.empty() && scheduler.IsFree(regions.front().tick)) {
regions.pop_front();
if (regions.empty()) {
write_offset = 0;
}
}
}
std::optional<VkDeviceSize> Reserve(VkDeviceSize size) {
const VkDeviceSize head = regions.empty() ? write_offset : regions.front().offset;
if (write_offset >= head) {
const VkDeviceSize space_at_end = buffer_size - write_offset;
if (space_at_end >= size) {
const VkDeviceSize offset = write_offset;
write_offset += size;
return offset;
}
if (head > 0 && head >= size) {
write_offset = size;
return 0;
}
return std::nullopt;
}
const VkDeviceSize available = head - write_offset;
if (available >= size) {
const VkDeviceSize offset = write_offset;
write_offset += size;
return offset;
}
return std::nullopt;
}
vk::Buffer buffer;
std::span<u8> data;
VkDeviceSize buffer_size{};
VkDeviceSize write_offset{};
std::deque<Region> regions;
Scheduler& scheduler;
u64 unique_id{};
};
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
@ -74,13 +190,24 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
}
stream_pointer = stream_buffer.Mapped();
ASSERT_MSG(!stream_pointer.empty(), "Stream buffer must be host visible!");
if (device.ShouldUseMobileMegaBuffer()) {
mobile_megabuffer = std::make_unique<MobileMegaBuffer>(device, memory_allocator, scheduler);
}
}
StagingBufferPool::~StagingBufferPool() = default;
/// Requests a staging buffer of `size` bytes for the given usage.
/// Immediate uploads are served from the mobile mega buffer when available,
/// then from the stream buffer when small enough; everything else falls back
/// to the cached staging-buffer levels.
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) {
    if (!deferred && usage == MemoryUsage::Upload) {
        if (mobile_megabuffer) {
            // Best effort: a failed mega-buffer allocation (too large or the
            // ring is full) falls through to the regular paths below.
            if (const std::optional<StagingBufferRef> ref = mobile_megabuffer->Allocate(size)) {
                return *ref;
            }
        }
        if (size <= region_size) {
            return GetStreamBuffer(size);
        }
    }
    return GetStagingBuffer(size, usage, deferred);
}
@ -100,6 +227,10 @@ void StagingBufferPool::FreeDeferred(StagingBufferRef& ref) {
void StagingBufferPool::TickFrame() {
current_delete_level = (current_delete_level + 1) % NUM_LEVELS;
if (mobile_megabuffer) {
mobile_megabuffer->Tick();
}
ReleaseCache(MemoryUsage::DeviceLocal);
ReleaseCache(MemoryUsage::Upload);
ReleaseCache(MemoryUsage::Download);

8
src/video_core/renderer_vulkan/vk_staging_buffer_pool.h

@ -1,9 +1,14 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <climits>
#include <memory>
#include <optional>
#include <vector>
#include "common/common_types.h"
@ -15,6 +20,7 @@ namespace Vulkan {
class Device;
class Scheduler;
class MobileMegaBuffer;
struct StagingBufferRef {
VkBuffer buffer;
@ -116,6 +122,8 @@ private:
StagingBuffersCache upload_cache;
StagingBuffersCache download_cache;
std::unique_ptr<MobileMegaBuffer> mobile_megabuffer;
size_t current_delete_level = 0;
u64 buffer_index = 0;
u64 unique_ids{};

32
src/video_core/vulkan_common/vulkan_device.cpp

@ -577,11 +577,39 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
}
const bool needs_mobile_alignment_clamp = is_qualcomm || is_arm;
use_mobile_megabuffer = needs_mobile_alignment_clamp;
if (is_qualcomm) {
const u32 version = (properties.properties.driverVersion << 3) >> 3;
if (version < VK_MAKE_API_VERSION(0, 255, 615, 512)) {
has_broken_parallel_compiling = true;
}
}
if (needs_mobile_alignment_clamp) {
const char* driver_label = is_qualcomm ? "Qualcomm" : "ARM";
constexpr VkDeviceSize MIN_UNIFORM_ALIGNMENT = 256;
const VkDeviceSize reported_uniform_alignment =
properties.properties.limits.minUniformBufferOffsetAlignment;
if (reported_uniform_alignment < MIN_UNIFORM_ALIGNMENT) {
uniform_buffer_alignment_minimum = MIN_UNIFORM_ALIGNMENT;
LOG_WARNING(Render_Vulkan,
"{} driver reports {}-byte minUniformBufferOffsetAlignment; clamping to {}",
driver_label, reported_uniform_alignment, uniform_buffer_alignment_minimum);
}
constexpr VkDeviceSize MIN_STORAGE_ALIGNMENT = 64;
const VkDeviceSize reported_storage_alignment =
properties.properties.limits.minStorageBufferOffsetAlignment;
if (reported_storage_alignment < MIN_STORAGE_ALIGNMENT) {
storage_buffer_alignment_minimum = MIN_STORAGE_ALIGNMENT;
LOG_WARNING(Render_Vulkan,
"{} driver reports {}-byte minStorageBufferOffsetAlignment; clamping to {}",
driver_label, reported_storage_alignment, storage_buffer_alignment_minimum);
}
const size_t sampler_limit = properties.properties.limits.maxSamplerAllocationCount;
if (sampler_limit > 0) {
constexpr size_t MIN_SAMPLER_BUDGET = 1024U;
@ -590,9 +618,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
(std::max)(MIN_SAMPLER_BUDGET, sampler_limit - reserved);
sampler_heap_budget = derived_budget;
LOG_WARNING(Render_Vulkan,
"Qualcomm driver reports max {} samplers; reserving {} (25%) and "
"{} driver reports max {} samplers; reserving {} (25%) and "
"allowing Eden to use {} (75%) to avoid heap exhaustion",
sampler_limit, reserved, sampler_heap_budget);
driver_label, sampler_limit, reserved, sampler_heap_budget);
}
}

15
src/video_core/vulkan_common/vulkan_device.h

@ -6,6 +6,7 @@
#pragma once
#include <algorithm>
#include <optional>
#include <set>
#include <span>
@ -315,12 +316,14 @@ public:
/// Returns the uniform buffer offset alignment requirement, clamped upward to
/// any vendor-specific minimum enforced at device creation time
/// (uniform_buffer_alignment_minimum is 0 unless a mobile driver workaround
/// raised it).
VkDeviceSize GetUniformBufferAlignment() const {
    return (std::max)(properties.properties.limits.minUniformBufferOffsetAlignment,
                      uniform_buffer_alignment_minimum);
}
/// Returns the storage buffer offset alignment requirement, clamped upward to
/// any vendor-specific minimum enforced at device creation time
/// (storage_buffer_alignment_minimum is 0 unless a mobile driver workaround
/// raised it).
VkDeviceSize GetStorageBufferAlignment() const {
    return (std::max)(properties.properties.limits.minStorageBufferOffsetAlignment,
                      storage_buffer_alignment_minimum);
}
/// Returns the maximum range for storage buffers.
@ -373,6 +376,11 @@ public:
return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
}
/// Returns true when the mobile mega buffer allocator should be used.
/// The flag is set at device creation time for the drivers that need the
/// mobile alignment workarounds — presumably Qualcomm/ARM; confirm against the
/// `use_mobile_megabuffer` assignment in vulkan_device.cpp.
bool ShouldUseMobileMegaBuffer() const {
    return use_mobile_megabuffer;
}
/// Returns true if the device supports float64 natively.
bool IsFloat64Supported() const {
return features.features.shaderFloat64;
@ -1076,6 +1084,7 @@ private:
bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
bool must_emulate_scaled_formats{}; ///< Requires scaled vertex format emulation
bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
bool use_mobile_megabuffer{}; ///< Use the Android mega buffer path.
bool dynamic_state3_blending{}; ///< Has blending features of dynamic_state3.
bool dynamic_state3_enables{}; ///< Has at least one enable feature of dynamic_state3.
bool dynamic_state3_depth_clamp_enable{};
@ -1087,6 +1096,8 @@ private:
bool dynamic_state3_alpha_to_one{};
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
size_t sampler_heap_budget{}; ///< Sampler budget for buggy drivers (0 = unlimited).
VkDeviceSize uniform_buffer_alignment_minimum{}; ///< Minimum enforced UBO alignment.
VkDeviceSize storage_buffer_alignment_minimum{}; ///< Minimum enforced SSBO alignment.
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 sets_per_pool{}; ///< Sets per Description Pool
NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};

Loading…
Cancel
Save