diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 5b5dc1c219..b368ffea05 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -407,6 +407,12 @@ void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
 template <class P>
 void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
+ if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
+ if (runtime.ShouldLimitDynamicStorageBuffers()) {
+ channel_state->total_graphics_storage_buffers -=
+            static_cast<u32>(std::popcount(channel_state->enabled_storage_buffers[stage]));
+ }
+ }
channel_state->enabled_storage_buffers[stage] = 0;
channel_state->written_storage_buffers[stage] = 0;
}
@@ -414,8 +420,26 @@ void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
 template <class P>
 bool BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
                                                u32 cbuf_offset, bool is_written) {
+ const bool already_enabled =
+ ((channel_state->enabled_storage_buffers[stage] >> ssbo_index) & 1U) != 0;
+ if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
+ if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
+ const u32 max_bindings = runtime.GetMaxDynamicStorageBuffers();
+ if (channel_state->total_graphics_storage_buffers >= max_bindings) {
+ LOG_WARNING(HW_GPU,
+ "Skipping graphics storage buffer {} due to driver limit {}",
+ ssbo_index, max_bindings);
+ return false;
+ }
+ }
+ }
channel_state->enabled_storage_buffers[stage] |= 1U << ssbo_index;
channel_state->written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
+ if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
+ if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
+ ++channel_state->total_graphics_storage_buffers;
+ }
+ }
const auto& cbufs = maxwell3d->state.shader_stages[stage];
const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
@@ -446,6 +470,12 @@ void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, G
 template <class P>
 void BufferCache<P>::UnbindComputeStorageBuffers() {
+ if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
+ if (runtime.ShouldLimitDynamicStorageBuffers()) {
+ channel_state->total_compute_storage_buffers -=
+            static_cast<u32>(std::popcount(channel_state->enabled_compute_storage_buffers));
+ }
+ }
channel_state->enabled_compute_storage_buffers = 0;
channel_state->written_compute_storage_buffers = 0;
channel_state->image_compute_texture_buffers = 0;
@@ -459,8 +489,26 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
ssbo_index);
return;
}
+ const bool already_enabled =
+ ((channel_state->enabled_compute_storage_buffers >> ssbo_index) & 1U) != 0;
+ if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
+ if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
+ const u32 max_bindings = runtime.GetMaxDynamicStorageBuffers();
+ if (channel_state->total_compute_storage_buffers >= max_bindings) {
+ LOG_WARNING(HW_GPU,
+ "Skipping compute storage buffer {} due to driver limit {}",
+ ssbo_index, max_bindings);
+ return;
+ }
+ }
+ }
channel_state->enabled_compute_storage_buffers |= 1U << ssbo_index;
channel_state->written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
+ if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
+ if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
+ ++channel_state->total_compute_storage_buffers;
+ }
+ }
const auto& launch_desc = kepler_compute->launch_description;
if (((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) == 0) {
@@ -793,9 +841,23 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
const u32 size = (std::min)(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]);
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer, binding.buffer_id);
- const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
- size <= channel_state->uniform_buffer_skip_cache_size &&
- !memory_tracker.IsRegionGpuModified(device_addr, size);
+ const bool has_host_buffer = binding.buffer_id != NULL_BUFFER_ID;
+ const u32 offset = has_host_buffer ? buffer.Offset(device_addr) : 0;
+ const bool needs_alignment_stream = [&]() {
+ if constexpr (IS_OPENGL) {
+ return false;
+ } else {
+ if (!has_host_buffer) {
+ return false;
+ }
+ const u32 alignment = runtime.GetUniformBufferAlignment();
+ return alignment > 1 && (offset % alignment) != 0;
+ }
+ }();
+ const bool use_fast_buffer = needs_alignment_stream ||
+ (has_host_buffer &&
+ size <= channel_state->uniform_buffer_skip_cache_size &&
+ !memory_tracker.IsRegionGpuModified(device_addr, size));
if (use_fast_buffer) {
if constexpr (IS_OPENGL) {
if (runtime.HasFastBufferSubData()) {
@@ -834,7 +896,6 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
if (!needs_bind) {
return;
}
- const u32 offset = buffer.Offset(device_addr);
if constexpr (IS_OPENGL) {
// Mark the index as dirty if offset doesn't match
const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
@@ -951,9 +1012,30 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
TouchBuffer(buffer, binding.buffer_id);
const u32 size =
(std::min)(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]);
+ const bool has_host_buffer = binding.buffer_id != NULL_BUFFER_ID;
+ const u32 offset = has_host_buffer ? buffer.Offset(binding.device_addr) : 0;
+ const bool needs_alignment_stream = [&]() {
+ if constexpr (IS_OPENGL) {
+ return false;
+ } else {
+ if (!has_host_buffer) {
+ return false;
+ }
+ const u32 alignment = runtime.GetUniformBufferAlignment();
+ return alignment > 1 && (offset % alignment) != 0;
+ }
+ }();
+ if constexpr (!IS_OPENGL) {
+ if (needs_alignment_stream) {
+            const std::span<u8> span =
+                runtime.BindMappedUniformBuffer(0, binding_index, size);
+ device_memory.ReadBlockUnsafe(binding.device_addr, span.data(), size);
+ return;
+ }
+ }
+
SynchronizeBuffer(buffer, binding.device_addr, size);
- const u32 offset = buffer.Offset(binding.device_addr);
buffer.MarkUsage(offset, size);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 1b551931a4..ed50634683 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -8,6 +8,7 @@
#include
#include
+#include <bit>
#include
#include
#include
@@ -132,6 +133,9 @@ public:
u32 enabled_compute_storage_buffers = 0;
u32 written_compute_storage_buffers = 0;
+ u32 total_graphics_storage_buffers = 0;
+ u32 total_compute_storage_buffers = 0;
+
     std::array<u32, NUM_STAGES> enabled_texture_buffers{};
     std::array<u32, NUM_STAGES> written_texture_buffers{};
     std::array<u32, NUM_STAGES> image_texture_buffers{};
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 4188dec7cc..23ebe50196 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -198,6 +198,10 @@ public:
return device.CanReportMemoryUsage();
}
+ u32 GetUniformBufferAlignment() const {
+        return static_cast<u32>(device.GetUniformBufferAlignment());
+ }
+
u32 GetStorageBufferAlignment() const {
         return static_cast<u32>(device.GetShaderStorageBufferAlignment());
}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index f4ef8f1883..8077860ae9 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -333,6 +333,13 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m
staging_pool{staging_pool_}, guest_descriptor_queue{guest_descriptor_queue_},
quad_index_pass(device, scheduler, descriptor_pool, staging_pool,
compute_pass_descriptor_queue) {
+ const VkDriverIdKHR driver_id = device.GetDriverID();
+ limit_dynamic_storage_buffers = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
+ driver_id == VK_DRIVER_ID_MESA_TURNIP ||
+ driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
+ if (limit_dynamic_storage_buffers) {
+ max_dynamic_storage_buffers = device.GetMaxDescriptorSetStorageBuffersDynamic();
+ }
if (device.GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
// TODO: FixMe: Uint8Pass compute shader does not build on some Qualcomm drivers.
         uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool,
@@ -368,6 +375,10 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
return device.CanReportMemoryUsage();
}
+u32 BufferCacheRuntime::GetUniformBufferAlignment() const {
+    return static_cast<u32>(device.GetUniformBufferAlignment());
+}
+
u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
     return static_cast<u32>(device.GetStorageBufferAlignment());
}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 0d284decc2..d5d806fc69 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -6,6 +6,8 @@
#pragma once
+#include <limits>
+
#include "video_core/buffer_cache/buffer_cache_base.h"
#include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/buffer_cache/usage_tracker.h"
@@ -94,6 +96,8 @@ public:
bool CanReportMemoryUsage() const;
+ u32 GetUniformBufferAlignment() const;
+
u32 GetStorageBufferAlignment() const;
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
@@ -149,6 +153,14 @@ public:
guest_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
}
+ bool ShouldLimitDynamicStorageBuffers() const {
+ return limit_dynamic_storage_buffers;
+ }
+
+ u32 GetMaxDynamicStorageBuffers() const {
+ return max_dynamic_storage_buffers;
+ }
+
private:
void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
guest_descriptor_queue.AddBuffer(buffer, offset, size);
@@ -170,6 +182,9 @@ private:
     std::unique_ptr<Uint8Pass> uint8_pass;
QuadIndexedPass quad_index_pass;
+
+ bool limit_dynamic_storage_buffers = false;
+    u32 max_dynamic_storage_buffers = std::numeric_limits<u32>::max();
};
struct BufferCacheParams {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 41b8c72e65..f81b63ef44 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -278,8 +278,9 @@ size_t GetTotalPipelineWorkers() {
const size_t max_core_threads =
         std::max(static_cast<unsigned long long>(std::thread::hardware_concurrency()), 2ULL) - 1ULL;
#ifdef ANDROID
- // Leave at least a few cores free in android
- constexpr size_t free_cores = 3ULL;
+    // Leave at least one core free on Android. Previously we reserved three, but
+    // shipping builds benefit from the extra compilation workers.
+    constexpr size_t free_cores = 1ULL;
if (max_core_threads <= free_cores) {
return 1ULL;
}
@@ -797,6 +798,19 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
}
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
+ const VkDriverIdKHR driver_id = device.GetDriverID();
+ const bool needs_shared_mem_clamp =
+ driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
+ driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
+ const u32 max_shared_memory = device.GetMaxComputeSharedMemorySize();
+ if (needs_shared_mem_clamp && program.shared_memory_size > max_shared_memory) {
+ LOG_WARNING(Render_Vulkan,
+ "Compute shader 0x{:016x} requests {}KB shared memory but device max is {}KB - clamping",
+ key.unique_hash,
+ program.shared_memory_size / 1024,
+ max_shared_memory / 1024);
+ program.shared_memory_size = max_shared_memory;
+ }
     const std::vector<u32> code{EmitSPIRV(profile, program, this->optimize_spirv_output)};
device.SaveShader(code);
vk::ShaderModule spv_module{BuildShader(device, code)};
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 07d72189bb..64d2f6b586 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1527,6 +1527,10 @@ bool TextureCacheRuntime::CanReportMemoryUsage() const {
return device.CanReportMemoryUsage();
}
+std::optional<size_t> TextureCacheRuntime::GetSamplerHeapBudget() const {
+ return device.GetSamplerHeapBudget();
+}
+
void TextureCacheRuntime::TickFrame() {}
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index cd11cc8fc7..570a3cb335 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@@ -62,6 +65,8 @@ public:
bool CanReportMemoryUsage() const;
+    std::optional<size_t> GetSamplerHeapBudget() const;
+
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 2a44a5e8b2..c580fb10ef 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -6,6 +6,8 @@
#pragma once
+#include <optional>
+#include <unordered_set>
#include
#include
@@ -1736,11 +1738,89 @@ SamplerId TextureCache::FindSampler(const TSCEntry& config) {
}
const auto [pair, is_new] = channel_state->samplers.try_emplace(config);
if (is_new) {
+ EnforceSamplerBudget();
pair->second = slot_samplers.insert(runtime, config);
}
return pair->second;
}
+template <class P>
+std::optional<size_t> TextureCache<P>::QuerySamplerBudget() const {
+ if constexpr (requires { runtime.GetSamplerHeapBudget(); }) {
+ return runtime.GetSamplerHeapBudget();
+ } else {
+ return std::nullopt;
+ }
+}
+
+template <class P>
+void TextureCache<P>::EnforceSamplerBudget() {
+ const auto budget = QuerySamplerBudget();
+ if (!budget) {
+ return;
+ }
+ if (slot_samplers.size() < *budget) {
+ return;
+ }
+ if (!channel_state) {
+ return;
+ }
+ if (last_sampler_gc_frame == frame_tick) {
+ return;
+ }
+ last_sampler_gc_frame = frame_tick;
+ TrimInactiveSamplers(*budget);
+}
+
+template <class P>
+void TextureCache<P>::TrimInactiveSamplers(size_t budget) {
+ if (channel_state->samplers.empty()) {
+ return;
+ }
+ static constexpr size_t SAMPLER_GC_SLACK = 1024;
+ auto mark_active = [](auto& set, SamplerId id) {
+ if (!id || id == CORRUPT_ID || id == NULL_SAMPLER_ID) {
+ return;
+ }
+ set.insert(id);
+ };
+    std::unordered_set<SamplerId> active;
+ active.reserve(channel_state->graphics_sampler_ids.size() +
+ channel_state->compute_sampler_ids.size());
+ for (const SamplerId id : channel_state->graphics_sampler_ids) {
+ mark_active(active, id);
+ }
+ for (const SamplerId id : channel_state->compute_sampler_ids) {
+ mark_active(active, id);
+ }
+
+ size_t removed = 0;
+ auto& sampler_map = channel_state->samplers;
+ for (auto it = sampler_map.begin(); it != sampler_map.end();) {
+ const SamplerId sampler_id = it->second;
+ if (!sampler_id || sampler_id == CORRUPT_ID) {
+ it = sampler_map.erase(it);
+ continue;
+ }
+ if (active.find(sampler_id) != active.end()) {
+ ++it;
+ continue;
+ }
+ slot_samplers.erase(sampler_id);
+ it = sampler_map.erase(it);
+ ++removed;
+ if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) {
+ break;
+ }
+ }
+
+ if (removed != 0) {
+ LOG_WARNING(HW_GPU,
+ "Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers",
+ budget, removed);
+ }
+}
+
 template <class P>
 ImageViewId TextureCache<P>::FindColorBuffer(size_t index) {
const auto& regs = maxwell3d->regs;
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 01a9a6a3f1..5146a8c291 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -429,6 +429,9 @@ private:
void QueueAsyncDecode(Image& image, ImageId image_id);
void TickAsyncDecode();
+ void EnforceSamplerBudget();
+ void TrimInactiveSamplers(size_t budget);
+    std::optional<size_t> QuerySamplerBudget() const;
Runtime& runtime;
@@ -500,6 +503,7 @@ private:
u64 modification_tick = 0;
u64 frame_tick = 0;
+    u64 last_sampler_gc_frame = (std::numeric_limits<u64>::max)();
Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
     std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index d68cc4b9a4..2ae5052640 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -605,6 +605,18 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
if (version < VK_MAKE_API_VERSION(0, 255, 615, 512)) {
has_broken_parallel_compiling = true;
}
+ const size_t sampler_limit = properties.properties.limits.maxSamplerAllocationCount;
+ if (sampler_limit > 0) {
+ constexpr size_t MIN_SAMPLER_BUDGET = 1024U;
+ const size_t reserved = sampler_limit / 4U;
+ const size_t derived_budget =
+ (std::max)(MIN_SAMPLER_BUDGET, sampler_limit - reserved);
+ sampler_heap_budget = derived_budget;
+ LOG_WARNING(Render_Vulkan,
+ "Qualcomm driver reports max {} samplers; reserving {} (25%) and "
+ "allowing Eden to use {} (75%) to avoid heap exhaustion",
+ sampler_limit, reserved, sampler_heap_budget);
+ }
}
if (extensions.sampler_filter_minmax && is_amd) {
@@ -1529,6 +1541,13 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
}
}
+std::optional<size_t> Device::GetSamplerHeapBudget() const {
+ if (sampler_heap_budget == 0) {
+ return std::nullopt;
+ }
+ return sampler_heap_budget;
+}
+
u64 Device::GetDeviceMemoryUsage() const {
VkPhysicalDeviceMemoryBudgetPropertiesEXT budget;
budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 726b17e47e..9b08dc2926 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -6,6 +6,7 @@
#pragma once
+#include <optional>
#include
#include
#include
@@ -326,6 +327,16 @@ public:
return properties.properties.limits.maxComputeSharedMemorySize;
}
+ /// Returns the maximum number of dynamic storage buffer descriptors per set.
+ u32 GetMaxDescriptorSetStorageBuffersDynamic() const {
+ return properties.properties.limits.maxDescriptorSetStorageBuffersDynamic;
+ }
+
+ /// Returns the maximum number of dynamic uniform buffer descriptors per set.
+ u32 GetMaxDescriptorSetUniformBuffersDynamic() const {
+ return properties.properties.limits.maxDescriptorSetUniformBuffersDynamic;
+ }
+
/// Returns float control properties of the device.
const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
return properties.float_controls;
@@ -744,6 +755,8 @@ public:
return has_broken_parallel_compiling;
}
+    std::optional<size_t> GetSamplerHeapBudget() const;
+
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return properties.driver.driverName;
@@ -1040,6 +1053,7 @@ private:
bool dynamic_state3_alpha_to_coverage{};
bool dynamic_state3_alpha_to_one{};
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
+ size_t sampler_heap_budget{}; ///< Sampler budget for buggy drivers (0 = unlimited).
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 sets_per_pool{}; ///< Sets per Description Pool
NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};