Browse Source

[vk, qcom] Memory, Samplers, BindingBuffers and UniformBufferAlignment set by hardware capabilities for QCOM. (#3280)

This PR adjusts Eden's resource usage based on driver specifications, improving stability and making resource usage smarter, and increases the number of TotalPipelineWorkers on Android.

Co-authored-by: Caio Oliveira <caiooliveirafarias0@gmail.com>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3280
Reviewed-by: Lizzie <lizzie@eden-emu.dev>
Reviewed-by: DraVee <dravee@eden-emu.dev>
Co-authored-by: CamilleLaVey <camillelavey99@gmail.com>
Co-committed-by: CamilleLaVey <camillelavey99@gmail.com>
pull/3293/head
CamilleLaVey 4 days ago
committed by crueter
parent
commit
a27d35362d
No known key found for this signature in database GPG Key ID: 425ACD2D4830EBC6
  1. 92
      src/video_core/buffer_cache/buffer_cache.h
  2. 4
      src/video_core/buffer_cache/buffer_cache_base.h
  3. 4
      src/video_core/renderer_opengl/gl_buffer_cache.h
  4. 11
      src/video_core/renderer_vulkan/vk_buffer_cache.cpp
  5. 15
      src/video_core/renderer_vulkan/vk_buffer_cache.h
  6. 18
      src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
  7. 4
      src/video_core/renderer_vulkan/vk_texture_cache.cpp
  8. 5
      src/video_core/renderer_vulkan/vk_texture_cache.h
  9. 80
      src/video_core/texture_cache/texture_cache.h
  10. 4
      src/video_core/texture_cache/texture_cache_base.h
  11. 19
      src/video_core/vulkan_common/vulkan_device.cpp
  12. 14
      src/video_core/vulkan_common/vulkan_device.h

92
src/video_core/buffer_cache/buffer_cache.h

@ -407,6 +407,12 @@ void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
template <class P> template <class P>
void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) { void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers()) {
channel_state->total_graphics_storage_buffers -=
static_cast<u32>(std::popcount(channel_state->enabled_storage_buffers[stage]));
}
}
channel_state->enabled_storage_buffers[stage] = 0; channel_state->enabled_storage_buffers[stage] = 0;
channel_state->written_storage_buffers[stage] = 0; channel_state->written_storage_buffers[stage] = 0;
} }
@ -414,8 +420,26 @@ void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
template <class P> template <class P>
bool BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, bool BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
u32 cbuf_offset, bool is_written) { u32 cbuf_offset, bool is_written) {
const bool already_enabled =
((channel_state->enabled_storage_buffers[stage] >> ssbo_index) & 1U) != 0;
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
const u32 max_bindings = runtime.GetMaxDynamicStorageBuffers();
if (channel_state->total_graphics_storage_buffers >= max_bindings) {
LOG_WARNING(HW_GPU,
"Skipping graphics storage buffer {} due to driver limit {}",
ssbo_index, max_bindings);
return false;
}
}
}
channel_state->enabled_storage_buffers[stage] |= 1U << ssbo_index; channel_state->enabled_storage_buffers[stage] |= 1U << ssbo_index;
channel_state->written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index; channel_state->written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
++channel_state->total_graphics_storage_buffers;
}
}
const auto& cbufs = maxwell3d->state.shader_stages[stage]; const auto& cbufs = maxwell3d->state.shader_stages[stage];
const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset; const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
@ -446,6 +470,12 @@ void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, G
template <class P> template <class P>
void BufferCache<P>::UnbindComputeStorageBuffers() { void BufferCache<P>::UnbindComputeStorageBuffers() {
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers()) {
channel_state->total_compute_storage_buffers -=
static_cast<u32>(std::popcount(channel_state->enabled_compute_storage_buffers));
}
}
channel_state->enabled_compute_storage_buffers = 0; channel_state->enabled_compute_storage_buffers = 0;
channel_state->written_compute_storage_buffers = 0; channel_state->written_compute_storage_buffers = 0;
channel_state->image_compute_texture_buffers = 0; channel_state->image_compute_texture_buffers = 0;
@ -459,8 +489,26 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
ssbo_index); ssbo_index);
return; return;
} }
const bool already_enabled =
((channel_state->enabled_compute_storage_buffers >> ssbo_index) & 1U) != 0;
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
const u32 max_bindings = runtime.GetMaxDynamicStorageBuffers();
if (channel_state->total_compute_storage_buffers >= max_bindings) {
LOG_WARNING(HW_GPU,
"Skipping compute storage buffer {} due to driver limit {}",
ssbo_index, max_bindings);
return;
}
}
}
channel_state->enabled_compute_storage_buffers |= 1U << ssbo_index; channel_state->enabled_compute_storage_buffers |= 1U << ssbo_index;
channel_state->written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index; channel_state->written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
if constexpr (requires { runtime.ShouldLimitDynamicStorageBuffers(); }) {
if (runtime.ShouldLimitDynamicStorageBuffers() && !already_enabled) {
++channel_state->total_compute_storage_buffers;
}
}
const auto& launch_desc = kepler_compute->launch_description; const auto& launch_desc = kepler_compute->launch_description;
if (((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) == 0) { if (((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) == 0) {
@ -793,9 +841,23 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
const u32 size = (std::min)(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]); const u32 size = (std::min)(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]);
Buffer& buffer = slot_buffers[binding.buffer_id]; Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer, binding.buffer_id); TouchBuffer(buffer, binding.buffer_id);
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
size <= channel_state->uniform_buffer_skip_cache_size &&
!memory_tracker.IsRegionGpuModified(device_addr, size);
const bool has_host_buffer = binding.buffer_id != NULL_BUFFER_ID;
const u32 offset = has_host_buffer ? buffer.Offset(device_addr) : 0;
const bool needs_alignment_stream = [&]() {
if constexpr (IS_OPENGL) {
return false;
} else {
if (!has_host_buffer) {
return false;
}
const u32 alignment = runtime.GetUniformBufferAlignment();
return alignment > 1 && (offset % alignment) != 0;
}
}();
const bool use_fast_buffer = needs_alignment_stream ||
(has_host_buffer &&
size <= channel_state->uniform_buffer_skip_cache_size &&
!memory_tracker.IsRegionGpuModified(device_addr, size));
if (use_fast_buffer) { if (use_fast_buffer) {
if constexpr (IS_OPENGL) { if constexpr (IS_OPENGL) {
if (runtime.HasFastBufferSubData()) { if (runtime.HasFastBufferSubData()) {
@ -834,7 +896,6 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
if (!needs_bind) { if (!needs_bind) {
return; return;
} }
const u32 offset = buffer.Offset(device_addr);
if constexpr (IS_OPENGL) { if constexpr (IS_OPENGL) {
// Mark the index as dirty if offset doesn't match // Mark the index as dirty if offset doesn't match
const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
@ -951,9 +1012,30 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
TouchBuffer(buffer, binding.buffer_id); TouchBuffer(buffer, binding.buffer_id);
const u32 size = const u32 size =
(std::min)(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]); (std::min)(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]);
const bool has_host_buffer = binding.buffer_id != NULL_BUFFER_ID;
const u32 offset = has_host_buffer ? buffer.Offset(binding.device_addr) : 0;
const bool needs_alignment_stream = [&]() {
if constexpr (IS_OPENGL) {
return false;
} else {
if (!has_host_buffer) {
return false;
}
const u32 alignment = runtime.GetUniformBufferAlignment();
return alignment > 1 && (offset % alignment) != 0;
}
}();
if constexpr (!IS_OPENGL) {
if (needs_alignment_stream) {
const std::span<u8> span =
runtime.BindMappedUniformBuffer(0, binding_index, size);
device_memory.ReadBlockUnsafe(binding.device_addr, span.data(), size);
return;
}
}
SynchronizeBuffer(buffer, binding.device_addr, size); SynchronizeBuffer(buffer, binding.device_addr, size);
const u32 offset = buffer.Offset(binding.device_addr);
buffer.MarkUsage(offset, size); buffer.MarkUsage(offset, size);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) { if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size); runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);

4
src/video_core/buffer_cache/buffer_cache_base.h

@ -8,6 +8,7 @@
#include <algorithm> #include <algorithm>
#include <array> #include <array>
#include <bit>
#include <functional> #include <functional>
#include <memory> #include <memory>
#include <mutex> #include <mutex>
@ -132,6 +133,9 @@ public:
u32 enabled_compute_storage_buffers = 0; u32 enabled_compute_storage_buffers = 0;
u32 written_compute_storage_buffers = 0; u32 written_compute_storage_buffers = 0;
u32 total_graphics_storage_buffers = 0;
u32 total_compute_storage_buffers = 0;
std::array<u32, NUM_STAGES> enabled_texture_buffers{}; std::array<u32, NUM_STAGES> enabled_texture_buffers{};
std::array<u32, NUM_STAGES> written_texture_buffers{}; std::array<u32, NUM_STAGES> written_texture_buffers{};
std::array<u32, NUM_STAGES> image_texture_buffers{}; std::array<u32, NUM_STAGES> image_texture_buffers{};

4
src/video_core/renderer_opengl/gl_buffer_cache.h

@ -198,6 +198,10 @@ public:
return device.CanReportMemoryUsage(); return device.CanReportMemoryUsage();
} }
/// Returns the device's uniform buffer offset alignment, narrowed to u32.
u32 GetUniformBufferAlignment() const {
    const auto device_alignment = device.GetUniformBufferAlignment();
    return static_cast<u32>(device_alignment);
}
u32 GetStorageBufferAlignment() const { u32 GetStorageBufferAlignment() const {
return static_cast<u32>(device.GetShaderStorageBufferAlignment()); return static_cast<u32>(device.GetShaderStorageBufferAlignment());
} }

11
src/video_core/renderer_vulkan/vk_buffer_cache.cpp

@ -333,6 +333,13 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m
staging_pool{staging_pool_}, guest_descriptor_queue{guest_descriptor_queue_}, staging_pool{staging_pool_}, guest_descriptor_queue{guest_descriptor_queue_},
quad_index_pass(device, scheduler, descriptor_pool, staging_pool, quad_index_pass(device, scheduler, descriptor_pool, staging_pool,
compute_pass_descriptor_queue) { compute_pass_descriptor_queue) {
const VkDriverIdKHR driver_id = device.GetDriverID();
limit_dynamic_storage_buffers = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
if (limit_dynamic_storage_buffers) {
max_dynamic_storage_buffers = device.GetMaxDescriptorSetStorageBuffersDynamic();
}
if (device.GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY) { if (device.GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
// TODO: FixMe: Uint8Pass compute shader does not build on some Qualcomm drivers. // TODO: FixMe: Uint8Pass compute shader does not build on some Qualcomm drivers.
uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool, uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool,
@ -368,6 +375,10 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
return device.CanReportMemoryUsage(); return device.CanReportMemoryUsage();
} }
/// Returns the device's uniform buffer offset alignment, narrowed to u32.
u32 BufferCacheRuntime::GetUniformBufferAlignment() const {
    const auto device_alignment = device.GetUniformBufferAlignment();
    return static_cast<u32>(device_alignment);
}
u32 BufferCacheRuntime::GetStorageBufferAlignment() const { u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
return static_cast<u32>(device.GetStorageBufferAlignment()); return static_cast<u32>(device.GetStorageBufferAlignment());
} }

15
src/video_core/renderer_vulkan/vk_buffer_cache.h

@ -6,6 +6,8 @@
#pragma once #pragma once
#include <limits>
#include "video_core/buffer_cache/buffer_cache_base.h" #include "video_core/buffer_cache/buffer_cache_base.h"
#include "video_core/buffer_cache/memory_tracker_base.h" #include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/buffer_cache/usage_tracker.h" #include "video_core/buffer_cache/usage_tracker.h"
@ -94,6 +96,8 @@ public:
bool CanReportMemoryUsage() const; bool CanReportMemoryUsage() const;
u32 GetUniformBufferAlignment() const;
u32 GetStorageBufferAlignment() const; u32 GetStorageBufferAlignment() const;
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
@ -149,6 +153,14 @@ public:
guest_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format)); guest_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
} }
/// Returns whether the buffer cache must cap the number of bound storage buffers.
/// The flag is decided once in the constructor from the Vulkan driver ID
/// (Qualcomm proprietary, Mesa Turnip and ARM proprietary drivers enable it).
bool ShouldLimitDynamicStorageBuffers() const {
return limit_dynamic_storage_buffers;
}
/// Returns the storage buffer binding cap used when ShouldLimitDynamicStorageBuffers() is true.
/// Defaults to the maximum u32 value; the constructor replaces it with the device's
/// maxDescriptorSetStorageBuffersDynamic limit only when limiting is enabled.
u32 GetMaxDynamicStorageBuffers() const {
return max_dynamic_storage_buffers;
}
private: private:
void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
guest_descriptor_queue.AddBuffer(buffer, offset, size); guest_descriptor_queue.AddBuffer(buffer, offset, size);
@ -170,6 +182,9 @@ private:
std::unique_ptr<Uint8Pass> uint8_pass; std::unique_ptr<Uint8Pass> uint8_pass;
QuadIndexedPass quad_index_pass; QuadIndexedPass quad_index_pass;
bool limit_dynamic_storage_buffers = false;
u32 max_dynamic_storage_buffers = std::numeric_limits<u32>::max();
}; };
struct BufferCacheParams { struct BufferCacheParams {

18
src/video_core/renderer_vulkan/vk_pipeline_cache.cpp

@ -278,8 +278,9 @@ size_t GetTotalPipelineWorkers() {
const size_t max_core_threads = const size_t max_core_threads =
std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL) - 1ULL; std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL) - 1ULL;
#ifdef ANDROID #ifdef ANDROID
// Leave at least a few cores free in android
constexpr size_t free_cores = 3ULL;
// Leave at least one core free on Android. Previously we reserved three, but
// shipping builds benefit from the extra compilation workers.
constexpr size_t free_cores = 1ULL;
if (max_core_threads <= free_cores) { if (max_core_threads <= free_cores) {
return 1ULL; return 1ULL;
} }
@ -797,6 +798,19 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
} }
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
const VkDriverIdKHR driver_id = device.GetDriverID();
const bool needs_shared_mem_clamp =
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
const u32 max_shared_memory = device.GetMaxComputeSharedMemorySize();
if (needs_shared_mem_clamp && program.shared_memory_size > max_shared_memory) {
LOG_WARNING(Render_Vulkan,
"Compute shader 0x{:016x} requests {}KB shared memory but device max is {}KB - clamping",
key.unique_hash,
program.shared_memory_size / 1024,
max_shared_memory / 1024);
program.shared_memory_size = max_shared_memory;
}
const std::vector<u32> code{EmitSPIRV(profile, program, this->optimize_spirv_output)}; const std::vector<u32> code{EmitSPIRV(profile, program, this->optimize_spirv_output)};
device.SaveShader(code); device.SaveShader(code);
vk::ShaderModule spv_module{BuildShader(device, code)}; vk::ShaderModule spv_module{BuildShader(device, code)};

4
src/video_core/renderer_vulkan/vk_texture_cache.cpp

@ -1527,6 +1527,10 @@ bool TextureCacheRuntime::CanReportMemoryUsage() const {
return device.CanReportMemoryUsage(); return device.CanReportMemoryUsage();
} }
/// Forwards the device's sampler budget; empty when the device imposes none.
std::optional<size_t> TextureCacheRuntime::GetSamplerHeapBudget() const {
    std::optional<size_t> device_budget = device.GetSamplerHeapBudget();
    return device_budget;
}
void TextureCacheRuntime::TickFrame() {} void TextureCacheRuntime::TickFrame() {}
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,

5
src/video_core/renderer_vulkan/vk_texture_cache.h

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-License-Identifier: GPL-3.0-or-later
@ -62,6 +65,8 @@ public:
bool CanReportMemoryUsage() const; bool CanReportMemoryUsage() const;
std::optional<size_t> GetSamplerHeapBudget() const;
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const Region2D& dst_region, const Region2D& src_region, const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Filter filter,

80
src/video_core/texture_cache/texture_cache.h

@ -6,6 +6,8 @@
#pragma once #pragma once
#include <limits>
#include <optional>
#include <unordered_set> #include <unordered_set>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
@ -1736,11 +1738,89 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
} }
const auto [pair, is_new] = channel_state->samplers.try_emplace(config); const auto [pair, is_new] = channel_state->samplers.try_emplace(config);
if (is_new) { if (is_new) {
EnforceSamplerBudget();
pair->second = slot_samplers.insert(runtime, config); pair->second = slot_samplers.insert(runtime, config);
} }
return pair->second; return pair->second;
} }
/// Asks the runtime for a sampler budget when the runtime type exposes
/// GetSamplerHeapBudget(); runtimes without that member report no budget.
template <class P>
std::optional<size_t> TextureCache<P>::QuerySamplerBudget() const {
    constexpr bool runtime_reports_budget = requires { runtime.GetSamplerHeapBudget(); };
    if constexpr (!runtime_reports_budget) {
        return std::nullopt;
    } else {
        return runtime.GetSamplerHeapBudget();
    }
}
/// Triggers a sampler trim when the sampler slot count has reached the runtime's
/// budget. Does nothing if there is no budget, no bound channel state, or a trim
/// was already attempted during the current frame tick.
template <class P>
void TextureCache<P>::EnforceSamplerBudget() {
    const std::optional<size_t> limit = QuerySamplerBudget();
    const bool over_budget = limit.has_value() && slot_samplers.size() >= *limit;
    if (!over_budget || !channel_state) {
        return;
    }
    // At most one trim pass per frame tick.
    if (last_sampler_gc_frame != frame_tick) {
        last_sampler_gc_frame = frame_tick;
        TrimInactiveSamplers(*limit);
    }
}
/// Releases samplers of the current channel that are not referenced by its
/// graphics or compute sampler id tables.
///
/// Walks channel_state->samplers, erasing unreferenced entries from both the map
/// and slot_samplers, and stops early once the slot count is at least
/// SAMPLER_GC_SLACK below @p budget. Logs a warning when anything was reclaimed.
///
/// @param budget Sampler count limit that triggered the trim.
template <class P>
void TextureCache<P>::TrimInactiveSamplers(size_t budget) {
    auto& sampler_map = channel_state->samplers;
    if (sampler_map.empty()) {
        return;
    }
    static constexpr size_t SAMPLER_GC_SLACK = 1024;

    // Collect every sampler id the channel's descriptor tables currently reference.
    std::unordered_set<SamplerId> active;
    active.reserve(channel_state->graphics_sampler_ids.size() +
                   channel_state->compute_sampler_ids.size());
    const auto mark_active = [&active](SamplerId id) {
        if (!id || id == CORRUPT_ID || id == NULL_SAMPLER_ID) {
            return;
        }
        active.insert(id);
    };
    for (const SamplerId id : channel_state->graphics_sampler_ids) {
        mark_active(id);
    }
    for (const SamplerId id : channel_state->compute_sampler_ids) {
        mark_active(id);
    }

    size_t removed = 0;
    for (auto it = sampler_map.begin(); it != sampler_map.end();) {
        const SamplerId sampler_id = it->second;
        if (!sampler_id) {
            // A null id is a placeholder: FindSampler emplaces the entry, calls
            // EnforceSamplerBudget(), and only then assigns the id through the
            // iterator it still holds. Erasing the entry here would invalidate
            // that iterator, so leave it in place.
            ++it;
            continue;
        }
        if (sampler_id == CORRUPT_ID) {
            // Map entry does not refer to a real slot; drop only the map entry.
            it = sampler_map.erase(it);
            continue;
        }
        if (active.count(sampler_id) != 0) {
            ++it;
            continue;
        }
        slot_samplers.erase(sampler_id);
        it = sampler_map.erase(it);
        ++removed;
        // Stop once there is enough headroom below the budget.
        if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) {
            break;
        }
    }

    if (removed != 0) {
        LOG_WARNING(HW_GPU,
                    "Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers",
                    budget, removed);
    }
}
template <class P> template <class P>
ImageViewId TextureCache<P>::FindColorBuffer(size_t index) { ImageViewId TextureCache<P>::FindColorBuffer(size_t index) {
const auto& regs = maxwell3d->regs; const auto& regs = maxwell3d->regs;

4
src/video_core/texture_cache/texture_cache_base.h

@ -429,6 +429,9 @@ private:
void QueueAsyncDecode(Image& image, ImageId image_id); void QueueAsyncDecode(Image& image, ImageId image_id);
void TickAsyncDecode(); void TickAsyncDecode();
void EnforceSamplerBudget();
void TrimInactiveSamplers(size_t budget);
std::optional<size_t> QuerySamplerBudget() const;
Runtime& runtime; Runtime& runtime;
@ -500,6 +503,7 @@ private:
u64 modification_tick = 0; u64 modification_tick = 0;
u64 frame_tick = 0; u64 frame_tick = 0;
u64 last_sampler_gc_frame = (std::numeric_limits<u64>::max)();
Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"}; Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes; std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;

19
src/video_core/vulkan_common/vulkan_device.cpp

@ -605,6 +605,18 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
if (version < VK_MAKE_API_VERSION(0, 255, 615, 512)) { if (version < VK_MAKE_API_VERSION(0, 255, 615, 512)) {
has_broken_parallel_compiling = true; has_broken_parallel_compiling = true;
} }
const size_t sampler_limit = properties.properties.limits.maxSamplerAllocationCount;
if (sampler_limit > 0) {
constexpr size_t MIN_SAMPLER_BUDGET = 1024U;
const size_t reserved = sampler_limit / 4U;
const size_t derived_budget =
(std::max)(MIN_SAMPLER_BUDGET, sampler_limit - reserved);
sampler_heap_budget = derived_budget;
LOG_WARNING(Render_Vulkan,
"Qualcomm driver reports max {} samplers; reserving {} (25%) and "
"allowing Eden to use {} (75%) to avoid heap exhaustion",
sampler_limit, reserved, sampler_heap_budget);
}
} }
if (extensions.sampler_filter_minmax && is_amd) { if (extensions.sampler_filter_minmax && is_amd) {
@ -1529,6 +1541,13 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
} }
} }
std::optional<size_t> Device::GetSamplerHeapBudget() const {
if (sampler_heap_budget == 0) {
return std::nullopt;
}
return sampler_heap_budget;
}
u64 Device::GetDeviceMemoryUsage() const { u64 Device::GetDeviceMemoryUsage() const {
VkPhysicalDeviceMemoryBudgetPropertiesEXT budget; VkPhysicalDeviceMemoryBudgetPropertiesEXT budget;
budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;

14
src/video_core/vulkan_common/vulkan_device.h

@ -6,6 +6,7 @@
#pragma once #pragma once
#include <optional>
#include <set> #include <set>
#include <span> #include <span>
#include <string> #include <string>
@ -326,6 +327,16 @@ public:
return properties.properties.limits.maxComputeSharedMemorySize; return properties.properties.limits.maxComputeSharedMemorySize;
} }
/// Returns the device limit on dynamic storage buffer descriptors per descriptor set
/// (VkPhysicalDeviceLimits::maxDescriptorSetStorageBuffersDynamic).
u32 GetMaxDescriptorSetStorageBuffersDynamic() const {
    const auto& limits = properties.properties.limits;
    return limits.maxDescriptorSetStorageBuffersDynamic;
}
/// Returns the device limit on dynamic uniform buffer descriptors per descriptor set
/// (VkPhysicalDeviceLimits::maxDescriptorSetUniformBuffersDynamic).
u32 GetMaxDescriptorSetUniformBuffersDynamic() const {
    const auto& limits = properties.properties.limits;
    return limits.maxDescriptorSetUniformBuffersDynamic;
}
/// Returns float control properties of the device. /// Returns float control properties of the device.
const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
return properties.float_controls; return properties.float_controls;
@ -744,6 +755,8 @@ public:
return has_broken_parallel_compiling; return has_broken_parallel_compiling;
} }
std::optional<size_t> GetSamplerHeapBudget() const;
/// Returns the vendor name reported from Vulkan. /// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const { std::string_view GetVendorName() const {
return properties.driver.driverName; return properties.driver.driverName;
@ -1040,6 +1053,7 @@ private:
bool dynamic_state3_alpha_to_coverage{}; bool dynamic_state3_alpha_to_coverage{};
bool dynamic_state3_alpha_to_one{}; bool dynamic_state3_alpha_to_one{};
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow. bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
size_t sampler_heap_budget{}; ///< Sampler budget for buggy drivers (0 = unlimited).
u64 device_access_memory{}; ///< Total size of device local memory in bytes. u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 sets_per_pool{}; ///< Sets per Description Pool u32 sets_per_pool{}; ///< Sets per Description Pool
NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer}; NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};

Loading…
Cancel
Save