From 8543f346b35b7e53794afb0e5581870c519c049b Mon Sep 17 00:00:00 2001 From: Ribbit Date: Sat, 11 Oct 2025 22:12:03 -0700 Subject: [PATCH] my very last idea --- src/video_core/buffer_cache/buffer_cache.h | 70 ++++++++++++------- .../buffer_cache/buffer_cache_base.h | 2 + .../renderer_vulkan/vk_buffer_cache.h | 10 +++ 3 files changed, 58 insertions(+), 24 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b5949c8b61..f31ee7ac9e 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -7,6 +7,7 @@ #pragma once #include +#include #include #include @@ -790,15 +791,20 @@ void BufferCache<P>
::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const Binding& binding = channel_state->uniform_buffers[stage][index]; const DAddr device_addr = binding.device_addr; const u32 size = (std::min)(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]); + const bool force_old_ubo = ForceOldUBOMethod(); u32 size_for_bind = size; + u32 max_range = 0; if constexpr (!IS_OPENGL) { if constexpr (requires(const Runtime& r) { r.GetMaxUniformBufferRange(); }) { - size_for_bind = (std::min)(size_for_bind, runtime.GetMaxUniformBufferRange()); + max_range = runtime.GetMaxUniformBufferRange(); + if (max_range != 0) { + size_for_bind = (std::min)(size_for_bind, max_range); + } } } Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); - const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && + const bool use_fast_buffer = !force_old_ubo && binding.buffer_id != NULL_BUFFER_ID && size <= channel_state->uniform_buffer_skip_cache_size && !memory_tracker.IsRegionGpuModified(device_addr, size); if (use_fast_buffer) { @@ -811,8 +817,8 @@ void BufferCache<P>
::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 if (should_fast_bind) { // We only have to bind when the currently bound buffer is not the fast version channel_state->fast_bound_uniform_buffers[stage] |= 1u << binding_index; - channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; - runtime.BindFastUniformBuffer(stage, binding_index, size); + channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size_for_bind; + runtime.BindFastUniformBuffer(stage, binding_index, size_for_bind); } const auto span = ImmediateBufferWithData(device_addr, size); runtime.PushFastUniformBuffer(stage, binding_index, span); @@ -840,21 +846,22 @@ void BufferCache<P>
::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 return; } const u32 offset = buffer.Offset(device_addr); + u32 ubo_align = 0; if constexpr (!IS_OPENGL) { - // Vulkan requires aligned uniform buffer offsets. If unaligned, stream into the - // aligned uniform ring as a correctness fallback (stock Qualcomm) if constexpr (requires(const Runtime& r) { r.GetUniformBufferAlignment(); }) { - const u32 ubo_align = runtime.GetUniformBufferAlignment(); - if (ubo_align != 0 && (offset % ubo_align) != 0) { - if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size_for_bind; - } - const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size_for_bind); - device_memory.ReadBlockUnsafe(device_addr, span.data(), size_for_bind); - return; + ubo_align = runtime.GetUniformBufferAlignment(); + } + if (!force_old_ubo && ubo_align != 0 && (offset % ubo_align) != 0) { + if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { + channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size_for_bind; } + const std::span<u8> span = + runtime.BindMappedUniformBuffer(stage, binding_index, size_for_bind); + device_memory.ReadBlockUnsafe(device_addr, span.data(), size_for_bind); + return; } - } else { + } + if constexpr (IS_OPENGL) { // Mark the index as dirty if offset doesn't match const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; @@ -970,26 +977,33 @@ void BufferCache<P>
::BindHostComputeUniformBuffers() { TouchBuffer(buffer, binding.buffer_id); const u32 size = (std::min)(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]); + const bool force_old_ubo = ForceOldUBOMethod(); u32 size_for_bind = size; + u32 max_range = 0; if constexpr (!IS_OPENGL) { if constexpr (requires(const Runtime& r) { r.GetMaxUniformBufferRange(); }) { - size_for_bind = (std::min)(size_for_bind, runtime.GetMaxUniformBufferRange()); + max_range = runtime.GetMaxUniformBufferRange(); + if (max_range != 0) { + size_for_bind = (std::min)(size_for_bind, max_range); + } } } SynchronizeBuffer(buffer, binding.device_addr, size); const u32 offset = buffer.Offset(binding.device_addr); + u32 ubo_align = 0; if constexpr (!IS_OPENGL) { if constexpr (requires(const Runtime& r) { r.GetUniformBufferAlignment(); }) { - const u32 ubo_align = runtime.GetUniformBufferAlignment(); - if (ubo_align != 0 && (offset % ubo_align) != 0) { - const std::span<u8> span = runtime.BindMappedUniformBuffer(0, binding_index, size_for_bind); - device_memory.ReadBlockUnsafe(binding.device_addr, span.data(), size_for_bind); - if constexpr (NEEDS_BIND_UNIFORM_INDEX) { - ++binding_index; - } - return; + ubo_align = runtime.GetUniformBufferAlignment(); + } + if (!force_old_ubo && ubo_align != 0 && (offset % ubo_align) != 0) { + const std::span<u8> span = + runtime.BindMappedUniformBuffer(0, binding_index, size_for_bind); + device_memory.ReadBlockUnsafe(binding.device_addr, span.data(), size_for_bind); + if constexpr (NEEDS_BIND_UNIFORM_INDEX) { + ++binding_index; } + return; } } buffer.MarkUsage(offset, size_for_bind); @@ -1820,6 +1834,14 @@ std::span<u8> BufferCache<P>
::ImmediateBuffer(size_t wanted_capacity) { return std::span<u8>(immediate_buffer_alloc.data(), wanted_capacity); } +template <class P> +bool BufferCache<P>
::ForceOldUBOMethod() const noexcept { + if constexpr (requires(const Runtime& r) { r.ForceOldUniformBufferMethod(); }) { + return runtime.ForceOldUniformBufferMethod(); + } + return false; +} + template <class P> bool BufferCache<P>
::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept { return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1u) != 0; diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 1b551931a4..4512bcfe26 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -447,6 +447,8 @@ private: [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); + [[nodiscard]] bool ForceOldUBOMethod() const noexcept; + [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; void ClearDownload(DAddr base_addr, u64 size); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index ccba987df5..dd13f9b29f 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -6,6 +6,7 @@ #pragma once +#include "common/string_util.h" #include "video_core/buffer_cache/buffer_cache_base.h" #include "video_core/buffer_cache/memory_tracker_base.h" #include "video_core/buffer_cache/usage_tracker.h" @@ -94,6 +95,15 @@ public: bool CanReportMemoryUsage() const; + [[nodiscard]] bool ForceOldUniformBufferMethod() const noexcept { + const VkDriverIdKHR driver_id = device.GetDriverID(); + if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) { + return true; + } + const std::string driver_name = Common::ToLower(std::string{device.GetDriverName()}); + return driver_name.find("qualcomm") != std::string::npos; + } + u32 GetUniformBufferAlignment() const { return static_cast<u32>(device.GetUniformBufferAlignment()); }