Browse Source

my very last idea

pull/2723/head
Ribbit 5 months ago
parent
commit
8543f346b3
  1. 70
      src/video_core/buffer_cache/buffer_cache.h
  2. 2
      src/video_core/buffer_cache/buffer_cache_base.h
  3. 10
      src/video_core/renderer_vulkan/vk_buffer_cache.h

70
src/video_core/buffer_cache/buffer_cache.h

@ -7,6 +7,7 @@
#pragma once #pragma once
#include <algorithm> #include <algorithm>
#include <array>
#include <memory> #include <memory>
#include <numeric> #include <numeric>
@ -790,15 +791,20 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
const Binding& binding = channel_state->uniform_buffers[stage][index]; const Binding& binding = channel_state->uniform_buffers[stage][index];
const DAddr device_addr = binding.device_addr; const DAddr device_addr = binding.device_addr;
const u32 size = (std::min)(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]); const u32 size = (std::min)(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]);
const bool force_old_ubo = ForceOldUBOMethod();
u32 size_for_bind = size; u32 size_for_bind = size;
u32 max_range = 0;
if constexpr (!IS_OPENGL) { if constexpr (!IS_OPENGL) {
if constexpr (requires(const Runtime& r) { r.GetMaxUniformBufferRange(); }) { if constexpr (requires(const Runtime& r) { r.GetMaxUniformBufferRange(); }) {
size_for_bind = (std::min)(size_for_bind, runtime.GetMaxUniformBufferRange());
max_range = runtime.GetMaxUniformBufferRange();
if (max_range != 0) {
size_for_bind = (std::min)(size_for_bind, max_range);
}
} }
} }
Buffer& buffer = slot_buffers[binding.buffer_id]; Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer, binding.buffer_id); TouchBuffer(buffer, binding.buffer_id);
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
const bool use_fast_buffer = !force_old_ubo && binding.buffer_id != NULL_BUFFER_ID &&
size <= channel_state->uniform_buffer_skip_cache_size && size <= channel_state->uniform_buffer_skip_cache_size &&
!memory_tracker.IsRegionGpuModified(device_addr, size); !memory_tracker.IsRegionGpuModified(device_addr, size);
if (use_fast_buffer) { if (use_fast_buffer) {
@ -811,8 +817,8 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
if (should_fast_bind) { if (should_fast_bind) {
// We only have to bind when the currently bound buffer is not the fast version // We only have to bind when the currently bound buffer is not the fast version
channel_state->fast_bound_uniform_buffers[stage] |= 1u << binding_index; channel_state->fast_bound_uniform_buffers[stage] |= 1u << binding_index;
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
runtime.BindFastUniformBuffer(stage, binding_index, size);
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size_for_bind;
runtime.BindFastUniformBuffer(stage, binding_index, size_for_bind);
} }
const auto span = ImmediateBufferWithData(device_addr, size); const auto span = ImmediateBufferWithData(device_addr, size);
runtime.PushFastUniformBuffer(stage, binding_index, span); runtime.PushFastUniformBuffer(stage, binding_index, span);
@ -840,21 +846,22 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
return; return;
} }
const u32 offset = buffer.Offset(device_addr); const u32 offset = buffer.Offset(device_addr);
u32 ubo_align = 0;
if constexpr (!IS_OPENGL) { if constexpr (!IS_OPENGL) {
// Vulkan requires aligned uniform buffer offsets. If unaligned, stream into the
// aligned uniform ring as a correctness fallback (stock Qualcomm)
if constexpr (requires(const Runtime& r) { r.GetUniformBufferAlignment(); }) { if constexpr (requires(const Runtime& r) { r.GetUniformBufferAlignment(); }) {
const u32 ubo_align = runtime.GetUniformBufferAlignment();
if (ubo_align != 0 && (offset % ubo_align) != 0) {
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size_for_bind;
}
const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size_for_bind);
device_memory.ReadBlockUnsafe(device_addr, span.data(), size_for_bind);
return;
ubo_align = runtime.GetUniformBufferAlignment();
}
if (!force_old_ubo && ubo_align != 0 && (offset % ubo_align) != 0) {
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size_for_bind;
} }
const std::span<u8> span =
runtime.BindMappedUniformBuffer(stage, binding_index, size_for_bind);
device_memory.ReadBlockUnsafe(device_addr, span.data(), size_for_bind);
return;
} }
} else {
}
if constexpr (IS_OPENGL) {
// Mark the index as dirty if offset doesn't match // Mark the index as dirty if offset doesn't match
const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
@ -970,26 +977,33 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
TouchBuffer(buffer, binding.buffer_id); TouchBuffer(buffer, binding.buffer_id);
const u32 size = const u32 size =
(std::min)(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]); (std::min)(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]);
const bool force_old_ubo = ForceOldUBOMethod();
u32 size_for_bind = size; u32 size_for_bind = size;
u32 max_range = 0;
if constexpr (!IS_OPENGL) { if constexpr (!IS_OPENGL) {
if constexpr (requires(const Runtime& r) { r.GetMaxUniformBufferRange(); }) { if constexpr (requires(const Runtime& r) { r.GetMaxUniformBufferRange(); }) {
size_for_bind = (std::min)(size_for_bind, runtime.GetMaxUniformBufferRange());
max_range = runtime.GetMaxUniformBufferRange();
if (max_range != 0) {
size_for_bind = (std::min)(size_for_bind, max_range);
}
} }
} }
SynchronizeBuffer(buffer, binding.device_addr, size); SynchronizeBuffer(buffer, binding.device_addr, size);
const u32 offset = buffer.Offset(binding.device_addr); const u32 offset = buffer.Offset(binding.device_addr);
u32 ubo_align = 0;
if constexpr (!IS_OPENGL) { if constexpr (!IS_OPENGL) {
if constexpr (requires(const Runtime& r) { r.GetUniformBufferAlignment(); }) { if constexpr (requires(const Runtime& r) { r.GetUniformBufferAlignment(); }) {
const u32 ubo_align = runtime.GetUniformBufferAlignment();
if (ubo_align != 0 && (offset % ubo_align) != 0) {
const std::span<u8> span = runtime.BindMappedUniformBuffer(0, binding_index, size_for_bind);
device_memory.ReadBlockUnsafe(binding.device_addr, span.data(), size_for_bind);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
++binding_index;
}
return;
ubo_align = runtime.GetUniformBufferAlignment();
}
if (!force_old_ubo && ubo_align != 0 && (offset % ubo_align) != 0) {
const std::span<u8> span =
runtime.BindMappedUniformBuffer(0, binding_index, size_for_bind);
device_memory.ReadBlockUnsafe(binding.device_addr, span.data(), size_for_bind);
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
++binding_index;
} }
return;
} }
} }
buffer.MarkUsage(offset, size_for_bind); buffer.MarkUsage(offset, size_for_bind);
@ -1820,6 +1834,14 @@ std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
return std::span<u8>(immediate_buffer_alloc.data(), wanted_capacity); return std::span<u8>(immediate_buffer_alloc.data(), wanted_capacity);
} }
/// Asks the runtime whether the legacy uniform buffer binding method must be used.
/// A runtime that does not provide ForceOldUniformBufferMethod() never forces it.
template <class P>
bool BufferCache<P>::ForceOldUBOMethod() const noexcept {
    // Detect the optional runtime hook at compile time.
    constexpr bool runtime_has_hook =
        requires(const Runtime& r) { r.ForceOldUniformBufferMethod(); };
    if constexpr (runtime_has_hook) {
        return runtime.ForceOldUniformBufferMethod();
    } else {
        return false;
    }
}
template <class P> template <class P>
bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept { bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1u) != 0; return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1u) != 0;

2
src/video_core/buffer_cache/buffer_cache_base.h

@ -447,6 +447,8 @@ private:
[[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
[[nodiscard]] bool ForceOldUBOMethod() const noexcept;
[[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
void ClearDownload(DAddr base_addr, u64 size); void ClearDownload(DAddr base_addr, u64 size);

10
src/video_core/renderer_vulkan/vk_buffer_cache.h

@ -6,6 +6,7 @@
#pragma once #pragma once
#include "common/string_util.h"
#include "video_core/buffer_cache/buffer_cache_base.h" #include "video_core/buffer_cache/buffer_cache_base.h"
#include "video_core/buffer_cache/memory_tracker_base.h" #include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/buffer_cache/usage_tracker.h" #include "video_core/buffer_cache/usage_tracker.h"
@ -94,6 +95,15 @@ public:
bool CanReportMemoryUsage() const; bool CanReportMemoryUsage() const;
/// Returns true when the active Vulkan driver is identified as Qualcomm, either through
/// its driver ID or through a case-insensitive "qualcomm" match in the driver name.
[[nodiscard]] bool ForceOldUniformBufferMethod() const noexcept {
    // Cheap check first: the proprietary Qualcomm driver reports a dedicated driver ID.
    if (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
        return true;
    }
    // Fall back to searching the lower-cased driver name.
    // NOTE(review): this builds a std::string on every call inside a noexcept function;
    // confirm how often callers invoke it and whether the result should be cached.
    const std::string lowered_name = Common::ToLower(std::string{device.GetDriverName()});
    const bool mentions_qualcomm = lowered_name.find("qualcomm") != std::string::npos;
    return mentions_qualcomm;
}
u32 GetUniformBufferAlignment() const { u32 GetUniformBufferAlignment() const {
return static_cast<u32>(device.GetUniformBufferAlignment()); return static_cast<u32>(device.GetUniformBufferAlignment());
} }

Loading…
Cancel
Save