Browse Source

[vk, qcom] Samplers Budget Management

pull/3115/head
CamilleLaVey 4 weeks ago
parent
commit
d2d95cd1cc
  1. 4
      src/video_core/renderer_vulkan/vk_texture_cache.cpp
  2. 2
      src/video_core/renderer_vulkan/vk_texture_cache.h
  3. 80
      src/video_core/texture_cache/texture_cache.h
  4. 4
      src/video_core/texture_cache/texture_cache_base.h
  5. 18
      src/video_core/vulkan_common/vulkan_device.cpp
  6. 4
      src/video_core/vulkan_common/vulkan_device.h

4
src/video_core/renderer_vulkan/vk_texture_cache.cpp

@ -1516,6 +1516,10 @@ bool TextureCacheRuntime::CanReportMemoryUsage() const {
return device.CanReportMemoryUsage();
}
/// Returns the sampler budget exposed by the device.
/// This is a plain forward of device.GetSamplerHeapBudget(); the optional is passed through
/// unchanged, so an empty result here means the device returned an empty result.
std::optional<size_t> TextureCacheRuntime::GetSamplerHeapBudget() const {
return device.GetSamplerHeapBudget();
}
/// Tick hook with an empty body: calling it performs no work.
void TextureCacheRuntime::TickFrame() {}
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,

2
src/video_core/renderer_vulkan/vk_texture_cache.h

@ -65,6 +65,8 @@ public:
bool CanReportMemoryUsage() const;
/// Returns the sampler budget obtained from the device's GetSamplerHeapBudget(); the optional
/// is forwarded as-is.
std::optional<size_t> GetSamplerHeapBudget() const;
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,

80
src/video_core/texture_cache/texture_cache.h

@ -6,6 +6,8 @@
#pragma once
#include <limits>
#include <optional>
#include <unordered_set>
#include <boost/container/small_vector.hpp>
@ -1736,11 +1738,89 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
}
const auto [pair, is_new] = channel_state->samplers.try_emplace(config);
if (is_new) {
EnforceSamplerBudget();
pair->second = slot_samplers.insert(runtime, config);
}
return pair->second;
}
/// Asks the runtime for its sampler budget, if the runtime type offers such a query.
///
/// @return The value of runtime.GetSamplerHeapBudget() when that call is well-formed for the
///         runtime type; std::nullopt for runtimes that do not provide it.
template <class P>
std::optional<size_t> TextureCache<P>::QuerySamplerBudget() const {
    // Resolved at compile time: not every runtime implements GetSamplerHeapBudget().
    constexpr bool runtime_reports_budget = requires { runtime.GetSamplerHeapBudget(); };
    if constexpr (!runtime_reports_budget) {
        return std::nullopt;
    } else {
        return runtime.GetSamplerHeapBudget();
    }
}
/// Triggers a sampler trim when the sampler slot count has reached the runtime's budget.
///
/// Nothing happens when the runtime reports no budget, when fewer than `budget` samplers are
/// stored, when there is no current channel state, or when a trim was already started during
/// the current frame_tick.
template <class P>
void TextureCache<P>::EnforceSamplerBudget() {
    const std::optional<size_t> limit = QuerySamplerBudget();
    const bool budget_reached = limit.has_value() && slot_samplers.size() >= *limit;
    if (!budget_reached || !channel_state) {
        return;
    }
    // Throttle: at most one trim per frame tick. The tick is recorded before trimming.
    if (last_sampler_gc_frame != frame_tick) {
        last_sampler_gc_frame = frame_tick;
        TrimInactiveSamplers(*limit);
    }
}
/// Evicts samplers of the current channel that are not referenced by its graphics or compute
/// sampler tables, until the slot count drops SAMPLER_GC_SLACK below `budget` or no inactive
/// sampler is left.
///
/// Map entries whose id is still invalid are left untouched: FindSampler() emplaces the entry
/// first, calls EnforceSamplerBudget(), and only afterwards assigns the id through the iterator
/// it kept. Erasing such a placeholder here would leave that iterator dangling.
///
/// @param budget Sampler count limit that triggered the trim; also used in the log message.
template <class P>
void TextureCache<P>::TrimInactiveSamplers(size_t budget) {
    auto& sampler_map = channel_state->samplers;
    if (sampler_map.empty()) {
        return;
    }
    // Evict this many entries below the budget so the trim does not re-trigger immediately.
    static constexpr size_t SAMPLER_GC_SLACK = 1024;

    // Collect every sampler id currently present in the channel's sampler tables.
    std::unordered_set<SamplerId> active;
    active.reserve(channel_state->graphics_sampler_ids.size() +
                   channel_state->compute_sampler_ids.size());
    const auto mark_active = [&active](SamplerId id) {
        if (!id || id == CORRUPT_ID || id == NULL_SAMPLER_ID) {
            return;
        }
        active.insert(id);
    };
    for (const SamplerId id : channel_state->graphics_sampler_ids) {
        mark_active(id);
    }
    for (const SamplerId id : channel_state->compute_sampler_ids) {
        mark_active(id);
    }

    size_t removed = 0;
    for (auto it = sampler_map.begin(); it != sampler_map.end();) {
        const SamplerId sampler_id = it->second;
        if (!sampler_id) {
            // Placeholder of an insertion still in progress (see FindSampler): the caller
            // holds an iterator to this entry, so it must survive the trim.
            ++it;
            continue;
        }
        if (sampler_id == CORRUPT_ID) {
            // Stale mapping without a slot to release; only the map entry is dropped.
            it = sampler_map.erase(it);
            continue;
        }
        if (active.find(sampler_id) != active.end()) {
            ++it;
            continue;
        }
        // NOTE(review): NULL_SAMPLER_ID is never marked active above, so a map entry holding
        // it would be released here - presumably the map never stores it; confirm.
        slot_samplers.erase(sampler_id);
        it = sampler_map.erase(it);
        ++removed;
        if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) {
            break;
        }
    }
    if (removed != 0) {
        LOG_WARNING(HW_GPU,
                    "Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers",
                    budget, removed);
    }
}
template <class P>
ImageViewId TextureCache<P>::FindColorBuffer(size_t index) {
const auto& regs = maxwell3d->regs;

4
src/video_core/texture_cache/texture_cache_base.h

@ -429,6 +429,9 @@ private:
void QueueAsyncDecode(Image& image, ImageId image_id);
void TickAsyncDecode();
/// Calls TrimInactiveSamplers() when the sampler slot count has reached the budget returned by
/// QuerySamplerBudget(), at most once per frame_tick value.
void EnforceSamplerBudget();
/// Erases samplers of the current channel that are absent from its graphics and compute
/// sampler id tables; `budget` is the limit that triggered the trim.
void TrimInactiveSamplers(size_t budget);
/// Returns runtime.GetSamplerHeapBudget() when the runtime type provides that member,
/// std::nullopt otherwise.
std::optional<size_t> QuerySamplerBudget() const;
Runtime& runtime;
@ -500,6 +503,7 @@ private:
u64 modification_tick = 0;
u64 frame_tick = 0;
/// frame_tick value recorded by EnforceSamplerBudget() when it last started a sampler trim.
/// Initialised to the u64 maximum so it differs from frame_tick's initial value of 0.
u64 last_sampler_gc_frame = (std::numeric_limits<u64>::max)();
Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;

18
src/video_core/vulkan_common/vulkan_device.cpp

@ -583,6 +583,17 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
version == VK_MAKE_API_VERSION(0, 512, 800, 51)) {
has_broken_parallel_compiling = true;
}
const size_t sampler_limit = properties.properties.limits.maxSamplerAllocationCount;
if (sampler_limit > 0) {
constexpr size_t MIN_SAMPLER_BUDGET = 1024U;
const size_t derived_budget =
(std::max)(MIN_SAMPLER_BUDGET, sampler_limit / 4U);
sampler_heap_budget = derived_budget;
LOG_WARNING(Render_Vulkan,
"Qualcomm driver reports max {} samplers; clamping cache to {} (25%) to "
"avoid heap exhaustion",
sampler_limit, sampler_heap_budget);
}
}
if (extensions.sampler_filter_minmax && is_amd) {
@ -1516,6 +1527,13 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
}
}
/// Reports the sampler budget stored for this device.
///
/// @return sampler_heap_budget when it is non-zero; std::nullopt when it is 0, the value used
///         for "no budget".
std::optional<size_t> Device::GetSamplerHeapBudget() const {
    if (sampler_heap_budget != 0) {
        return sampler_heap_budget;
    }
    return std::nullopt;
}
u64 Device::GetDeviceMemoryUsage() const {
VkPhysicalDeviceMemoryBudgetPropertiesEXT budget;
budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;

4
src/video_core/vulkan_common/vulkan_device.h

@ -6,6 +6,7 @@
#pragma once
#include <optional>
#include <set>
#include <span>
#include <string>
@ -759,6 +760,8 @@ public:
return has_broken_parallel_compiling;
}
/// Returns the sampler budget for this device, or std::nullopt when sampler_heap_budget is 0.
std::optional<size_t> GetSamplerHeapBudget() const;
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return properties.driver.driverName;
@ -1055,6 +1058,7 @@ private:
bool dynamic_state3_alpha_to_coverage{};
bool dynamic_state3_alpha_to_one{};
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
size_t sampler_heap_budget{}; ///< Sampler budget for buggy drivers (0 = unlimited).
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 sets_per_pool{}; ///< Sets per Description Pool
NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};

Loading…
Cancel
Save