Browse Source

Added dynamic batch sizing to prevent backlogging (Untested)

pull/3737/head
Forrest Mark X 2 weeks ago
committed by crueter
parent
commit
f064eeffeb
  1. 54
      src/video_core/texture_cache/texture_cache.h
  2. 2
      src/video_core/texture_cache/texture_cache_base.h

54
src/video_core/texture_cache/texture_cache.h

@ -1397,6 +1397,39 @@ void TextureCache<P>::TickAsyncDecode() {
} }
} }
// Chooses a batch size (in slices) for one pending unswizzle task, scaled by how many
// tasks are currently waiting in the queue.
//
// task:       the pending unswizzle; its total byte size and image depth are read.
// queue_size: number of entries currently queued (the caller passes unswizzle_queue.size()).
//
// Returns one of:
//   - swizzle_slices_per_batch, unchanged, when queue_size is at most MODERATE_BACKLOG;
//   - swizzle_slices_per_batch times 2, 4 or 8, capped at the image depth, for deeper queues;
//   - 0xFFFFFFFF when queue_size exceeds LARGE_BACKLOG and the texture is under 64 MiB.
//     TickAsyncUnswizzle compares against this exact value to detect the
//     "submit the whole texture" request.
template <class P>
u32 TextureCache<P>::GetAdaptiveBatchSize(const PendingUnswizzle& task, size_t queue_size) const {
    // Queue-depth thresholds.
    constexpr size_t MODERATE_BACKLOG = 2;
    constexpr size_t LARGE_BACKLOG = 4;
    // Texture-size thresholds.
    constexpr size_t LARGE_TEXTURE_BYTES = 64_MiB;
    constexpr size_t HUGE_TEXTURE_BYTES = 256_MiB;
    // Sentinel value; must stay equal to the literal the caller checks for.
    constexpr u32 WHOLE_TEXTURE = 0xFFFFFFFF;

    const u32 default_batch = swizzle_slices_per_batch;

    // Shallow queue: keep the configured batch size as is (not capped at the depth).
    if (queue_size <= MODERATE_BACKLOG) {
        return default_batch;
    }

    const size_t byte_count = task.total_size;
    const size_t depth_slices = task.info.size.depth;

    // Moderate backlog doubles the batch; a large backlog picks a bigger factor below.
    u32 scale = 2;
    if (queue_size > LARGE_BACKLOG) {
        if (byte_count < LARGE_TEXTURE_BYTES) {
            return WHOLE_TEXTURE;
        }
        // Textures of 256 MiB and above get the smaller of the two large-backlog factors.
        scale = (byte_count < HUGE_TEXTURE_BYTES) ? 8 : 4;
    }

    const u32 scaled_batch = default_batch * scale;
    return std::min(scaled_batch, static_cast<u32>(depth_slices));
}
template <class P> template <class P>
void TextureCache<P>::TickAsyncUnswizzle() { void TextureCache<P>::TickAsyncUnswizzle() {
if (unswizzle_queue.empty()) { if (unswizzle_queue.empty()) {
@ -1455,15 +1488,26 @@ void TextureCache<P>::TickAsyncUnswizzle() {
runtime.AccelerateImageUpload(image, task.staging_buffer, FixSmallVectorADL(FullUploadSwizzles(task.info)), 0, image.info.size.depth); runtime.AccelerateImageUpload(image, task.staging_buffer, FixSmallVectorADL(FullUploadSwizzles(task.info)), 0, image.info.size.depth);
task.last_submitted_offset += (static_cast<size_t>(image.info.size.depth) * task.bytes_per_slice); task.last_submitted_offset += (static_cast<size_t>(image.info.size.depth) * task.bytes_per_slice);
} }
else if (complete_slices >= swizzle_slices_per_batch || (is_final_batch && complete_slices > 0)) {
else {
const u32 adaptive_batch = GetAdaptiveBatchSize(task, unswizzle_queue.size());
const bool whole_texture = adaptive_batch == 0xFFFFFFFF;
const u32 z_start = static_cast<u32>(task.last_submitted_offset / task.bytes_per_slice); const u32 z_start = static_cast<u32>(task.last_submitted_offset / task.bytes_per_slice);
const u32 slices_to_process = (std::min)(complete_slices, swizzle_slices_per_batch); const u32 slices_to_process = (std::min)(complete_slices, swizzle_slices_per_batch);
const u32 z_count = (std::min)(slices_to_process, image.info.size.depth - z_start);
if (z_count > 0) {
if (whole_texture) {
const auto uploads = FullUploadSwizzles(task.info); const auto uploads = FullUploadSwizzles(task.info);
runtime.AccelerateImageUpload(image, task.staging_buffer, FixSmallVectorADL(uploads), z_start, z_count);
task.last_submitted_offset += (static_cast<size_t>(z_count) * task.bytes_per_slice);
runtime.AccelerateImageUpload(image, task.staging_buffer, FixSmallVectorADL(uploads),
z_start, slices_to_process);
task.last_submitted_offset = task.total_size;
} else if (complete_slices >= slices_to_process || (is_final_batch && complete_slices > 0)) {
const u32 z_count = std::min(slices_to_process, task.info.size.depth - z_start);
if (z_count > 0) {
const auto uploads = FullUploadSwizzles(task.info);
runtime.AccelerateImageUpload(image, task.staging_buffer, FixSmallVectorADL(uploads),
z_start, z_count);
task.last_submitted_offset += (static_cast<size_t>(z_count) * task.bytes_per_slice);
}
} }
} }

2
src/video_core/texture_cache/texture_cache_base.h

@ -418,6 +418,7 @@ private:
void QueueAsyncDecode(Image& image, ImageId image_id); void QueueAsyncDecode(Image& image, ImageId image_id);
void TickAsyncDecode(); void TickAsyncDecode();
u32 GetAdaptiveBatchSize(const PendingUnswizzle& task, size_t queue_size) const;
void EnforceSamplerBudget(); void EnforceSamplerBudget();
void TrimInactiveSamplers(size_t budget); void TrimInactiveSamplers(size_t budget);
std::optional<size_t> QuerySamplerBudget() const; std::optional<size_t> QuerySamplerBudget() const;
@ -513,7 +514,6 @@ private:
std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes; std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
std::deque<PendingUnswizzle> unswizzle_queue; std::deque<PendingUnswizzle> unswizzle_queue;
u8 current_unswizzle_frame;
// Join caching // Join caching
boost::container::small_vector<ImageId, 4> join_overlap_ids; boost::container::small_vector<ImageId, 4> join_overlap_ids;

Loading…
Cancel
Save