Browse Source

Fixed compiler issues

Added adaptive batching for the memory copy
Fixed an issue where the system was reading from memory and then unswizzling the texture, when the workflow should have been to copy from memory until the copy finishes and only then unswizzle
pull/3737/head
Forrest Mark X 2 weeks ago
committed by crueter
parent
commit
a32b97c0ed
  1. 49
      src/video_core/texture_cache/texture_cache.h
  2. 3
      src/video_core/texture_cache/texture_cache_base.h

49
src/video_core/texture_cache/texture_cache.h

@ -1419,17 +1419,40 @@ u32 TextureCache<P>::GetAdaptiveBatchSize(const PendingUnswizzle& task, size_t q
multiplier = 8;
}
const u32 dynamic_slices = base_slices * multiplier;
return std::min(dynamic_slices, static_cast<u32>(texture_slices));
return (std::min)(dynamic_slices, static_cast<u32>(texture_slices));
}
if (queue_size > MODERATE_BACKLOG) {
const u32 dynamic_slices = base_slices * 2;
return std::min(dynamic_slices, static_cast<u32>(texture_slices));
return (std::min)(dynamic_slices, static_cast<u32>(texture_slices));
}
return base_slices;
}
/// Picks the number of bytes of a pending unswizzle to copy in one step, scaling the
/// configured swizzle_chunk_size up as the unswizzle queue grows.
///
/// @param task       Pending unswizzle; its total_size caps every scaled result.
/// @param queue_size Number of entries reported for the unswizzle queue.
/// @return 0 when swizzle_chunk_size is 0. Otherwise the base chunk when queue_size is at
///         most MODERATE_BACKLOG, or the base chunk times 2 / 4 / 8 capped at
///         task.total_size (saturated to the u32 range) for larger backlogs.
template <class P>
u32 TextureCache<P>::GetAdaptiveChunkSize(const PendingUnswizzle& task, size_t queue_size) const {
    const u32 base_chunk = swizzle_chunk_size;
    if (base_chunk == 0) {
        return 0;
    }

    constexpr size_t LARGE_BACKLOG = 4;
    constexpr size_t MODERATE_BACKLOG = 2;
    constexpr size_t HUGE_TEXTURE_BYTES = 256_MiB;
    constexpr u32 MAX_CHUNK = 0xFFFFFFFFu;

    u32 multiplier = 1;
    if (queue_size > LARGE_BACKLOG) {
        // Textures below the "huge" threshold get the most aggressive scaling.
        multiplier = task.total_size < HUGE_TEXTURE_BYTES ? 8u : 4u;
    } else if (queue_size > MODERATE_BACKLOG) {
        multiplier = 2;
    } else {
        // No backlog pressure: the base chunk is returned as-is, without the total_size cap.
        return base_chunk;
    }

    // Saturate instead of truncating when the texture size does not fit in a u32.
    const u32 size_cap =
        task.total_size > MAX_CHUNK ? MAX_CHUNK : static_cast<u32>(task.total_size);

    // base_chunk * multiplier would exceed size_cap (and could wrap around in u32) exactly
    // when base_chunk > size_cap / multiplier, so test by division before multiplying.
    if (base_chunk > size_cap / multiplier) {
        return size_cap;
    }
    return base_chunk * multiplier;
}
template <class P>
void TextureCache<P>::TickAsyncUnswizzle() {
if (unswizzle_queue.empty()) {
@ -1465,9 +1488,12 @@ void TextureCache<P>::TickAsyncUnswizzle() {
const size_t remaining = task.total_size - task.current_offset;
size_t copy_amount = 0;
if( swizzle_chunk_size == 0 )
if (swizzle_chunk_size == 0) {
copy_amount = remaining;
else copy_amount = (std::min)(swizzle_chunk_size, remaining);
} else {
const u32 dynamic_chunk = GetAdaptiveChunkSize(task, unswizzle_queue.size());
copy_amount = std::min<size_t>(dynamic_chunk, remaining);
}
if (remaining > swizzle_chunk_size) {
copy_amount = (copy_amount / task.bytes_per_slice) * task.bytes_per_slice;
@ -1480,6 +1506,10 @@ void TextureCache<P>::TickAsyncUnswizzle() {
task.current_offset += copy_amount;
}
if (task.current_offset < task.total_size) {
return;
}
const bool is_final_batch = task.current_offset >= task.total_size;
const size_t bytes_ready = task.current_offset - task.last_submitted_offset;
const u32 complete_slices = static_cast<u32>(bytes_ready / task.bytes_per_slice);
@ -1496,12 +1526,13 @@ void TextureCache<P>::TickAsyncUnswizzle() {
const u32 slices_to_process = (std::min)(complete_slices, adaptive_batch);
if (whole_texture) {
const auto uploads = FullUploadSwizzles(task.info);
runtime.AccelerateImageUpload(image, task.staging_buffer, FixSmallVectorADL(uploads),
z_start, slices_to_process);
task.last_submitted_offset = task.total_size;
runtime.AccelerateImageUpload(image, task.staging_buffer,
FixSmallVectorADL(FullUploadSwizzles(task.info)), 0,
image.info.size.depth);
task.last_submitted_offset +=
(static_cast<size_t>(image.info.size.depth) * task.bytes_per_slice);
} else if (complete_slices >= slices_to_process || (is_final_batch && complete_slices > 0)) {
const u32 z_count = std::min(slices_to_process, task.info.size.depth - z_start);
const u32 z_count = (std::min)(slices_to_process, task.info.size.depth - z_start);
if (z_count > 0) {
const auto uploads = FullUploadSwizzles(task.info);
runtime.AccelerateImageUpload(image, task.staging_buffer, FixSmallVectorADL(uploads),

3
src/video_core/texture_cache/texture_cache_base.h

@ -419,6 +419,9 @@ private:
void QueueAsyncDecode(Image& image, ImageId image_id);
void TickAsyncDecode();
u32 GetAdaptiveBatchSize(const PendingUnswizzle& task, size_t queue_size) const;
/// Returns the byte count to copy for `task` in one step: 0 when swizzle_chunk_size is 0,
/// otherwise swizzle_chunk_size, multiplied when `queue_size` exceeds the backlog
/// thresholds; multiplied results are capped at task.total_size.
u32 GetAdaptiveChunkSize(const PendingUnswizzle &task, size_t queue_size) const;
void EnforceSamplerBudget();
void TrimInactiveSamplers(size_t budget);
std::optional<size_t> QuerySamplerBudget() const;

Loading…
Cancel
Save