
Remove whitespaces

pull/3246/head
Forrest Keller, 4 weeks ago (committed by crueter)
commit 080af4f7e3
  1. src/common/settings.h (4 changed lines)
  2. src/video_core/renderer_vulkan/vk_compute_pass.cpp (54 changed lines)
  3. src/video_core/renderer_vulkan/vk_compute_pass.h (4 changed lines)
  4. src/video_core/renderer_vulkan/vk_scheduler.cpp (2 changed lines)
  5. src/video_core/renderer_vulkan/vk_texture_cache.cpp (14 changed lines)
  6. src/video_core/renderer_vulkan/vk_texture_cache.h (10 changed lines)
  7. src/video_core/texture_cache/image_base.h (2 changed lines)
  8. src/video_core/texture_cache/texture_cache.h (102 changed lines)
  9. src/video_core/texture_cache/texture_cache_base.h (4 changed lines)
  10. src/video_core/texture_cache/util.h (2 changed lines)

src/common/settings.h (4 changed lines)

@@ -512,13 +512,13 @@ struct Values {
    SwitchableSetting<bool> use_asynchronous_shaders{linkage, false, "use_asynchronous_shaders",
                                                     Category::RendererHacks};
    SwitchableSetting<GpuUnswizzle> gpu_unzwizzle_stream_size{linkage,
                                                              GpuUnswizzle::Medium,
                                                              "gpu_unzwizzle_stream_size",
                                                              Category::RendererHacks,
                                                              Specialization::Default};
    SwitchableSetting<GpuUnswizzleChunk> gpu_unzwizzle_chunk_size{linkage,
                                                                  GpuUnswizzleChunk::Medium,
                                                                  "gpu_unzwizzle_chunk_size",

src/video_core/renderer_vulkan/vk_compute_pass.cpp (54 changed lines)

@@ -711,19 +711,19 @@ constexpr std::array<VkDescriptorUpdateTemplateEntry, 3>
struct alignas(16) BlockLinearUnswizzle3DPushConstants {
    u32 blocks_dim[3];        // Offset 0
    u32 bytes_per_block_log2; // Offset 12
    u32 origin[3];            // Offset 16
    u32 slice_size;           // Offset 28
    u32 block_size;           // Offset 32
    u32 x_shift;              // Offset 36
    u32 block_height;         // Offset 40
    u32 block_height_mask;    // Offset 44
    u32 block_depth;          // Offset 48
    u32 block_depth_mask;     // Offset 52
    s32 _pad;                 // Offset 56
    s32 destination[3];       // Offset 60
    s32 _pad_end;             // Offset 72
};
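
The offset annotations above can be checked mechanically. A standalone sketch (u32/s32 are aliased locally; the struct body is copied from the hunk) that asserts the annotated layout at compile time:

// Compile-time check of the push-constant layout annotated above.
// u32/s32 are local aliases; the struct body mirrors the diff.
#include <cstddef>
#include <cstdint>

using u32 = std::uint32_t;
using s32 = std::int32_t;

struct alignas(16) BlockLinearUnswizzle3DPushConstants {
    u32 blocks_dim[3];        // Offset 0
    u32 bytes_per_block_log2; // Offset 12
    u32 origin[3];            // Offset 16
    u32 slice_size;           // Offset 28
    u32 block_size;           // Offset 32
    u32 x_shift;              // Offset 36
    u32 block_height;         // Offset 40
    u32 block_height_mask;    // Offset 44
    u32 block_depth;          // Offset 48
    u32 block_depth_mask;     // Offset 52
    s32 _pad;                 // Offset 56
    s32 destination[3];       // Offset 60
    s32 _pad_end;             // Offset 72
};

static_assert(offsetof(BlockLinearUnswizzle3DPushConstants, origin) == 16);
static_assert(offsetof(BlockLinearUnswizzle3DPushConstants, block_size) == 32);
static_assert(offsetof(BlockLinearUnswizzle3DPushConstants, destination) == 60);
static_assert(offsetof(BlockLinearUnswizzle3DPushConstants, _pad_end) == 72);
static_assert(sizeof(BlockLinearUnswizzle3DPushConstants) == 80); // 76 bytes padded to alignof(16)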
@@ -755,9 +755,9 @@ void BlockLinearUnswizzle3DPass::Unswizzle(
    u32 z_start, u32 z_count)
{
    using namespace VideoCommon::Accelerated;
    const u32 MAX_BATCH_SLICES = std::min(z_count, image.info.size.depth);
    if (!image.has_compute_unswizzle_buffer) {
        // Allocate exactly what this batch needs
        image.AllocateComputeUnswizzleBuffer(MAX_BATCH_SLICES);
@@ -769,12 +769,12 @@ void BlockLinearUnswizzle3DPass::Unswizzle(
    const u32 blocks_x = (image.info.size.width + 3) / 4;
    const u32 blocks_y = (image.info.size.height + 3) / 4;
    scheduler.RequestOutsideRenderPassOperationContext();
    for (u32 z_offset = 0; z_offset < z_count; z_offset += MAX_BATCH_SLICES) {
        const u32 current_chunk_slices = std::min(MAX_BATCH_SLICES, z_count - z_offset);
        const u32 current_z_start = z_start + z_offset;
        UnswizzleChunk(image, swizzled, sw, params, blocks_x, blocks_y,
                       current_z_start, current_chunk_slices);
    }
@@ -811,12 +811,12 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk(
    pc.blocks_dim[2] = z_count; // Only process the count
    compute_pass_descriptor_queue.Acquire();
    compute_pass_descriptor_queue.AddBuffer(*image.runtime->swizzle_table_buffer, 0,
                                            image.runtime->swizzle_table_size);
    compute_pass_descriptor_queue.AddBuffer(swizzled.buffer,
                                            sw.buffer_offset + swizzled.offset,
                                            image.guest_size_bytes - sw.buffer_offset);
    compute_pass_descriptor_queue.AddBuffer(*image.compute_unswizzle_buffer, 0,
                                            image.compute_unswizzle_buffer_size);
    const void* descriptor_data = compute_pass_descriptor_queue.UpdateData();
@@ -825,12 +825,12 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk(
    const u32 gx = Common::DivCeil(blocks_x, 8u);
    const u32 gy = Common::DivCeil(blocks_y, 8u);
    const u32 gz = Common::DivCeil(z_count, 4u);
    const u32 bytes_per_block = 1u << pc.bytes_per_block_log2;
    const VkDeviceSize output_slice_size =
        static_cast<VkDeviceSize>(blocks_x) * blocks_y * bytes_per_block;
    const VkDeviceSize barrier_size = output_slice_size * z_count;
    const bool is_first_chunk = (z_start == 0);
    const VkBuffer out_buffer = *image.compute_unswizzle_buffer;
@@ -843,11 +843,11 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk(
        barrier_size, is_first_chunk, out_buffer, dst_image, aspect,
        image_width, image_height
    ](vk::CommandBuffer cmdbuf) {
        if (dst_image == VK_NULL_HANDLE || out_buffer == VK_NULL_HANDLE) {
            return;
        }
        device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
@@ -866,15 +866,15 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk(
            .offset = 0,
            .size = barrier_size,
        };
        // Image layout transition
        const VkImageMemoryBarrier pre_barrier{
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = is_first_chunk ? VkAccessFlags{} :
                             static_cast<VkAccessFlags>(VK_ACCESS_TRANSFER_WRITE_BIT),
            .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
            .oldLayout = is_first_chunk ? VK_IMAGE_LAYOUT_UNDEFINED :
                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
            .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@@ -882,12 +882,12 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk(
            .image = dst_image,
            .subresourceRange = {aspect, 0, 1, 0, 1},
        };
        // Single barrier handles both buffer and image
        cmdbuf.PipelineBarrier(
            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            VK_PIPELINE_STAGE_TRANSFER_BIT,
            0,
            nullptr, buffer_barrier, pre_barrier
        );
@@ -900,7 +900,7 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk(
            .imageOffset = {0, 0, static_cast<s32>(z_start)}, // Write to correct Z
            .imageExtent = {image_width, image_height, z_count},
        };
        cmdbuf.CopyBufferToImage(out_buffer, dst_image,
                                 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
        // Post-copy transition
@@ -918,9 +918,9 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk(
        };
        cmdbuf.PipelineBarrier(
            VK_PIPELINE_STAGE_TRANSFER_BIT,
            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
            0,
            nullptr, nullptr, post_barrier
        );
    });
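
For context, the chunking and dispatch math in the two functions above reduces to a few lines. The following is a minimal standalone sketch of that arithmetic only (the 512x512x320 image and the 8x8x4 workgroup size implied by the DivCeil divisors are illustrative assumptions, not values taken from the commit):

// Standalone sketch of the slice-batching and workgroup math used by the
// 3D unswizzle pass above. All concrete sizes here are example inputs.
#include <algorithm>
#include <cstdint>
#include <cstdio>

static std::uint32_t DivCeil(std::uint32_t value, std::uint32_t divisor) {
    return (value + divisor - 1) / divisor;
}

int main() {
    const std::uint32_t width = 512, height = 512, depth = 320;
    const std::uint32_t z_count = depth;                      // slices requested by this call
    const std::uint32_t max_batch_slices = std::min(z_count, depth);

    const std::uint32_t blocks_x = (width + 3) / 4;           // 4x4 block columns
    const std::uint32_t blocks_y = (height + 3) / 4;          // 4x4 block rows

    for (std::uint32_t z_offset = 0; z_offset < z_count; z_offset += max_batch_slices) {
        const std::uint32_t chunk = std::min(max_batch_slices, z_count - z_offset);
        // One compute dispatch per chunk, assuming 8x8x4 threads per workgroup.
        const std::uint32_t gx = DivCeil(blocks_x, 8u);
        const std::uint32_t gy = DivCeil(blocks_y, 8u);
        const std::uint32_t gz = DivCeil(chunk, 4u);
        std::printf("z=[%u,%u): dispatch %ux%ux%u workgroups\n",
                    z_offset, z_offset + chunk, gx, gy, gz);
    }
}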

src/video_core/renderer_vulkan/vk_compute_pass.h (4 changed lines)

@@ -144,12 +144,12 @@ public:
                              StagingBufferPool& staging_buffer_pool_,
                              ComputePassDescriptorQueue& compute_pass_descriptor_queue_);
    ~BlockLinearUnswizzle3DPass();
    void Unswizzle(Image& image,
                   const StagingBufferRef& swizzled,
                   std::span<const VideoCommon::SwizzleParameters> swizzles,
                   u32 z_start, u32 z_count);
    void UnswizzleChunk(
        Image& image,
        const StagingBufferRef& swizzled,

src/video_core/renderer_vulkan/vk_scheduler.cpp (2 changed lines)

@@ -43,7 +43,7 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_)
    : device{device_}, state_tracker{state_tracker_},
      master_semaphore{std::make_unique<MasterSemaphore>(device)},
      command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
    // PRE-OPTIMIZATION: Warm up the pool to prevent mid-frame spikes
    {
        std::scoped_lock rl{reserve_mutex};

src/video_core/renderer_vulkan/vk_texture_cache.cpp (14 changed lines)

@@ -880,14 +880,14 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched
            }
        }
    }
    bl3d_unswizzle_pass.emplace(device, scheduler, descriptor_pool,
                                staging_buffer_pool, compute_pass_descriptor_queue);
    // --- Create swizzle table buffer ---
    {
        auto table = Tegra::Texture::MakeSwizzleTable();
        swizzle_table_size = static_cast<VkDeviceSize>(table.size() * sizeof(table[0]));
        auto staging = staging_buffer_pool.Request(swizzle_table_size, MemoryUsage::Upload);
@@ -896,19 +896,19 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched
        VkBufferCreateInfo ci{
            .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
            .size = swizzle_table_size,
            .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                     VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                     VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
            .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        };
        swizzle_table_buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::DeviceLocal);
        scheduler.RequestOutsideRenderPassOperationContext();
        scheduler.Record([staging_buf = staging.buffer,
                          dst_buf = *swizzle_table_buffer,
                          size = swizzle_table_size,
                          src_off = staging.offset](vk::CommandBuffer cmdbuf) {
            const VkBufferCopy region{
                .srcOffset = src_off,
                .dstOffset = 0,

src/video_core/renderer_vulkan/vk_texture_cache.h (10 changed lines)

@@ -81,7 +81,7 @@ public:
    void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
    void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
    bool IsSteamDeck() const;
    bool CanAccelerateImageUpload(Image&) const noexcept {
@@ -130,7 +130,7 @@ public:
    BlitImageHelper& blit_image_helper;
    RenderPassCache& render_pass_cache;
    std::optional<ASTCDecoderPass> astc_decoder_pass;
    std::optional<BlockLinearUnswizzle3DPass> bl3d_unswizzle_pass;
    vk::Buffer swizzle_table_buffer;
    VkDeviceSize swizzle_table_size = 0;
@@ -171,7 +171,7 @@ public:
    void DownloadMemory(const StagingBufferRef& map,
                        std::span<const VideoCommon::BufferImageCopy> copies);
    void AllocateComputeUnswizzleImage();
    [[nodiscard]] VkImage Handle() const noexcept {
@@ -200,7 +200,7 @@ public:
    bool ScaleDown(bool ignore = false);
    u64 allocation_tick;
    friend class BlockLinearUnswizzle3DPass;
private:
@@ -213,7 +213,7 @@ private:
    vk::Image original_image;
    vk::Image scaled_image;
    vk::Buffer compute_unswizzle_buffer;
    VkDeviceSize compute_unswizzle_buffer_size = 0;
    bool has_compute_unswizzle_buffer = false;

src/video_core/texture_cache/image_base.h (2 changed lines)

@@ -125,7 +125,7 @@ struct ImageBase {
    std::vector<AliasedImage> aliased_images;
    std::vector<ImageId> overlapping_images;
    ImageMapId map_view_id{};
    boost::container::small_vector<u64, 16> dirty_offsets;
    std::unordered_map<GPUVAddr, SparseBinding> sparse_bindings;
    u32 sparse_tile_size = 65536;

src/video_core/texture_cache/texture_cache.h (102 changed lines)

@@ -76,7 +76,7 @@ TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag
        expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
        critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
        minimum_memory = 0;
        lowmemorydevice = true;
    }
}
@@ -94,7 +94,7 @@ void TextureCache<P>::RunGarbageCollector() {
        ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
        num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10);
    };
    const auto Cleanup = [this, &num_iterations, &high_priority_mode,
                          &aggressive_mode](ImageId image_id) {
        if (num_iterations == 0) {
@@ -102,34 +102,34 @@ void TextureCache<P>::RunGarbageCollector() {
        }
        --num_iterations;
        auto& image = slot_images[image_id];
        // Never delete recently allocated sparse textures (within 3 frames)
        const bool is_recently_allocated = image.allocation_tick >= frame_tick - 3;
        if (is_recently_allocated && image.info.is_sparse) {
            return false;
        }
        if (True(image.flags & ImageFlagBits::IsDecoding)) {
            // This image is still being decoded, deleting it will invalidate the slot
            // used by the async decoder thread.
            return false;
        }
        // Prioritize large sparse textures for cleanup
        const bool is_large_sparse = image.info.is_sparse &&
                                     image.guest_size_bytes >= 256_MiB;
        if (!aggressive_mode && !is_large_sparse &&
            True(image.flags & ImageFlagBits::CostlyLoad)) {
            return false;
        }
        const bool must_download =
            image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
        if (!high_priority_mode && !is_large_sparse && must_download) {
            return false;
        }
        if (must_download && !is_large_sparse) {
            auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
            const auto copies = FixSmallVectorADL(FullDownloadCopies(image.info));
@@ -138,13 +138,13 @@ void TextureCache<P>::RunGarbageCollector() {
            SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
                         swizzle_data_buffer);
        }
        if (True(image.flags & ImageFlagBits::Tracked)) {
            UntrackImage(image, image_id);
        }
        UnregisterImage(image_id);
        DeleteImage(image_id, image.scale_tick > frame_tick + 5);
        if (total_used_memory < critical_memory) {
            if (aggressive_mode) {
                // Sink the aggresiveness.
@@ -165,10 +165,10 @@ void TextureCache<P>::RunGarbageCollector() {
    lru_cache.ForEachItemBelow(frame_tick, [&](ImageId image_id) {
        auto& image = slot_images[image_id];
        // Only target sparse textures that are old enough
        if (image.info.is_sparse &&
            image.guest_size_bytes >= 256_MiB &&
            image.allocation_tick < frame_tick - 3) {
            LOG_DEBUG(HW_GPU, "GC targeting old sparse texture at 0x{:X} ({} MiB, age: {} frames)",
                      image.gpu_addr, image.guest_size_bytes / (1024 * 1024),
                      frame_tick - image.allocation_tick);
            return Cleanup(image_id);
@@ -658,24 +658,24 @@ void TextureCache<P>::UnmapMemory(DAddr cpu_addr, size_t size) {
template <class P>
std::optional<SparseBinding> TextureCache<P>::CalculateSparseBinding(
    const Image& image, GPUVAddr gpu_addr, DAddr dev_addr) {
    if (!image.info.is_sparse) {
        return std::nullopt;
    }
    const u64 offset = gpu_addr - image.gpu_addr;
    const u64 tile_index = offset / image.sparse_tile_size;
    const u32 tile_width_blocks = 128;
    const u32 tile_height_blocks = 32;
    const u32 width_in_tiles = (image.info.size.width / 4 + tile_width_blocks - 1) / tile_width_blocks;
    const u32 height_in_tiles = (image.info.size.height / 4 + tile_height_blocks - 1) / tile_height_blocks;
    const u32 tile_x = static_cast<u32>((tile_index % width_in_tiles) * tile_width_blocks * 4);
    const u32 tile_y = static_cast<u32>(((tile_index / width_in_tiles) % height_in_tiles) * tile_height_blocks * 4);
    const u32 tile_z = static_cast<u32>(tile_index / (width_in_tiles * height_in_tiles));
    return SparseBinding{
        .gpu_addr = gpu_addr,
        .device_addr = dev_addr,
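
As a worked example of the tile-index arithmetic in CalculateSparseBinding above, the sketch below reproduces the same mapping from a byte offset to tile coordinates. The 2048x1024 image and the chosen offset are illustrative inputs only; the 64 KiB tile size matches the sparse_tile_size default visible in image_base.h.

// Worked example of the sparse tile-coordinate math from the hunk above.
// Image dimensions and the chosen offset are example inputs.
#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t width = 2048, height = 1024;   // texel dimensions
    const std::uint64_t sparse_tile_size = 65536;       // bytes per sparse tile
    const std::uint64_t offset = 3 * sparse_tile_size;  // offset of the bound page

    const std::uint64_t tile_index = offset / sparse_tile_size;
    const std::uint32_t tile_width_blocks = 128;        // 4x4 blocks per tile in X
    const std::uint32_t tile_height_blocks = 32;        // 4x4 blocks per tile in Y
    const std::uint32_t width_in_tiles =
        (width / 4 + tile_width_blocks - 1) / tile_width_blocks;
    const std::uint32_t height_in_tiles =
        (height / 4 + tile_height_blocks - 1) / tile_height_blocks;

    const auto tile_x = static_cast<std::uint32_t>(
        (tile_index % width_in_tiles) * tile_width_blocks * 4);
    const auto tile_y = static_cast<std::uint32_t>(
        ((tile_index / width_in_tiles) % height_in_tiles) * tile_height_blocks * 4);
    const auto tile_z = static_cast<std::uint32_t>(
        tile_index / (width_in_tiles * height_in_tiles));

    // width_in_tiles = (2048/4)/128 = 4, so tile 3 is the last tile of the
    // first tile row: (1536, 0, 0) in texels.
    std::printf("tile %llu -> (%u, %u, %u)\n",
                static_cast<unsigned long long>(tile_index), tile_x, tile_y, tile_z);
}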
@@ -1133,14 +1133,14 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
        // Only upload modified images
        return;
    }
    image.flags &= ~ImageFlagBits::CpuModified;
    if( lowmemorydevice && image.info.format == PixelFormat::BC1_RGBA_UNORM && MapSizeBytes(image) >= 256_MiB ) {
        return;
    }
    TrackImage(image, image_id);
    if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) {
        LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
        runtime.TransitionImageLayout(image);
@@ -1156,7 +1156,7 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
        image.info.resources.layers == 1 &&
        MapSizeBytes(image) >= 128_MiB &&
        False(image.flags & ImageFlagBits::GpuModified)) {
        QueueAsyncUnswizzle(image, image_id);
        return;
    }
@@ -1411,7 +1411,7 @@ void TextureCache<P>::QueueAsyncUnswizzle(Image& image, ImageId image_id) {
    }
    image.flags |= ImageFlagBits::IsDecoding;
    unswizzle_queue.push_back({
        .image_id = image_id,
        .info = image.info
@@ -1448,31 +1448,31 @@ void TextureCache<P>::TickAsyncUnswizzle() {
    if (unswizzle_queue.empty()) {
        return;
    }
    if(current_unswizzle_frame > 0) {
        current_unswizzle_frame--;
        return;
    }
    PendingUnswizzle& task = unswizzle_queue.front();
    Image& image = slot_images[task.image_id];
    if (!task.initialized) {
        task.total_size = MapSizeBytes(image);
        task.staging_buffer = runtime.UploadStagingBuffer(task.total_size, true);
        const auto& info = image.info;
        const u32 bytes_per_block = BytesPerBlock(info.format);
        const u32 width_blocks = Common::DivCeil(info.size.width, 4u);
        const u32 height_blocks = Common::DivCeil(info.size.height, 4u);
        const u32 stride = width_blocks * bytes_per_block;
        const u32 aligned_height = height_blocks;
        task.bytes_per_slice = static_cast<size_t>(stride) * aligned_height;
        task.last_submitted_offset = 0;
        task.initialized = true;
    }
    size_t CHUNK_SIZE;
    switch (Settings::values.gpu_unzwizzle_stream_size.GetValue()) {
    case Settings::GpuUnswizzle::VeryLow: CHUNK_SIZE = 4_MiB; break;
@@ -1492,28 +1492,28 @@ void TextureCache<P>::TickAsyncUnswizzle() {
    case Settings::GpuUnswizzleChunk::High: SLICES_PER_BATCH = 512; break;
    default: SLICES_PER_BATCH = 128;
    }
    // Read data
    if (task.current_offset < task.total_size) {
        const size_t remaining = task.total_size - task.current_offset;
        size_t copy_amount = std::min(CHUNK_SIZE, remaining);
        if (remaining > CHUNK_SIZE) {
            copy_amount = (copy_amount / task.bytes_per_slice) * task.bytes_per_slice;
            if (copy_amount == 0) copy_amount = task.bytes_per_slice;
        }
        gpu_memory->ReadBlock(image.gpu_addr + task.current_offset,
                              task.staging_buffer.mapped_span.data() + task.current_offset,
                              copy_amount);
        task.current_offset += copy_amount;
    }
    const bool is_final_batch = task.current_offset >= task.total_size;
    const size_t bytes_ready = task.current_offset - task.last_submitted_offset;
    const u32 complete_slices = static_cast<u32>(bytes_ready / task.bytes_per_slice);
    if (complete_slices >= SLICES_PER_BATCH || (is_final_batch && complete_slices > 0)) {
        const u32 z_start = static_cast<u32>(task.last_submitted_offset / task.bytes_per_slice);
        const u32 slices_to_process = std::min(complete_slices, SLICES_PER_BATCH);
@@ -1525,16 +1525,16 @@ void TextureCache<P>::TickAsyncUnswizzle() {
            task.last_submitted_offset += (static_cast<size_t>(z_count) * task.bytes_per_slice);
        }
    }
    // Check if complete
    const u32 slices_submitted = static_cast<u32>(task.last_submitted_offset / task.bytes_per_slice);
    const bool all_slices_submitted = slices_submitted >= image.info.size.depth;
    if (is_final_batch && all_slices_submitted) {
        runtime.FreeDeferredStagingBuffer(task.staging_buffer);
        image.flags &= ~ImageFlagBits::IsDecoding;
        unswizzle_queue.pop_front();
        // Wait 4 frames to process the next entry
        current_unswizzle_frame = 4u;
    }
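
The streaming logic in the TickAsyncUnswizzle hunks above boils down to slice-aligned reads and fixed-size batches. A minimal standalone sketch of the same accounting (a hypothetical 1024x1024x256 BC1 image, with a fixed 16 MiB chunk and 128-slice batch standing in for the gpu_unzwizzle_* settings) follows:

// Sketch of the slice-aligned streaming arithmetic used by TickAsyncUnswizzle.
// Sizes, chunk size and batch size are illustrative stand-ins for the settings.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t width = 1024, height = 1024, depth = 256;
    const std::uint32_t bytes_per_block = 8;                    // e.g. BC1
    const std::uint32_t width_blocks = (width + 3) / 4;
    const std::uint32_t height_blocks = (height + 3) / 4;
    const std::size_t bytes_per_slice =
        static_cast<std::size_t>(width_blocks) * bytes_per_block * height_blocks;
    const std::size_t total_size = bytes_per_slice * depth;

    const std::size_t CHUNK_SIZE = 16 * 1024 * 1024;  // per-tick read budget
    const std::uint32_t SLICES_PER_BATCH = 128;       // per-dispatch batch

    std::size_t current_offset = 0, last_submitted = 0;
    int tick = 0;
    while (last_submitted < total_size) {
        ++tick;
        if (current_offset < total_size) {
            const std::size_t remaining = total_size - current_offset;
            std::size_t copy_amount = std::min(CHUNK_SIZE, remaining);
            if (remaining > CHUNK_SIZE) {
                // Round the read down to whole slices so batches stay Z-aligned.
                copy_amount = (copy_amount / bytes_per_slice) * bytes_per_slice;
                if (copy_amount == 0) copy_amount = bytes_per_slice;
            }
            current_offset += copy_amount;
        }
        const bool is_final = current_offset >= total_size;
        const auto complete_slices =
            static_cast<std::uint32_t>((current_offset - last_submitted) / bytes_per_slice);
        if (complete_slices >= SLICES_PER_BATCH || (is_final && complete_slices > 0)) {
            const auto z_start = static_cast<std::uint32_t>(last_submitted / bytes_per_slice);
            const std::uint32_t z_count = std::min(complete_slices, SLICES_PER_BATCH);
            std::printf("tick %d: submit z=[%u,%u)\n", tick, z_start, z_start + z_count);
            last_submitted += static_cast<std::size_t>(z_count) * bytes_per_slice;
        }
    }
}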
@@ -1578,29 +1578,29 @@ ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
        }
    }
    ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
    // For large sparse textures, aggressively clean up old allocations at same address
    if (info.is_sparse && CalculateGuestSizeInBytes(info) >= 256_MiB) {
        const auto alloc_it = image_allocs_table.find(gpu_addr);
        if (alloc_it != image_allocs_table.end()) {
            const ImageAllocId alloc_id = alloc_it->second;
            auto& alloc_images = slot_image_allocs[alloc_id].images;
            // Collect old images at this address that were created more than 2 frames ago
            boost::container::small_vector<ImageId, 4> to_delete;
            for (ImageId old_image_id : alloc_images) {
                Image& old_image = slot_images[old_image_id];
                if (old_image.info.is_sparse &&
                    old_image.gpu_addr == gpu_addr &&
                    old_image.allocation_tick < frame_tick - 2) { // Try not to delete fresh textures
                    to_delete.push_back(old_image_id);
                }
            }
            // Delete old images immediately
            for (ImageId old_id : to_delete) {
                Image& old_image = slot_images[old_id];
                LOG_DEBUG(HW_GPU, "Immediately deleting old sparse texture at 0x{:X} ({} MiB)",
                          gpu_addr, old_image.guest_size_bytes / (1024 * 1024));
                if (True(old_image.flags & ImageFlagBits::Tracked)) {
                    UntrackImage(old_image, old_id);
@@ -1610,7 +1610,7 @@ ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
            }
        }
    }
    const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
    const Image& image = slot_images[image_id];
    // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different
@@ -1626,11 +1626,11 @@ template <class P>
ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr) {
    ImageInfo new_info = info;
    const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
    // Proactive cleanup for large sparse texture allocations
    if (new_info.is_sparse && size_bytes >= 256_MiB) {
        const u64 estimated_alloc_size = size_bytes;
        if (total_used_memory + estimated_alloc_size >= critical_memory) {
            LOG_DEBUG(HW_GPU, "Large sparse texture allocation ({} MiB) - running aggressive GC. "
                      "Current memory: {} MiB, Critical: {} MiB",
@@ -1638,7 +1638,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
                      total_used_memory / (1024 * 1024),
                      critical_memory / (1024 * 1024));
            RunGarbageCollector();
            // If still over threshold after GC, try one more aggressive pass
            if (total_used_memory + estimated_alloc_size >= critical_memory) {
                LOG_DEBUG(HW_GPU, "Still critically low on memory, running second GC pass");
@@ -1646,7 +1646,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
            }
        }
    }
    const bool broken_views = runtime.HasBrokenTextureViewFormats();
    const bool native_bgr = runtime.HasNativeBgr();
    join_overlap_ids.clear();
@@ -1742,7 +1742,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
    const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
    Image& new_image = slot_images[new_image_id];
    new_image.allocation_tick = frame_tick;
    if (!gpu_memory->IsContinuousRange(new_image.gpu_addr, new_image.guest_size_bytes) &&

src/video_core/texture_cache/texture_cache_base.h (4 changed lines)

@@ -224,7 +224,7 @@ public:
    /// Remove images in a region
    void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size, DAddr dev_addr);
    /// Basic sparse binding
    std::optional<SparseBinding> CalculateSparseBinding(
        const Image& image, GPUVAddr gpu_addr, DAddr dev_addr);
@@ -327,7 +327,7 @@ private:
    /// Refresh the contents (pixel data) of an image
    void RefreshContents(Image& image, ImageId image_id);
    /// Sparse texture partial upload
    template <typename StagingBuffer>
    void UploadSparseDirtyTiles(Image& image, StagingBuffer& staging);

src/video_core/texture_cache/util.h (2 changed lines)

@@ -73,7 +73,7 @@ struct SparseTileUnswizzleResult {
[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(
    Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
    std::span<const u8> input, std::span<u8> output);
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
                  std::span<BufferImageCopy> copies);
