@@ -76,7 +76,7 @@ TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag
        expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
        critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
        minimum_memory = 0;
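
        // expected_memory and critical_memory set the thresholds the texture-cache garbage
        // collector works against; the extra 512_MiB / 1_GiB of headroom raises them above the
        // defaults. lowmemorydevice gates the low-memory paths introduced later in this patch
        // (e.g. the BC1_RGBA_UNORM upload skip in RefreshContents()).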
        lowmemorydevice = true;
    }
}
@@ -94,7 +94,7 @@ void TextureCache<P>::RunGarbageCollector() {
        ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
        num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10);
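        // ticks_to_destroy is presumably the LRU age cutoff and num_iterations caps how many
        // images a single pass may visit: aggressive GC pairs a short cutoff (10 ticks) with a
        // large budget (40 images), while the default pass uses 50 ticks and 10 images.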
    };

    const auto Cleanup = [this, &num_iterations, &high_priority_mode,
                          &aggressive_mode](ImageId image_id) {
        if (num_iterations == 0) {
@@ -102,34 +102,34 @@ void TextureCache<P>::RunGarbageCollector() {
        }
        --num_iterations;
        auto& image = slot_images[image_id];

        // Never delete recently allocated sparse textures (within 3 frames)
        const bool is_recently_allocated = image.allocation_tick >= frame_tick - 3;
        if (is_recently_allocated && image.info.is_sparse) {
            return false;
        }

        if (True(image.flags & ImageFlagBits::IsDecoding)) {
            // This image is still being decoded, deleting it will invalidate the slot
            // used by the async decoder thread.
            return false;
        }

        // Prioritize large sparse textures for cleanup
        const bool is_large_sparse = image.info.is_sparse &&
                                     image.guest_size_bytes >= 256_MiB;

        if (!aggressive_mode && !is_large_sparse &&
            True(image.flags & ImageFlagBits::CostlyLoad)) {
            return false;
        }

        const bool must_download =
            image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
        if (!high_priority_mode && !is_large_sparse && must_download) {
            return false;
        }
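
        // Net effect of the guards above: large (>= 256_MiB) sparse images skip the CostlyLoad
        // and pending-download deferrals, so they stay evictable outside aggressive and
        // high-priority mode, and below they are deleted without being downloaded first.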
        if (must_download && !is_large_sparse) {
            auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
            const auto copies = FixSmallVectorADL(FullDownloadCopies(image.info));
@@ -138,13 +138,13 @@ void TextureCache<P>::RunGarbageCollector() {
            SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
                         swizzle_data_buffer);
        }

        if (True(image.flags & ImageFlagBits::Tracked)) {
            UntrackImage(image, image_id);
        }
        UnregisterImage(image_id);
        DeleteImage(image_id, image.scale_tick > frame_tick + 5);

        if (total_used_memory < critical_memory) {
            if (aggressive_mode) {
                // Sink the aggressiveness.
@@ -165,10 +165,10 @@ void TextureCache<P>::RunGarbageCollector() {
    lru_cache.ForEachItemBelow(frame_tick, [&](ImageId image_id) {
        auto& image = slot_images[image_id];
        // Only target sparse textures that are old enough
        if (image.info.is_sparse &&
            image.guest_size_bytes >= 256_MiB &&
            image.allocation_tick < frame_tick - 3) {
            LOG_DEBUG(HW_GPU, "GC targeting old sparse texture at 0x{:X} ({} MiB, age: {} frames)",
                      image.gpu_addr, image.guest_size_bytes / (1024 * 1024),
                      frame_tick - image.allocation_tick);
            return Cleanup(image_id);
@@ -658,24 +658,24 @@ void TextureCache<P>::UnmapMemory(DAddr cpu_addr, size_t size) {
template <class P>
std::optional<SparseBinding> TextureCache<P>::CalculateSparseBinding(
    const Image& image, GPUVAddr gpu_addr, DAddr dev_addr) {

    if (!image.info.is_sparse) {
        return std::nullopt;
    }

    const u64 offset = gpu_addr - image.gpu_addr;
    const u64 tile_index = offset / image.sparse_tile_size;

    const u32 tile_width_blocks = 128;
    const u32 tile_height_blocks = 32;

    const u32 width_in_tiles = (image.info.size.width / 4 + tile_width_blocks - 1) / tile_width_blocks;
    const u32 height_in_tiles = (image.info.size.height / 4 + tile_height_blocks - 1) / tile_height_blocks;

    const u32 tile_x = static_cast<u32>((tile_index % width_in_tiles) * tile_width_blocks * 4);
    const u32 tile_y = static_cast<u32>(((tile_index / width_in_tiles) % height_in_tiles) * tile_height_blocks * 4);
    const u32 tile_z = static_cast<u32>(tile_index / (width_in_tiles * height_in_tiles));
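    // Illustrative only (values assumed, not from the patch): for a 2048x2048 block-compressed
    // image, width / 4 = 512 blocks, so width_in_tiles = 4 and height_in_tiles = 16; tile_index 5
    // then maps to tile_x = (5 % 4) * 512 = 512, tile_y = ((5 / 4) % 16) * 128 = 128, tile_z = 0.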

    return SparseBinding{
        .gpu_addr = gpu_addr,
        .device_addr = dev_addr,
@@ -1133,14 +1133,14 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
        // Only upload modified images
        return;
    }

    image.flags &= ~ImageFlagBits::CpuModified;
    if (lowmemorydevice && image.info.format == PixelFormat::BC1_RGBA_UNORM && MapSizeBytes(image) >= 256_MiB) {
        return;
    }
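    // CpuModified was already cleared above, so returning here leaves the image with its current
    // contents; presumably this trades stale data in very large BC1 textures for memory headroom
    // on low-memory devices.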

    TrackImage(image, image_id);

    if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) {
        LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
        runtime.TransitionImageLayout(image);
@@ -1156,7 +1156,7 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
        image.info.resources.layers == 1 &&
        MapSizeBytes(image) >= 128_MiB &&
        False(image.flags & ImageFlagBits::GpuModified)) {

        QueueAsyncUnswizzle(image, image_id);
        return;
    }
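    // Images that fail these checks (layered images, images under 128_MiB, or GPU-modified ones)
    // presumably fall through to the regular synchronous upload path.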
@@ -1411,7 +1411,7 @@ void TextureCache<P>::QueueAsyncUnswizzle(Image& image, ImageId image_id) {
    }

    image.flags |= ImageFlagBits::IsDecoding;

    unswizzle_queue.push_back({
        .image_id = image_id,
        .info = image.info
@@ -1448,31 +1448,31 @@ void TextureCache<P>::TickAsyncUnswizzle() {
    if (unswizzle_queue.empty()) {
        return;
    }

    if (current_unswizzle_frame > 0) {
        current_unswizzle_frame--;
        return;
    }

    PendingUnswizzle& task = unswizzle_queue.front();
    Image& image = slot_images[task.image_id];

    if (!task.initialized) {
        task.total_size = MapSizeBytes(image);
        task.staging_buffer = runtime.UploadStagingBuffer(task.total_size, true);

        const auto& info = image.info;
        const u32 bytes_per_block = BytesPerBlock(info.format);
        const u32 width_blocks = Common::DivCeil(info.size.width, 4u);
        const u32 height_blocks = Common::DivCeil(info.size.height, 4u);

        const u32 stride = width_blocks * bytes_per_block;
        const u32 aligned_height = height_blocks;
        task.bytes_per_slice = static_cast<size_t>(stride) * aligned_height;
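        // Illustrative only: a 2048x2048 BC1 slice is 512x512 blocks at 8 bytes per block, giving
        // stride = 4096 bytes and bytes_per_slice = 4096 * 512 = 2_MiB per 3D slice.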
        task.last_submitted_offset = 0;
        task.initialized = true;
    }

    size_t CHUNK_SIZE;
    switch (Settings::values.gpu_unzwizzle_stream_size.GetValue()) {
    case Settings::GpuUnswizzle::VeryLow: CHUNK_SIZE = 4_MiB; break;
@@ -1492,28 +1492,28 @@ void TextureCache<P>::TickAsyncUnswizzle() {
    case Settings::GpuUnswizzleChunk::High: SLICES_PER_BATCH = 512; break;
    default: SLICES_PER_BATCH = 128;
    }

    // Read data
    if (task.current_offset < task.total_size) {
        const size_t remaining = task.total_size - task.current_offset;

        size_t copy_amount = std::min(CHUNK_SIZE, remaining);

        if (remaining > CHUNK_SIZE) {
            copy_amount = (copy_amount / task.bytes_per_slice) * task.bytes_per_slice;
            if (copy_amount == 0) copy_amount = task.bytes_per_slice;
        }
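
        // The truncation above keeps each chunk a whole number of slices; e.g. with an assumed
        // 8_MiB chunk and a 3_MiB slice, 8_MiB rounds down to 6_MiB (two slices), while a slice
        // larger than the chunk is still copied in one piece.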
        gpu_memory->ReadBlock(image.gpu_addr + task.current_offset,
                              task.staging_buffer.mapped_span.data() + task.current_offset,
                              copy_amount);
        task.current_offset += copy_amount;
    }

    const bool is_final_batch = task.current_offset >= task.total_size;
    const size_t bytes_ready = task.current_offset - task.last_submitted_offset;
    const u32 complete_slices = static_cast<u32>(bytes_ready / task.bytes_per_slice);
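
    // complete_slices counts whole slices copied but not yet submitted; a batch is issued once
    // SLICES_PER_BATCH of them accumulate, or earlier on the final chunk so the tail of the
    // image is not left unsubmitted.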
    if (complete_slices >= SLICES_PER_BATCH || (is_final_batch && complete_slices > 0)) {
        const u32 z_start = static_cast<u32>(task.last_submitted_offset / task.bytes_per_slice);
        const u32 slices_to_process = std::min(complete_slices, SLICES_PER_BATCH);
@@ -1525,16 +1525,16 @@ void TextureCache<P>::TickAsyncUnswizzle() {
            task.last_submitted_offset += (static_cast<size_t>(z_count) * task.bytes_per_slice);
        }
    }

    // Check if complete
    const u32 slices_submitted = static_cast<u32>(task.last_submitted_offset / task.bytes_per_slice);
    const bool all_slices_submitted = slices_submitted >= image.info.size.depth;

    if (is_final_batch && all_slices_submitted) {
        runtime.FreeDeferredStagingBuffer(task.staging_buffer);
        image.flags &= ~ImageFlagBits::IsDecoding;
        unswizzle_queue.pop_front();

        // Wait 4 frames to process the next entry
        current_unswizzle_frame = 4u;
    }
@@ -1578,29 +1578,29 @@ ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
        }
    }
    ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);

    // For large sparse textures, aggressively clean up old allocations at same address
    if (info.is_sparse && CalculateGuestSizeInBytes(info) >= 256_MiB) {
        const auto alloc_it = image_allocs_table.find(gpu_addr);
        if (alloc_it != image_allocs_table.end()) {
            const ImageAllocId alloc_id = alloc_it->second;
            auto& alloc_images = slot_image_allocs[alloc_id].images;

            // Collect old images at this address that were created more than 2 frames ago
            boost::container::small_vector<ImageId, 4> to_delete;
            for (ImageId old_image_id : alloc_images) {
                Image& old_image = slot_images[old_image_id];
                if (old_image.info.is_sparse &&
                    old_image.gpu_addr == gpu_addr &&
                    old_image.allocation_tick < frame_tick - 2) { // Try not to delete fresh textures
                    to_delete.push_back(old_image_id);
                }
            }
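
            // Deleting in a second loop below, rather than inside the iteration above, presumably
            // avoids mutating alloc_images while it is being walked.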
            // Delete old images immediately
            for (ImageId old_id : to_delete) {
                Image& old_image = slot_images[old_id];
                LOG_DEBUG(HW_GPU, "Immediately deleting old sparse texture at 0x{:X} ({} MiB)",
                          gpu_addr, old_image.guest_size_bytes / (1024 * 1024));
                if (True(old_image.flags & ImageFlagBits::Tracked)) {
                    UntrackImage(old_image, old_id);
@@ -1610,7 +1610,7 @@ ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
            }
        }
    }

    const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
    const Image& image = slot_images[image_id];
    // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different
@@ -1626,11 +1626,11 @@ template <class P>
ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr) {
    ImageInfo new_info = info;
    const size_t size_bytes = CalculateGuestSizeInBytes(new_info);

    // Proactive cleanup for large sparse texture allocations
    if (new_info.is_sparse && size_bytes >= 256_MiB) {
        const u64 estimated_alloc_size = size_bytes;

        if (total_used_memory + estimated_alloc_size >= critical_memory) {
            LOG_DEBUG(HW_GPU, "Large sparse texture allocation ({} MiB) - running aggressive GC. "
                      "Current memory: {} MiB, Critical: {} MiB",
@@ -1638,7 +1638,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
                      total_used_memory / (1024 * 1024),
                      critical_memory / (1024 * 1024));
            RunGarbageCollector();

            // If still over threshold after GC, try one more aggressive pass
            if (total_used_memory + estimated_alloc_size >= critical_memory) {
                LOG_DEBUG(HW_GPU, "Still critically low on memory, running second GC pass");
@@ -1646,7 +1646,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
            }
        }
    }

    const bool broken_views = runtime.HasBrokenTextureViewFormats();
    const bool native_bgr = runtime.HasNativeBgr();
    join_overlap_ids.clear();
@@ -1742,7 +1742,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
    const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
    Image& new_image = slot_images[new_image_id];

    new_image.allocation_tick = frame_tick;

    if (!gpu_memory->IsContinuousRange(new_image.gpu_addr, new_image.guest_size_bytes) &&