
Removed the extra sparse texture binding system

Only run the aggressive garbage collection on low-memory devices (Currently never runs)
pull/3246/head
Forrest Keller authored 3 weeks ago; committed by crueter
commit dbaea7df90
src/video_core/memory_manager.cpp                 | 14
src/video_core/rasterizer_interface.h             |  2
src/video_core/renderer_null/null_rasterizer.cpp  |  2
src/video_core/renderer_null/null_rasterizer.h    |  2
src/video_core/renderer_opengl/gl_rasterizer.cpp  |  4
src/video_core/renderer_opengl/gl_rasterizer.h    |  2
src/video_core/renderer_vulkan/vk_rasterizer.cpp  |  4
src/video_core/renderer_vulkan/vk_rasterizer.h    |  2
src/video_core/renderer_vulkan/vk_scheduler.cpp   |  4
src/video_core/texture_cache/image_base.h         | 11
src/video_core/texture_cache/texture_cache.h      | 51
src/video_core/texture_cache/texture_cache_base.h |  6
12 files changed
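Taken together, the hunks below do three things: drop the trailing DAddr parameter from the ModifyGPUMemory/UnmapGPUMemory path and delete the SparseBinding bookkeeping that consumed it; gate the sparse-texture garbage-collection heuristics behind a lowmemorydevice flag (which, per the commit message, currently never fires); and comment out the Vulkan scheduler's command-chunk warm-up while also dropping a TransitionImageLayout call from PrepareImage. The interface change at the center, quoted from the rasterizer_interface.h hunk below:

    // before this commit:
    virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size, DAddr d_addr) = 0;
    // after:
    virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) = 0;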

src/video_core/memory_manager.cpp (14)

@@ -123,12 +123,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr de
     [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
     SetEntry<false>(current_gpu_addr, entry_type);
     if (current_entry_type != entry_type) {
-        if constexpr (entry_type == EntryType::Mapped) {
-            const DAddr current_dev_addr = dev_addr + offset;
-            rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size, current_dev_addr);
-        } else {
-            rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size, 0u);
-        }
+        rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size);
     }
     if constexpr (entry_type == EntryType::Mapped) {
         const DAddr current_dev_addr = dev_addr + offset;
@@ -151,12 +146,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr
     [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
     SetEntry<true>(current_gpu_addr, entry_type);
     if (current_entry_type != entry_type) {
-        if constexpr (entry_type == EntryType::Mapped) {
-            const DAddr current_dev_addr = dev_addr + offset;
-            rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size, current_dev_addr);
-        } else {
-            rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size, 0u);
-        }
+        rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size);
     }
     if constexpr (entry_type == EntryType::Mapped) {
         const DAddr current_dev_addr = dev_addr + offset;
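Both deleted branches differed only in the trailing DAddr argument (current_dev_addr when mapping, 0u otherwise), and nothing downstream still reads that value, so each collapses to a single range notification. Notably, the deleted Mapped branch in BigPageTableOp passed page_size where its else branch passed big_page_size; the unified call uses big_page_size, removing that mismatch. A minimal self-contained sketch of the collapsed shape (the type aliases and the NotifyRemap stub are assumptions standing in for project code):

    #include <cstddef>
    #include <cstdint>

    using u64 = std::uint64_t;
    using GPUVAddr = u64;
    enum class EntryType { Free, Reserved, Mapped };

    // Stand-in for rasterizer->ModifyGPUMemory(...) as called from PageTableOp.
    void NotifyRemap(std::size_t as_id, GPUVAddr addr, u64 size) {}

    void OnPageEntryChanged(std::size_t as_id, GPUVAddr gpu_addr, u64 page_size,
                            EntryType old_type, EntryType new_type) {
        if (old_type != new_type) {
            // One call covers both map and unmap; no device address is forwarded.
            NotifyRemap(as_id, gpu_addr, page_size);
        }
    }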

src/video_core/rasterizer_interface.h (2)

@@ -123,7 +123,7 @@ public:
     virtual void UnmapMemory(DAddr addr, u64 size) = 0;
     /// Remap GPU memory range. This means underneath backing memory changed
-    virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size, DAddr d_addr) = 0;
+    virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) = 0;
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     /// and invalidated

src/video_core/renderer_null/null_rasterizer.cpp (2)

@@ -65,7 +65,7 @@ VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(PAddr addr, u64 s
 }
 void RasterizerNull::InvalidateGPUCache() {}
 void RasterizerNull::UnmapMemory(DAddr addr, u64 size) {}
-void RasterizerNull::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size, DAddr d_addr) {}
+void RasterizerNull::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {}
 void RasterizerNull::SignalFence(std::function<void()>&& func) {
     func();
 }

src/video_core/renderer_null/null_rasterizer.h (2)

@@ -61,7 +61,7 @@ public:
     VideoCore::RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) override;
     void InvalidateGPUCache() override;
     void UnmapMemory(DAddr addr, u64 size) override;
-    void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size, DAddr d_addr) override;
+    void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
     void SignalFence(std::function<void()>&& func) override;
     void SyncOperation(std::function<void()>&& func) override;
     void SignalSyncPoint(u32 value) override;

src/video_core/renderer_opengl/gl_rasterizer.cpp (4)

@@ -600,10 +600,10 @@ void RasterizerOpenGL::UnmapMemory(DAddr addr, u64 size) {
     shader_cache.OnCacheInvalidation(addr, size);
 }
-void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size, DAddr d_addr) {
+void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
     {
         std::scoped_lock lock{texture_cache.mutex};
-        texture_cache.UnmapGPUMemory(as_id, addr, size, d_addr);
+        texture_cache.UnmapGPUMemory(as_id, addr, size);
     }
 }

src/video_core/renderer_opengl/gl_rasterizer.h (2)

@@ -106,7 +106,7 @@ public:
     bool OnCPUWrite(PAddr addr, u64 size) override;
     void InvalidateGPUCache() override;
     void UnmapMemory(DAddr addr, u64 size) override;
-    void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size, DAddr d_addr) override;
+    void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
     void SignalFence(std::function<void()>&& func) override;
     void SyncOperation(std::function<void()>&& func) override;
     void SignalSyncPoint(u32 value) override;

src/video_core/renderer_vulkan/vk_rasterizer.cpp (4)

@@ -748,10 +748,10 @@ void RasterizerVulkan::UnmapMemory(DAddr addr, u64 size) {
     pipeline_cache.OnCacheInvalidation(addr, size);
 }
-void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size, DAddr d_addr) {
+void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
     {
         std::scoped_lock lock{texture_cache.mutex};
-        texture_cache.UnmapGPUMemory(as_id, addr, size, d_addr);
+        texture_cache.UnmapGPUMemory(as_id, addr, size);
     }
 }
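After this change the OpenGL and Vulkan forwarders are textually identical apart from the cache invalidation in the surrounding functions. A self-contained sketch of the shared shape, with a stand-in TextureCache (the real one is the template in texture_cache_base.h):

    #include <cstddef>
    #include <cstdint>
    #include <mutex>

    using u64 = std::uint64_t;
    using GPUVAddr = u64;

    // Stand-in for the real texture cache template.
    struct TextureCache {
        std::mutex mutex;
        void UnmapGPUMemory(std::size_t as_id, GPUVAddr addr, std::size_t size) {}
    };

    void ModifyGPUMemory(TextureCache& texture_cache, std::size_t as_id,
                         GPUVAddr addr, u64 size) {
        // The inner scope bounds the lock's lifetime, mirroring the diff's braces.
        {
            std::scoped_lock lock{texture_cache.mutex};
            texture_cache.UnmapGPUMemory(as_id, addr, size);
        }
    }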

src/video_core/renderer_vulkan/vk_rasterizer.h (2)

@@ -108,7 +108,7 @@ public:
     bool OnCPUWrite(DAddr addr, u64 size) override;
     void InvalidateGPUCache() override;
     void UnmapMemory(DAddr addr, u64 size) override;
-    void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size, DAddr d_addr) override;
+    void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
     void SignalFence(std::function<void()>&& func) override;
     void SyncOperation(std::function<void()>&& func) override;
     void SignalSyncPoint(u32 value) override;

src/video_core/renderer_vulkan/vk_scheduler.cpp (4)

@@ -44,14 +44,14 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_)
       master_semaphore{std::make_unique<MasterSemaphore>(device)},
       command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
-    // PRE-OPTIMIZATION: Warm up the pool to prevent mid-frame spikes
+    /*// PRE-OPTIMIZATION: Warm up the pool to prevent mid-frame spikes
     {
         std::scoped_lock rl{reserve_mutex};
         chunk_reserve.reserve(2048); // Prevent vector resizing
         for (int i = 0; i < 1024; ++i) {
             chunk_reserve.push_back(std::make_unique<CommandChunk>());
         }
-    }
+    }*/
     AcquireNewChunk();
     AllocateWorkerCommandBuffer();
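With the warm-up commented out, the reserve starts empty and chunks are acquired on demand instead. A minimal sketch of that remaining path, modeled on the upstream Scheduler::AcquireNewChunk (the member layout here is assumed; only the reserve-or-allocate logic is the point):

    #include <memory>
    #include <mutex>
    #include <vector>

    struct CommandChunk {}; // stand-in for the real chunk type

    class Scheduler {
    public:
        void AcquireNewChunk() {
            std::scoped_lock lock{reserve_mutex};
            if (chunk_reserve.empty()) {
                // No warmed-up chunks: allocate lazily on first use.
                chunk = std::make_unique<CommandChunk>();
                return;
            }
            // Otherwise reuse a recycled chunk from the reserve.
            chunk = std::move(chunk_reserve.back());
            chunk_reserve.pop_back();
        }

    private:
        std::mutex reserve_mutex;
        std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
        std::unique_ptr<CommandChunk> chunk;
    };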

src/video_core/texture_cache/image_base.h (11)

@@ -55,13 +55,6 @@ struct AliasedImage {
     ImageId id;
 };
-struct SparseBinding {
-    GPUVAddr gpu_addr;   // Virtual GPU address of this tile
-    DAddr device_addr;   // Physical device memory address
-    u64 tile_index;      // Linear tile index in the texture
-    Extent3D tile_coord; // 3D coordinate of this tile
-};
 struct NullImageParams {};
 struct ImageBase {
@@ -125,10 +118,6 @@ struct ImageBase {
     std::vector<AliasedImage> aliased_images;
     std::vector<ImageId> overlapping_images;
     ImageMapId map_view_id{};
-    boost::container::small_vector<u64, 16> dirty_offsets;
-    std::unordered_map<GPUVAddr, SparseBinding> sparse_bindings;
-    u32 sparse_tile_size = 65536;
 };
 struct ImageMapView {
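For reference, the arithmetic these removed fields fed (the deleted CalculateSparseBinding helper in the texture_cache.h hunks below), worked through as compile-time checks. The 2048x2048 image and tile index 5 are assumed example values, not anything from the diff:

    // Worked example of the deleted tile math: 4x4-texel blocks, 128x32-block
    // tiles, 64 KiB per tile, for an assumed 2048x2048 BC-compressed image.
    constexpr unsigned width_in_tiles  = (2048 / 4 + 127) / 128; // ceil(512/128) = 4
    constexpr unsigned height_in_tiles = (2048 / 4 + 31) / 32;   // ceil(512/32) = 16
    constexpr unsigned tile_index = 5;                           // sixth 64 KiB tile
    constexpr unsigned tile_x = (tile_index % width_in_tiles) * 128 * 4;                    // 512 texels
    constexpr unsigned tile_y = ((tile_index / width_in_tiles) % height_in_tiles) * 32 * 4; // 128 texels
    constexpr unsigned tile_z = tile_index / (width_in_tiles * height_in_tiles);            // slice 0
    static_assert(tile_x == 512 && tile_y == 128 && tile_z == 0);

The only writer of sparse_bindings and dirty_offsets visible anywhere in this diff is the block removed from UnmapGPUMemory below, which is consistent with the commit message calling the binding system "extra".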

src/video_core/texture_cache/texture_cache.h (51)

@@ -143,7 +143,8 @@ void TextureCache<P>::RunGarbageCollector() {
     }
     // Prioritize large sparse textures for cleanup
-    const bool is_large_sparse = image.info.is_sparse &&
+    const bool is_large_sparse = lowmemorydevice &&
+                                 image.info.is_sparse &&
                                  image.guest_size_bytes >= 256_MiB;
     if (!aggressive_mode && !is_large_sparse &&
@@ -192,7 +193,8 @@ void TextureCache<P>::RunGarbageCollector() {
     lru_cache.ForEachItemBelow(frame_tick, [&](ImageId image_id) {
         auto& image = slot_images[image_id];
         // Only target sparse textures that are old enough
-        if (image.info.is_sparse &&
+        if (lowmemorydevice &&
+            image.info.is_sparse &&
             image.guest_size_bytes >= 256_MiB &&
             image.allocation_tick < frame_tick - 3) {
             LOG_DEBUG(HW_GPU, "GC targeting old sparse texture at 0x{:X} ({} MiB, age: {} frames)",
@@ -683,36 +685,7 @@ void TextureCache<P>::UnmapMemory(DAddr cpu_addr, size_t size) {
 }
 template <class P>
-std::optional<SparseBinding> TextureCache<P>::CalculateSparseBinding(
-    const Image& image, GPUVAddr gpu_addr, DAddr dev_addr) {
-    if (!image.info.is_sparse) {
-        return std::nullopt;
-    }
-    const u64 offset = gpu_addr - image.gpu_addr;
-    const u64 tile_index = offset / image.sparse_tile_size;
-    const u32 tile_width_blocks = 128;
-    const u32 tile_height_blocks = 32;
-    const u32 width_in_tiles = (image.info.size.width / 4 + tile_width_blocks - 1) / tile_width_blocks;
-    const u32 height_in_tiles = (image.info.size.height / 4 + tile_height_blocks - 1) / tile_height_blocks;
-    const u32 tile_x = static_cast<u32>((tile_index % width_in_tiles) * tile_width_blocks * 4);
-    const u32 tile_y = static_cast<u32>(((tile_index / width_in_tiles) % height_in_tiles) * tile_height_blocks * 4);
-    const u32 tile_z = static_cast<u32>(tile_index / (width_in_tiles * height_in_tiles));
-    return SparseBinding{
-        .gpu_addr = gpu_addr,
-        .device_addr = dev_addr,
-        .tile_index = tile_index,
-        .tile_coord = {tile_x, tile_y, tile_z}
-    };
-}
-
-template <class P>
-void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size, DAddr dev_addr) {
+void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) {
     boost::container::small_vector<ImageId, 16> deleted_images;
     ForEachImageInRegionGPU(as_id, gpu_addr, size,
                             [&](ImageId id, Image&) { deleted_images.push_back(id); });
@@ -728,15 +701,6 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
             continue;
         }
         image.flags |= ImageFlagBits::Remapped;
-        if (image.info.is_sparse && dev_addr != 0) {
-            // Calculate and store the binding
-            auto binding = CalculateSparseBinding(image, gpu_addr, dev_addr);
-            if (binding) {
-                image.sparse_bindings[gpu_addr] = *binding;
-                image.dirty_offsets.push_back(binding->tile_index);
-            }
-        }
     }
 }
@@ -1587,7 +1551,7 @@ ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
     ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
     // For large sparse textures, aggressively clean up old allocations at same address
-    if (info.is_sparse && CalculateGuestSizeInBytes(info) >= 256_MiB) {
+    if (lowmemorydevice && info.is_sparse && CalculateGuestSizeInBytes(info) >= 256_MiB) {
         const auto alloc_it = image_allocs_table.find(gpu_addr);
         if (alloc_it != image_allocs_table.end()) {
             const ImageAllocId alloc_id = alloc_it->second;
@@ -1635,7 +1599,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
     const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
     // Proactive cleanup for large sparse texture allocations
-    if (new_info.is_sparse && size_bytes >= 256_MiB) {
+    if (lowmemorydevice && new_info.is_sparse && size_bytes >= 256_MiB) {
         const u64 estimated_alloc_size = size_bytes;
         if (total_used_memory + estimated_alloc_size >= critical_memory) {
@@ -2690,7 +2654,6 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
 template <class P>
 void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
     Image& image = slot_images[image_id];
-    runtime.TransitionImageLayout(image);
     if (invalidate) {
         image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
         if (False(image.flags & ImageFlagBits::Tracked)) {
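All of the sparse-texture GC heuristics above are now gated on a lowmemorydevice flag whose definition is not part of this diff; per the commit message it currently never evaluates to true. Purely as an illustration of how such a gate could be derived, a hypothetical sketch (every name below is an assumption, not project code):

    #include <cstdint>

    using u64 = std::uint64_t;
    constexpr u64 operator""_GiB(unsigned long long n) { return n << 30; }

    // Hypothetical stand-in; a real implementation would query the OS
    // for the amount of physical RAM.
    u64 QueryTotalSystemMemory() { return 8_GiB; }

    // Hypothetical derivation of the gate; the flag in this diff reportedly
    // never fires yet ("Currently never runs").
    bool IsLowMemoryDevice() {
        constexpr u64 kThreshold = 6_GiB; // assumed cutoff for constrained devices
        return QueryTotalSystemMemory() < kThreshold;
    }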

src/video_core/texture_cache/texture_cache_base.h (6)

@@ -223,11 +223,7 @@ public:
     void UnmapMemory(DAddr cpu_addr, size_t size);
     /// Remove images in a region
-    void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size, DAddr dev_addr);
-
-    /// Basic sparse binding
-    std::optional<SparseBinding> CalculateSparseBinding(
-        const Image& image, GPUVAddr gpu_addr, DAddr dev_addr);
+    void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size);
     /// Blit an image with the given parameters
     bool BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
