Fix conflicts after new VMA, restore some lost fixes for A6XX

6 months ago · dfeeab965c
1 changed file with 15 additions and 233 deletions
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -239,7 +239,11 @@ namespace Vulkan {
        VmaAllocation allocation{};
        VkMemoryPropertyFlags property_flags{};

-        vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info));
+        VkResult result = vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info);
+        if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
+            LOG_ERROR(Render_Vulkan, "Out of memory creating buffer (size: {})", ci.size);
+        }
+        vk::Check(result);
        vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags);

        u8 *data = reinterpret_cast<u8 *>(alloc_info.pMappedData);
@@ -253,30 +257,36 @@ namespace Vulkan {

    MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements &reqs, MemoryUsage usage)
    {
+        // Adreno stands firm - ensure 4KB alignment for Qualcomm GPUs
+        VkMemoryRequirements adjusted_reqs = reqs;
+        if (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
+            adjusted_reqs.size = Common::AlignUp(reqs.size, 4096);
+        }
+
        const auto vma_usage = MemoryUsageVma(usage);
        VmaAllocationCreateInfo ci{};
        ci.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage);
        ci.usage = vma_usage;
-        ci.memoryTypeBits = reqs.memoryTypeBits & valid_memory_types;
+        ci.memoryTypeBits = adjusted_reqs.memoryTypeBits & valid_memory_types;
        ci.requiredFlags = 0;
        ci.preferredFlags = MemoryUsagePreferredVmaFlags(usage);

        VmaAllocation a{};
        VmaAllocationInfo info{};

-        VkResult res = vmaAllocateMemory(allocator, &reqs, &ci, &a, &info);
+        VkResult res = vmaAllocateMemory(allocator, &adjusted_reqs, &ci, &a, &info);

        if (res != VK_SUCCESS) {
            // Relax 1: drop budget constraint
            auto ci2 = ci;
            ci2.flags &= ~VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT;
-            res = vmaAllocateMemory(allocator, &reqs, &ci2, &a, &info);
+            res = vmaAllocateMemory(allocator, &adjusted_reqs, &ci2, &a, &info);

            // Relax 2: if we preferred DEVICE_LOCAL, drop that preference
            if (res != VK_SUCCESS && (ci.preferredFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) {
                auto ci3 = ci2;
                ci3.preferredFlags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
-                res = vmaAllocateMemory(allocator, &reqs, &ci3, &a, &info);
+                res = vmaAllocateMemory(allocator, &adjusted_reqs, &ci3, &a, &info);
            }
        }

@@ -322,234 +332,6 @@ namespace Vulkan {
        return MemoryCommit(allocator, a, info);
    }

-    MemoryAllocator* const allocator;           ///< Parent memory allocation.
-    const vk::DeviceMemory memory;              ///< Vulkan memory allocation handler.
-    const u64 allocation_size;                  ///< Size of this allocation.
-    const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
-    const u32 shifted_memory_type;              ///< Shifted Vulkan memory type.
-    std::vector<Range> commits;                 ///< All commit ranges done from this allocation.
-    std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before.
-};
-
-MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
-                           u64 end_) noexcept
-    : allocation{allocation_}, memory{memory_}, begin{begin_}, end{end_} {}
-
-MemoryCommit::~MemoryCommit() {
-    Release();
-}
-
-MemoryCommit& MemoryCommit::operator=(MemoryCommit&& rhs) noexcept {
-    Release();
-    allocation = std::exchange(rhs.allocation, nullptr);
-    memory = rhs.memory;
-    begin = rhs.begin;
-    end = rhs.end;
-    span = std::exchange(rhs.span, std::span<u8>{});
-    return *this;
-}
-
-MemoryCommit::MemoryCommit(MemoryCommit&& rhs) noexcept
-    : allocation{std::exchange(rhs.allocation, nullptr)}, memory{rhs.memory}, begin{rhs.begin},
-      end{rhs.end}, span{std::exchange(rhs.span, std::span<u8>{})} {}
-
-std::span<u8> MemoryCommit::Map() {
-    if (span.empty()) {
-        span = allocation->Map().subspan(begin, end - begin);
-    }
-    return span;
-}
-
-void MemoryCommit::Release() {
-    if (allocation) {
-        allocation->Free(begin);
-    }
-}
-
-MemoryAllocator::MemoryAllocator(const Device& device_)
-    : device{device_}, allocator{device.GetAllocator()},
-      properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
-      buffer_image_granularity{
-          device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {
-    // GPUs not supporting rebar may only have a region with less than 256MB host visible/device
-    // local memory. In that case, opening 2 RenderDoc captures side-by-side is not possible due to
-    // the heap running out of memory. With RenderDoc attached and only a small host/device region,
-    // only allow the stream buffer in this memory heap.
-    if (device.HasDebuggingToolAttached()) {
-        using namespace Common::Literals;
-        ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) {
-            if (heap.size <= 256_MiB) {
-                valid_memory_types &= ~(1u << index);
-            }
-        });
-    }
-}
-
-MemoryAllocator::~MemoryAllocator() = default;
-
-vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const {
-    const VmaAllocationCreateInfo alloc_ci = {
-        .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
-        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
-        .requiredFlags = 0,
-        .preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
-        .memoryTypeBits = 0,
-        .pool = VK_NULL_HANDLE,
-        .pUserData = nullptr,
-        .priority = 0.f,
-    };
-
-    VkImage handle{};
-    VmaAllocation allocation{};
-
-    vk::Check(vmaCreateImage(allocator, &ci, &alloc_ci, &handle, &allocation, nullptr));
-
-    return vk::Image(handle, ci.usage, *device.GetLogical(), allocator, allocation,
-                     device.GetDispatchLoader());
-}
-
-vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const {
-    const VmaAllocationCreateInfo alloc_ci = {
-        .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
-        .usage = MemoryUsageVma(usage),
-        .requiredFlags = 0,
-        .preferredFlags = MemoryUsagePreferredVmaFlags(usage),
-        .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types,
-        .pool = VK_NULL_HANDLE,
-        .pUserData = nullptr,
-        .priority = 0.f,
-    };
-
-    VkBuffer handle{};
-    VmaAllocationInfo alloc_info{};
-    VmaAllocation allocation{};
-    VkMemoryPropertyFlags property_flags{};
-
-    VkResult result = vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info);
-    if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
-        LOG_ERROR(Render_Vulkan, "Out of memory creating buffer (size: {})", ci.size);
-    }
-    vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags);
-
-    u8* data = reinterpret_cast<u8*>(alloc_info.pMappedData);
-    const std::span<u8> mapped_data = data ? std::span<u8>{data, ci.size} : std::span<u8>{};
-    const bool is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
-
-    return vk::Buffer(handle, *device.GetLogical(), allocator, allocation, mapped_data, is_coherent,
-                      device.GetDispatchLoader());
-}
-
-MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) {
-        // Find the fastest memory flags we can afford with the current requirements
-        const u32 type_mask = requirements.memoryTypeBits;
-        const VkMemoryPropertyFlags usage_flags = MemoryUsagePropertyFlags(usage);
-        const VkMemoryPropertyFlags flags = MemoryPropertyFlags(type_mask, usage_flags);
-        if (std::optional<MemoryCommit> commit = TryCommit(requirements, flags)) {
-            return std::move(*commit);
-        }
-        // Commit has failed, allocate more memory.
-        const u64 chunk_size = AllocationChunkSize(requirements.size);
-        if (!TryAllocMemory(flags, type_mask, chunk_size)) {
-            // TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory.
-            throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
-        }
-        // Commit again, this time it won't fail since there's a fresh allocation above.
-        // If it does, there's a bug.
-        return TryCommit(requirements, flags).value();
-    }
-
-bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
-    const auto type_opt = FindType(flags, type_mask);
-    if (!type_opt) {
-        return false;
-    }
-
-    // Adreno stands firm
-    const u64 aligned_size = (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) ?
-                            Common::AlignUp(size, 4096) :
-                            size;

-    vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
-        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
-        .pNext = nullptr,
-        .allocationSize = aligned_size,
-        .memoryTypeIndex = *type_opt,
-    });
-
-    if (!memory) {
-        return false;
-    }
-
-    allocations.push_back(
-        std::make_unique<MemoryAllocation>(this, std::move(memory), flags, aligned_size, *type_opt));
-    return true;
-}
-
-void MemoryAllocator::ReleaseMemory(MemoryAllocation* alloc) {
-    const auto it = std::ranges::find(allocations, alloc, &std::unique_ptr<MemoryAllocation>::get);
-    ASSERT(it != allocations.end());
-    allocations.erase(it);
-}
-
-std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
-                                                       VkMemoryPropertyFlags flags) {
-    // Conservative, spec-compliant alignment for suballocation
-    VkDeviceSize eff_align = requirements.alignment;
-    const auto& limits = device.GetPhysical().GetProperties().limits;
-    if ((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
-        !(flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
-        // Non-coherent memory must be invalidated on atom boundary
-        if (limits.nonCoherentAtomSize > eff_align) eff_align = limits.nonCoherentAtomSize;
-    }
-    // Separate buffers to avoid stalls on tilers
-    if (buffer_image_granularity > eff_align) {
-        eff_align = buffer_image_granularity;
-    }
-    eff_align = std::bit_ceil(eff_align);
-
-    for (auto& allocation : allocations) {
-        if (!allocation->IsCompatible(flags, requirements.memoryTypeBits)) {
-            continue;
-        }
-        if (auto commit = allocation->Commit(requirements.size, eff_align)) {
-            return commit;
-        }
-    }
-    if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
-        // Look for non device local commits on failure
-        return TryCommit(requirements, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
-    }
-    return std::nullopt;
-}
-
-VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask,
-                                                           VkMemoryPropertyFlags flags) const {
-    if (FindType(flags, type_mask)) {
-        // Found a memory type with those requirements
-        return flags;
-    }
-    if ((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0) {
-        // Remove host cached bit in case it's not supported
-        return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
-    }
-    if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
-        // Remove device local, if it's not supported by the requested resource
-        return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
-    }
-    ASSERT_MSG(false, "No compatible memory types found");
-    return 0;
-}
-
-std::optional<u32> MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 type_mask) const {
-    for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
-        const VkMemoryPropertyFlags type_flags = properties.memoryTypes[type_index].propertyFlags;
-        if ((type_mask & (1U << type_index)) != 0 && (type_flags & flags) == flags) {
-            // The type matches in type and in the wanted properties.
-            return type_index;
-        }
-    }
-    // Failed to find index
-    return std::nullopt;
-}

 } // namespace Vulkan