diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 5938de6100..c7dd806570 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -321,7 +321,7 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); - compute_pass_descriptor_queue.Acquire(); + compute_pass_descriptor_queue.Acquire(2); compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; @@ -379,7 +379,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( const std::size_t staging_size = num_tri_vertices * sizeof(u32); const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); - compute_pass_descriptor_queue.Acquire(); + compute_pass_descriptor_queue.Acquire(2); compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; @@ -420,7 +420,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_ u32 src_offset, bool compare_to_zero) { const size_t compare_size = compare_to_zero ? 
8 : 24; - compute_pass_descriptor_queue.Acquire(); + compute_pass_descriptor_queue.Acquire(2); compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, compare_size); compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, sizeof(u32)); const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; @@ -476,7 +476,7 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe static constexpr size_t DISPATCH_SIZE = 2048U; size_t runs_to_do = std::min(current_runs, DISPATCH_SIZE); current_runs -= runs_to_do; - compute_pass_descriptor_queue.Acquire(); + compute_pass_descriptor_queue.Acquire(3); compute_pass_descriptor_queue.AddBuffer(src_buffer, 0, number_of_sums * sizeof(u64)); compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, number_of_sums * sizeof(u64)); compute_pass_descriptor_queue.AddBuffer(accumulation_buffer, 0, sizeof(u64)); @@ -583,7 +583,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U); const u32 num_dispatches_z = image.info.resources.layers; - compute_pass_descriptor_queue.Acquire(); + compute_pass_descriptor_queue.Acquire(2); compute_pass_descriptor_queue.AddBuffer(map.buffer, input_offset, image.guest_size_bytes - swizzle.buffer_offset); compute_pass_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); @@ -688,7 +688,7 @@ void MSAACopyPass::CopyImage(Image& dst_image, Image& src_image, ASSERT(copy.dst_subresource.base_layer == 0); ASSERT(copy.dst_subresource.num_layers == 1); - compute_pass_descriptor_queue.Acquire(); + compute_pass_descriptor_queue.Acquire(2); compute_pass_descriptor_queue.AddImage( src_image.StorageImageView(copy.src_subresource.base_level)); compute_pass_descriptor_queue.AddImage( diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 2d9c5d4148..a673d69abf 100644 --- 
a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -102,7 +102,16 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache) { - guest_descriptor_queue.Acquire(); + { + size_t required_entries = 0; + required_entries += Shader::NumDescriptors(info.constant_buffer_descriptors); + required_entries += Shader::NumDescriptors(info.storage_buffers_descriptors); + required_entries += Shader::NumDescriptors(info.texture_buffer_descriptors); + required_entries += Shader::NumDescriptors(info.image_buffer_descriptors); + required_entries += Shader::NumDescriptors(info.texture_descriptors); + required_entries += Shader::NumDescriptors(info.image_descriptors); + guest_descriptor_queue.Acquire(required_entries); + } buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); buffer_cache.UnbindComputeStorageBuffers(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 745389213e..e8a3e345ba 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -460,7 +460,25 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); - guest_descriptor_queue.Acquire(); + // Compute exact number of descriptor entries required for this draw. 
+ size_t required_entries = 0; + const auto acc_descriptors = [](const Shader::Info& info) -> size_t { + size_t n = 0; + n += Shader::NumDescriptors(info.constant_buffer_descriptors); + n += Shader::NumDescriptors(info.storage_buffers_descriptors); + n += Shader::NumDescriptors(info.texture_buffer_descriptors); + n += Shader::NumDescriptors(info.image_buffer_descriptors); + n += Shader::NumDescriptors(info.texture_descriptors); + n += Shader::NumDescriptors(info.image_descriptors); + return n; + }; + if constexpr (Spec::enabled_stages[0]) required_entries += acc_descriptors(stage_infos[0]); + if constexpr (Spec::enabled_stages[1]) required_entries += acc_descriptors(stage_infos[1]); + if constexpr (Spec::enabled_stages[2]) required_entries += acc_descriptors(stage_infos[2]); + if constexpr (Spec::enabled_stages[3]) required_entries += acc_descriptors(stage_infos[3]); + if constexpr (Spec::enabled_stages[4]) required_entries += acc_descriptors(stage_infos[4]); + + guest_descriptor_queue.Acquire(required_entries); RescalingPushConstant rescaling; RenderAreaPushConstant render_area; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 0630ebda5e..5e238b33c9 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -28,17 +28,21 @@ void UpdateDescriptorQueue::TickFrame() { payload_cursor = payload_start; } -void UpdateDescriptorQueue::Acquire() { - // Minimum number of entries required. - // This is the maximum number of entries a single draw call might use. 
- static constexpr size_t MIN_ENTRIES = 0x400; - - if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) { - LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); +void UpdateDescriptorQueue::Acquire(size_t required_entries) { + const size_t used = static_cast<size_t>(std::distance(payload_start, payload_cursor)); + if (used + required_entries > FRAME_PAYLOAD_SIZE) { + LOG_WARNING(Render_Vulkan, "Descriptor payload near overflow (used={} req={}), waiting", + used, required_entries); scheduler.WaitWorker(); payload_cursor = payload_start; } upload_start = payload_cursor; } +void UpdateDescriptorQueue::Acquire() { + // Conservative legacy reservation for backward callers. + static constexpr size_t MIN_ENTRIES = 0x400; + Acquire(MIN_ENTRIES); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index 82fce298da..c791ee543d 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -4,6 +4,7 @@ #pragma once #include <variant> +#include "common/assert.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -41,6 +42,11 @@ public: void TickFrame(); + // Ensure the queue has at least 'required_entries' free slots for this draw/dispatch. + // Prefer using this overload to avoid underestimations that can cause overflows. + void Acquire(size_t required_entries); + + // Legacy fallback that reserves a conservative number of entries. 
void Acquire(); const DescriptorUpdateEntry* UpdateData() const noexcept { @@ -48,6 +54,8 @@ public: } void AddSampledImage(VkImageView image_view, VkSampler sampler) { + ASSERT(static_cast<size_t>(std::distance(payload_start, payload_cursor)) < + FRAME_PAYLOAD_SIZE); *(payload_cursor++) = VkDescriptorImageInfo{ .sampler = sampler, .imageView = image_view, @@ -56,6 +64,8 @@ public: } void AddImage(VkImageView image_view) { + ASSERT(static_cast<size_t>(std::distance(payload_start, payload_cursor)) < + FRAME_PAYLOAD_SIZE); *(payload_cursor++) = VkDescriptorImageInfo{ .sampler = VK_NULL_HANDLE, .imageView = image_view, @@ -64,6 +74,8 @@ public: } void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) { + ASSERT(static_cast<size_t>(std::distance(payload_start, payload_cursor)) < + FRAME_PAYLOAD_SIZE); *(payload_cursor++) = VkDescriptorBufferInfo{ .buffer = buffer, .offset = offset, @@ -72,6 +84,8 @@ public: } void AddTexelBuffer(VkBufferView texel_buffer) { + ASSERT(static_cast<size_t>(std::distance(payload_start, payload_cursor)) < + FRAME_PAYLOAD_SIZE); *(payload_cursor++) = texel_buffer; }