diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 68543bdd48..e8569672bf 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -46,7 +46,7 @@ namespace Vulkan { using VideoCommon::ImageViewType; namespace { - + [[nodiscard]] VkImageAspectFlags AspectMaskFromFormat(VideoCore::Surface::PixelFormat format) { using VideoCore::Surface::SurfaceType; switch (VideoCore::Surface::GetFormatType(format)) { @@ -307,11 +307,12 @@ constexpr std::array MakeStages( } void UpdateOneTextureDescriptorSet(const Device& device, VkDescriptorSet descriptor_set, - VkSampler sampler, VkImageView image_view) { + VkSampler sampler, VkImageView image_view, + VkImageLayout layout = VK_IMAGE_LAYOUT_GENERAL) { const VkDescriptorImageInfo image_info{ .sampler = sampler, .imageView = image_view, - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + .imageLayout = layout, }; const VkWriteDescriptorSet write_descriptor_set{ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, @@ -330,16 +331,17 @@ void UpdateOneTextureDescriptorSet(const Device& device, VkDescriptorSet descrip void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descriptor_set, VkSampler sampler, VkImageView image_view_0, - VkImageView image_view_1) { + VkImageView image_view_1, + VkImageLayout layout = VK_IMAGE_LAYOUT_GENERAL) { const VkDescriptorImageInfo image_info_0{ .sampler = sampler, .imageView = image_view_0, - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + .imageLayout = layout, }; const VkDescriptorImageInfo image_info_1{ .sampler = sampler, .imageView = image_view_1, - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + .imageLayout = layout, }; const std::array write_descriptor_sets{ VkWriteDescriptorSet{ @@ -454,7 +456,7 @@ void RecordShaderReadBarrier(Scheduler& scheduler, const ImageView& image_view) const VkImageSubresourceRange subresource_range = SubresourceRangeFromView(image_view); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([image, subresource_range](vk::CommandBuffer cmdbuf) { - const VkImageMemoryBarrier barrier{ + const VkImageMemoryBarrier barrier_to_read{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | @@ -463,23 +465,49 @@ void RecordShaderReadBarrier(Scheduler& scheduler, const ImageView& image_view) VK_ACCESS_TRANSFER_WRITE_BIT, .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = subresource_range, + }; + + const VkImageMemoryBarrier barrier_to_general{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_READ_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, .newLayout = VK_IMAGE_LAYOUT_GENERAL, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = image, .subresourceRange = subresource_range, }; + cmdbuf.PipelineBarrier( VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | + VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - barrier); + barrier_to_read); + + cmdbuf.PipelineBarrier( + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, + barrier_to_general); }); } @@ -595,13 +623,16 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView TransitionImageLayout(cmdbuf, src_image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); BeginRenderPass(cmdbuf, dst_framebuffer); const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); - UpdateOneTextureDescriptorSet(device, descriptor_set, src_sampler, src_image_view); + UpdateOneTextureDescriptorSet(device, descriptor_set, src_sampler, src_image_view, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr); BindBlitState(cmdbuf, layout, dst_region, src_region, src_size); cmdbuf.Draw(3, 1, 0, 0); cmdbuf.EndRenderPass(); + TransitionImageLayout(cmdbuf, src_image, VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); }); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index ca58e3fb4c..e04848207a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -15,6 +15,7 @@ #include "video_core/renderer_vulkan/pipeline_helper.h" #include "common/bit_field.h" +#include "video_core/surface.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/pipeline_statistics.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" @@ -279,7 +280,10 @@ GraphicsPipeline::GraphicsPipeline( descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout, uses_push_descriptor); - const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; + VkRenderPass render_pass = VK_NULL_HANDLE; + if (!device.IsDynamicRenderingSupported()) { + render_pass = render_pass_cache.Get(MakeRenderPassKey(key.state)); + } Validate(); MakePipeline(render_pass); if (pipeline_statistics) { @@ -936,10 +940,57 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; } + VkPipelineRenderingCreateInfo rendering_create_info{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO, + .pNext = nullptr, + .viewMask = 0, // Multiview not currently supported, set to 0 for single-view rendering + .colorAttachmentCount = 0, + .pColorAttachmentFormats = nullptr, + .depthAttachmentFormat = VK_FORMAT_UNDEFINED, + .stencilAttachmentFormat = VK_FORMAT_UNDEFINED, + }; + + std::vector color_attachment_formats; + const void* pNext = nullptr; + + if (device.IsDynamicRenderingSupported()) { + const auto pass_key = MakeRenderPassKey(key.state); + color_attachment_formats.reserve(pass_key.color_formats.size()); + for (const auto& format : pass_key.color_formats) { + if (format != PixelFormat::Invalid) { + color_attachment_formats.push_back( + MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, true, format).format); + } else { + color_attachment_formats.push_back(VK_FORMAT_UNDEFINED); + } + } + rendering_create_info.colorAttachmentCount = static_cast(color_attachment_formats.size()); + rendering_create_info.pColorAttachmentFormats = color_attachment_formats.data(); + + if (pass_key.depth_format != PixelFormat::Invalid) { + const auto depth_format = + MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, true, pass_key.depth_format) + .format; + // Determine which aspects are present + const auto surface_type = VideoCore::Surface::GetFormatType(pass_key.depth_format); + const bool has_depth = surface_type == VideoCore::Surface::SurfaceType::Depth || + surface_type == VideoCore::Surface::SurfaceType::DepthStencil; + const bool has_stencil = surface_type == VideoCore::Surface::SurfaceType::Stencil || + surface_type == VideoCore::Surface::SurfaceType::DepthStencil; + if (has_depth) { + rendering_create_info.depthAttachmentFormat = depth_format; + } + if (has_stencil) { + rendering_create_info.stencilAttachmentFormat = depth_format; + } + } + pNext = &rendering_create_info; + } + pipeline = device.GetLogical().CreateGraphicsPipeline( { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, + .pNext = pNext, .flags = flags, .stageCount = static_cast(shader_stages.size()), .pStages = shader_stages.data(), diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index b66f5a0502..572f8acd14 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -381,11 +381,6 @@ void RasterizerVulkan::Clear(u32 layer_count) { texture_cache.UpdateRenderTargets(true); const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); const VkExtent2D render_area = framebuffer->RenderArea(); - scheduler.RequestRenderpass(framebuffer); - - query_cache.NotifySegment(true); - query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, - maxwell3d->regs.zpass_pixel_count_enable); u32 up_scale = 1; u32 down_shift = 0; @@ -393,7 +388,6 @@ void RasterizerVulkan::Clear(u32 layer_count) { up_scale = Settings::values.resolution_info.up_scale; down_shift = Settings::values.resolution_info.down_shift; } - UpdateViewportsState(regs); VkRect2D default_scissor; default_scissor.offset.x = 0; @@ -415,8 +409,66 @@ void RasterizerVulkan::Clear(u32 layer_count) { .height = (std::min)(clear_rect.rect.extent.height, render_area.height), }; + // Check if full screen clear + const bool is_full_screen = clear_rect.rect.offset.x == 0 && + clear_rect.rect.offset.y == 0 && + clear_rect.rect.extent.width == render_area.width && + clear_rect.rect.extent.height == render_area.height; + + const auto& clear_regs = maxwell3d->regs.clear_surface; + const bool use_color = clear_regs.R || clear_regs.G || clear_regs.B || clear_regs.A; + const bool use_depth = clear_regs.Z; + const bool use_stencil = clear_regs.S; + const bool full_color_mask = clear_regs.R && clear_regs.G && clear_regs.B && clear_regs.A; + + // Extract clear values + VkClearColorValue clear_color_value{}; const u32 color_attachment = regs.clear_surface.RT; - if (use_color && framebuffer->HasAspectColorBit(color_attachment)) { + if (framebuffer->HasAspectColorBit(color_attachment)) { + const auto format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(regs.rt[color_attachment].format); + bool is_integer = IsPixelFormatInteger(format); + bool is_signed = IsPixelFormatSignedInteger(format); + size_t int_size = PixelComponentSizeBitsInteger(format); + if (!is_integer) { + std::memcpy(clear_color_value.float32, regs.clear_color.data(), regs.clear_color.size() * sizeof(f32)); + } else if (!is_signed) { + for (size_t i = 0; i < 4; i++) { + clear_color_value.uint32[i] = static_cast(static_cast(static_cast(int_size) << 1U) * regs.clear_color[i]); + } + } else { + for (size_t i = 0; i < 4; i++) { + clear_color_value.int32[i] = static_cast(static_cast(static_cast(int_size - 1) << 1) * (regs.clear_color[i] - 0.5f)); + } + } + } + + VkClearDepthStencilValue clear_depth_stencil_value{ + .depth = regs.clear_depth, + .stencil = regs.clear_stencil, + }; + + const bool use_renderpass_clear_color = is_full_screen && full_color_mask && use_color; + const bool use_renderpass_clear_depth = is_full_screen && use_depth; + const bool use_renderpass_clear_stencil = is_full_screen && use_stencil && (regs.stencil_front_mask == 0xFF); + + // Only use LOAD_OP_CLEAR if full screen and full mask (and safe stencil mask) + scheduler.SetNextRenderpassClears( + use_renderpass_clear_color, + use_renderpass_clear_depth, + use_renderpass_clear_stencil, + clear_color_value, + clear_depth_stencil_value + ); + + scheduler.RequestRenderpass(framebuffer); + + query_cache.NotifySegment(true); + query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, + maxwell3d->regs.zpass_pixel_count_enable); + + UpdateViewportsState(regs); + + if (use_color && !use_renderpass_clear_color && framebuffer->HasAspectColorBit(color_attachment)) { const auto format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(regs.rt[color_attachment].format); bool is_integer = IsPixelFormatInteger(format); @@ -466,10 +518,10 @@ void RasterizerVulkan::Clear(u32 layer_count) { return; } VkImageAspectFlags aspect_flags = 0; - if (use_depth && framebuffer->HasAspectDepthBit()) { + if (use_depth && !use_renderpass_clear_depth && framebuffer->HasAspectDepthBit()) { aspect_flags |= VK_IMAGE_ASPECT_DEPTH_BIT; } - if (use_stencil && framebuffer->HasAspectStencilBit()) { + if (use_stencil && !use_renderpass_clear_stencil && framebuffer->HasAspectStencilBit()) { aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; } if (aspect_flags == 0) { @@ -483,7 +535,7 @@ void RasterizerVulkan::Clear(u32 layer_count) { Offset2D{.x = clear_rect.rect.offset.x + static_cast(clear_rect.rect.extent.width), .y = clear_rect.rect.offset.y + static_cast(clear_rect.rect.extent.height)}}; - blit_image.ClearDepthStencil(framebuffer, use_depth, regs.clear_depth, + blit_image.ClearDepthStencil(framebuffer, use_depth && !use_renderpass_clear_depth, regs.clear_depth, static_cast(regs.stencil_front_mask), regs.clear_stencil, regs.stencil_front_func_mask, dst_region); } else { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index cd5793746b..7aeea3bde8 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -93,6 +93,217 @@ void Scheduler::RequestRenderpass(const Framebuffer* framebuffer) { const VkRenderPass renderpass = framebuffer->RenderPass(); const VkFramebuffer framebuffer_handle = framebuffer->Handle(); const VkExtent2D render_area = framebuffer->RenderArea(); + if (device.IsDynamicRenderingSupported()) { + bool is_clear_pending = next_clear_state.depth || next_clear_state.stencil; + for (bool c : next_clear_state.color) { + is_clear_pending |= c; + } + + if (!is_clear_pending && state.is_dynamic_rendering && framebuffer_handle == state.framebuffer && + render_area.width == state.render_area.width && + render_area.height == state.render_area.height) { + return; + } + EndRenderPass(); + + state.is_dynamic_rendering = true; + state.framebuffer = framebuffer_handle; + state.render_area = render_area; + state.renderpass = nullptr; + + const u32 layers = framebuffer->Layers(); + const auto& image_views = framebuffer->ImageViews(); + const auto& image_ranges = framebuffer->ImageRanges(); + const auto& images = framebuffer->Images(); + const u32 num_images = framebuffer->NumImages(); + + std::array attachments_views{}; + for (size_t i = 0; i < 8; ++i) { + if (framebuffer->IsColorAttachmentValid(i)) { + attachments_views[i] = image_views[framebuffer->GetImageIndex(i)]; + } else { + attachments_views[i] = VK_NULL_HANDLE; + } + } + + VkImageView depth_view = VK_NULL_HANDLE; + bool has_depth = framebuffer->HasAspectDepthBit(); + bool has_stencil = framebuffer->HasAspectStencilBit(); + if (has_depth || has_stencil) { + depth_view = image_views[framebuffer->NumColorBuffers()]; + } + + // Determine initialization state for barriers + // If we haven't seen this image before, we must assume it is UNDEFINED. + // This prevents validation errors and artifacts on mobile when loading from UNDEFINED. + std::array is_first_use; + for(size_t i=0; i pre_barriers; + for (size_t i = 0; i < num_images; ++i) { + const VkImageSubresourceRange& range = image_ranges[i]; + const bool is_color = (range.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0; + const bool is_depth_stencil = (range.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0; + + VkImageLayout optimal_layout = VK_IMAGE_LAYOUT_GENERAL; + if (is_color) { + optimal_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + } else if (is_depth_stencil) { + optimal_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + } + + // If first use, transition from UNDEFINED to discard garbage data safely. + // Otherwise, transition from GENERAL to preserve existing data. + const VkImageLayout old_layout = is_first_use[i] ? VK_IMAGE_LAYOUT_UNDEFINED : VK_IMAGE_LAYOUT_GENERAL; + + // For first use (UNDEFINED), we don't need to wait for any previous access. + const VkAccessFlags src_access_mask = is_first_use[i] ? 0 : (VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); + + pre_barriers[i] = VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = src_access_mask, + .dstAccessMask = is_color ? (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT) + : (VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT), + .oldLayout = old_layout, + .newLayout = optimal_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = images[i], + .subresourceRange = range, + }; + } + + if (num_images > 0) { + cmdbuf.PipelineBarrier( + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + 0, nullptr, nullptr, + vk::Span(pre_barriers.data(), num_images) + ); + } + }); + + // Capture the clear state and reset it for next time + const ClearState current_clear_state = next_clear_state; + // Reset for next renderpass + next_clear_state = ClearState{}; + + // Capture image indices to allow looking up is_first_use in the lambda + std::array color_indices{}; + for (size_t i = 0; i < 8; ++i) { + if (framebuffer->IsColorAttachmentValid(i)) { + color_indices[i] = framebuffer->GetImageIndex(i); + } else { + color_indices[i] = 0; + } + } + const size_t depth_index = framebuffer->NumColorBuffers(); + + Record([render_area, layers, attachments_views, depth_view, has_depth, has_stencil, current_clear_state, is_first_use, color_indices, depth_index](vk::CommandBuffer cmdbuf) { + std::array color_attachments{}; + for (size_t i = 0; i < 8; ++i) { + // Determine proper load operation per attachment: + // - DONT_CARE: Null attachments (not used) + // - CLEAR: When we're clearing this specific color attachment + // - LOAD: Normal rendering (preserve existing content) + VkAttachmentLoadOp load_op; + if (attachments_views[i] == VK_NULL_HANDLE) { + load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + } else if (current_clear_state.color[i]) { + load_op = VK_ATTACHMENT_LOAD_OP_CLEAR; + } else { + // Spec Compliance & Turnip Optimization: + // If we transitioned from UNDEFINED (is_first_use), we should use DONT_CARE. + // This avoids loading garbage data and prevents tile loads on mobile GPUs. + // We use the captured image index to check the initialization state. + if (is_first_use[color_indices[i]]) { + load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + } else { + load_op = VK_ATTACHMENT_LOAD_OP_LOAD; + } + } + + color_attachments[i] = { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + .pNext = nullptr, + .imageView = attachments_views[i], + .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + .resolveMode = VK_RESOLVE_MODE_NONE, + .resolveImageView = VK_NULL_HANDLE, + .resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .loadOp = load_op, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .clearValue = {.color = current_clear_state.clear_color}, + }; + } + + VkRenderingAttachmentInfo depth_attachment{ + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + .pNext = nullptr, + .imageView = depth_view, + .imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + .resolveMode = VK_RESOLVE_MODE_NONE, + .resolveImageView = VK_NULL_HANDLE, + .resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .loadOp = current_clear_state.depth ? VK_ATTACHMENT_LOAD_OP_CLEAR + : (has_depth && is_first_use[depth_index] ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : VK_ATTACHMENT_LOAD_OP_LOAD), + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .clearValue = {.depthStencil = current_clear_state.clear_depth_stencil}, + }; + + VkRenderingAttachmentInfo stencil_attachment{ + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + .pNext = nullptr, + .imageView = depth_view, + .imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + .resolveMode = VK_RESOLVE_MODE_NONE, + .resolveImageView = VK_NULL_HANDLE, + .resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .loadOp = current_clear_state.stencil ? VK_ATTACHMENT_LOAD_OP_CLEAR + : (has_stencil && is_first_use[depth_index] ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : VK_ATTACHMENT_LOAD_OP_LOAD), + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .clearValue = {.depthStencil = current_clear_state.clear_depth_stencil}, + }; + + const VkRenderingInfo rendering_info{ + .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, + .pNext = nullptr, + .flags = 0, + .renderArea = { + .offset = {0, 0}, + .extent = render_area, + }, + .layerCount = layers, + .viewMask = 0, + .colorAttachmentCount = 8, + .pColorAttachments = color_attachments.data(), + .pDepthAttachment = has_depth ? &depth_attachment : nullptr, + .pStencilAttachment = has_stencil ? &stencil_attachment : nullptr, + }; + + cmdbuf.BeginRendering(rendering_info); + }); + + num_renderpass_images = framebuffer->NumImages(); + renderpass_images = framebuffer->Images(); + renderpass_image_ranges = framebuffer->ImageRanges(); + return; + } + if (renderpass == state.renderpass && framebuffer_handle == state.framebuffer && render_area.width == state.render_area.width && render_area.height == state.render_area.height) { @@ -262,6 +473,7 @@ void Scheduler::AllocateNewContext() { void Scheduler::InvalidateState() { state.graphics_pipeline = nullptr; state.rescaling_defined = false; + state.is_dynamic_rendering = false; state_tracker.InvalidateCommandBufferState(); } @@ -270,18 +482,19 @@ void Scheduler::EndPendingOperations() { EndRenderPass(); } -void Scheduler::EndRenderPass() - { - if (!state.renderpass) { + void Scheduler::EndRenderPass() { + if (!state.renderpass && !state.is_dynamic_rendering) { return; } query_cache->CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, false); query_cache->NotifySegment(false); + const bool is_dynamic_rendering = state.is_dynamic_rendering; Record([num_images = num_renderpass_images, images = renderpass_images, - ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) { + ranges = renderpass_image_ranges, + is_dynamic_rendering](vk::CommandBuffer cmdbuf) { std::array barriers; VkPipelineStageFlags src_stages = 0; @@ -294,16 +507,23 @@ void Scheduler::EndRenderPass() VkAccessFlags src_access = 0; VkPipelineStageFlags this_stage = 0; + VkImageLayout old_layout = VK_IMAGE_LAYOUT_GENERAL; if (is_color) { src_access |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; this_stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + if (is_dynamic_rendering) { + old_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + } } if (is_depth_stencil) { src_access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; this_stage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + if (is_dynamic_rendering) { + old_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + } } src_stages |= this_stage; @@ -317,7 +537,7 @@ void Scheduler::EndRenderPass() | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .oldLayout = old_layout, .newLayout = VK_IMAGE_LAYOUT_GENERAL, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, @@ -331,7 +551,11 @@ void Scheduler::EndRenderPass() VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - cmdbuf.EndRenderPass(); + if (is_dynamic_rendering) { + cmdbuf.EndRendering(); + } else { + cmdbuf.EndRenderPass(); + } cmdbuf.PipelineBarrier(src_stages, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, @@ -343,9 +567,14 @@ void Scheduler::EndRenderPass() }); state.renderpass = nullptr; + state.is_dynamic_rendering = false; num_renderpass_images = 0; } +} +void Scheduler::UnregisterImage(VkImage image) { + initialized_images.erase(image); +} void Scheduler::AcquireNewChunk() { std::scoped_lock rl{reserve_mutex}; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 54ab8ba52b..78a43831b9 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -10,6 +13,7 @@ #include #include #include +#include #include "common/alignment.h" #include "common/common_types.h" @@ -55,10 +59,27 @@ public: /// Requests to begin a renderpass. void RequestRenderpass(const Framebuffer* framebuffer); + /// Signals that the next renderpass will perform a clear operation. + void SetNextRenderpassClears(bool clear_color, bool clear_depth, bool clear_stencil, + const VkClearColorValue& color_val = {}, + const VkClearDepthStencilValue& depth_stencil_val = {}) { + // Clear all color attachment flags + for (int i = 0; i < 8; i++) { + next_clear_state.color[i] = clear_color; + } + next_clear_state.depth = clear_depth; + next_clear_state.stencil = clear_stencil; + next_clear_state.clear_color = color_val; + next_clear_state.clear_depth_stencil = depth_stencil_val; + } + /// Requests the current execution context to be able to execute operations only allowed outside /// of a renderpass. void RequestOutsideRenderPassOperationContext(); + /// Unregisters an image from the scheduler, removing it from initialized tracking. + void UnregisterImage(VkImage image); + /// Update the pipeline to the current execution context. bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline); @@ -214,6 +235,7 @@ private: GraphicsPipeline* graphics_pipeline = nullptr; bool is_rescaling = false; bool rescaling_defined = false; + bool is_dynamic_rendering = false; }; void WorkerThread(std::stop_token stop_token); @@ -246,6 +268,19 @@ private: State state; + // Clear operation tracking for dynamic rendering + struct ClearState { + bool color[8] = {false}; // Which color attachments to clear + bool depth = false; + bool stencil = false; + VkClearColorValue clear_color{}; + VkClearDepthStencilValue clear_depth_stencil{}; + }; + ClearState next_clear_state; + + // Tracks images that have been initialized/transitioned at least once to avoid UNDEFINED loads + std::unordered_set initialized_images; + u32 num_renderpass_images = 0; std::array renderpass_images{}; std::array renderpass_image_ranges{}; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 136a11f78d..61db298dc1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1553,7 +1553,16 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {} -Image::~Image() = default; +Image::~Image() { + if (scheduler) { + if (original_image) { + scheduler->UnregisterImage(*original_image); + } + if (scaled_image) { + scheduler->UnregisterImage(*scaled_image); + } + } +} void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset, std::span copies) { @@ -2300,6 +2309,7 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, is_rescaled = is_rescaled_; const auto& resolution = runtime.resolution; + valid_color_attachments = 0; u32 width = (std::numeric_limits::max)(); u32 height = (std::numeric_limits::max)(); @@ -2315,8 +2325,10 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, : color_buffer->size.height); attachments.push_back(color_buffer->RenderTarget()); renderpass_key.color_formats[index] = color_buffer->format; + valid_color_attachments |= static_cast(1 << index); num_layers = (std::max)(num_layers, color_buffer->range.extent.layers); images[num_images] = color_buffer->ImageHandle(); + image_views[num_images] = color_buffer->RenderTarget(); image_ranges[num_images] = MakeSubresourceRange(color_buffer); rt_map[index] = num_images; samples = color_buffer->Samples(); @@ -2332,6 +2344,7 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, renderpass_key.depth_format = depth_buffer->format; num_layers = (std::max)(num_layers, depth_buffer->range.extent.layers); images[num_images] = depth_buffer->ImageHandle(); + image_views[num_images] = depth_buffer->RenderTarget(); const VkImageSubresourceRange subresource_range = MakeSubresourceRange(depth_buffer); image_ranges[num_images] = subresource_range; samples = depth_buffer->Samples(); @@ -2342,6 +2355,7 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, renderpass_key.depth_format = PixelFormat::Invalid; } renderpass_key.samples = samples; + layers = static_cast((std::max)(num_layers, 1)); renderpass = runtime.render_pass_cache.Get(renderpass_key); render_area.width = (std::min)(render_area.width, width); @@ -2357,7 +2371,7 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, .pAttachments = attachments.data(), .width = render_area.width, .height = render_area.height, - .layers = static_cast((std::max)(num_layers, 1)), + .layers = layers, }); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index cd11cc8fc7..442900be5d 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later @@ -340,6 +343,10 @@ public: return render_area; } + [[nodiscard]] u32 Layers() const noexcept { + return layers; + } + [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { return samples; } @@ -356,6 +363,10 @@ public: return images; } + [[nodiscard]] const std::array& ImageViews() const noexcept { + return image_views; + } + [[nodiscard]] const std::array& ImageRanges() const noexcept { return image_ranges; } @@ -376,16 +387,27 @@ public: return is_rescaled; } + [[nodiscard]] bool IsColorAttachmentValid(size_t index) const noexcept { + return (valid_color_attachments & (1 << index)) != 0; + } + + [[nodiscard]] size_t GetImageIndex(size_t index) const noexcept { + return rt_map[index]; + } + private: vk::Framebuffer framebuffer; VkRenderPass renderpass{}; VkExtent2D render_area{}; + u32 layers = 1; VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; u32 num_color_buffers = 0; u32 num_images = 0; std::array images{}; + std::array image_views{}; std::array image_ranges{}; std::array rt_map{}; + u8 valid_color_attachments = 0; bool has_depth{}; bool has_stencil{}; bool is_rescaled{}; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index cb13f28523..a38ed07557 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -39,7 +39,8 @@ VK_DEFINE_HANDLE(VmaAllocator) #define FOR_EACH_VK_FEATURE_1_3(FEATURE) \ FEATURE(EXT, ShaderDemoteToHelperInvocation, SHADER_DEMOTE_TO_HELPER_INVOCATION, \ shader_demote_to_helper_invocation) \ - FEATURE(EXT, SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, subgroup_size_control) + FEATURE(EXT, SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, subgroup_size_control) \ + FEATURE(KHR, DynamicRendering, DYNAMIC_RENDERING, dynamic_rendering) // Define all features which may be used by the implementation and require an extension here. #define FOR_EACH_VK_FEATURE_EXT(FEATURE) \ @@ -534,6 +535,11 @@ public: return features.extended_dynamic_state2.extendedDynamicState2LogicOp; } + /// Returns true if the device supports VK_KHR_dynamic_rendering. + bool IsDynamicRenderingSupported() const { + return features.dynamic_rendering.dynamicRendering; + } + /// Returns true if the device supports VK_EXT_extended_dynamic_state3. bool IsExtExtendedDynamicState3Supported() const { return extensions.extended_dynamic_state3; diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index b77d01711a..6fd74f319d 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -91,6 +91,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkBindImageMemory); X(vkCmdBeginConditionalRenderingEXT); X(vkCmdBeginQuery); + X(vkCmdBeginRendering); X(vkCmdBeginRenderPass); X(vkCmdBeginTransformFeedbackEXT); X(vkCmdBeginDebugUtilsLabelEXT); @@ -118,6 +119,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdDrawIndirectByteCountEXT); X(vkCmdEndConditionalRenderingEXT); X(vkCmdEndQuery); + X(vkCmdEndRendering); X(vkCmdEndRenderPass); X(vkCmdResetQueryPool); X(vkCmdEndTransformFeedbackEXT); @@ -251,6 +253,12 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { Proc(dld.vkCmdDrawIndirectCount, dld, "vkCmdDrawIndirectCountKHR", device); Proc(dld.vkCmdDrawIndexedIndirectCount, dld, "vkCmdDrawIndexedIndirectCountKHR", device); } + + // Support for dynamic rendering is optional in Vulkan 1.2 (via KHR) and core in 1.3 + if (!dld.vkCmdBeginRendering) { + Proc(dld.vkCmdBeginRendering, dld, "vkCmdBeginRenderingKHR", device); + Proc(dld.vkCmdEndRendering, dld, "vkCmdEndRenderingKHR", device); + } #undef X } diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 39396b3279..d24827bbd9 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -191,6 +191,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdBeginConditionalRenderingEXT vkCmdBeginConditionalRenderingEXT{}; PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; PFN_vkCmdBeginQuery vkCmdBeginQuery{}; + PFN_vkCmdBeginRendering vkCmdBeginRendering{}; PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{}; PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{}; @@ -219,6 +220,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdEndConditionalRenderingEXT vkCmdEndConditionalRenderingEXT{}; PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; PFN_vkCmdEndQuery vkCmdEndQuery{}; + PFN_vkCmdEndRendering vkCmdEndRendering{}; PFN_vkCmdResetQueryPool vkCmdResetQueryPool{}; PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{};