diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index fef9a5b16e..00190c565b 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -76,6 +76,7 @@ set(SHADER_FILES vulkan_quad_indexed.comp vulkan_turbo_mode.comp vulkan_uint8.comp + vulkan_qcom_msaa_resolve.frag convert_rgba8_to_bgra8.frag convert_yuv420_to_rgb.comp convert_rgb_to_yuv420.comp diff --git a/src/video_core/host_shaders/vulkan_qcom_msaa_resolve.frag b/src/video_core/host_shaders/vulkan_qcom_msaa_resolve.frag new file mode 100644 index 0000000000..4756de5ed6 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_qcom_msaa_resolve.frag @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#version 450 + +// VK_QCOM_render_pass_shader_resolve fragment shader +// Resolves MSAA attachment to single-sample within render pass +// Requires VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM in subpass flags + +// Use combined image sampler for MSAA texture instead of input attachment +// This allows us to sample MSAA textures from previous rendering +layout(set = 0, binding = 0) uniform sampler2DMS msaa_texture; + +layout(location = 0) out vec4 color_output; + +layout(push_constant) uniform PushConstants { + vec2 tex_scale; + vec2 tex_offset; +} push_constants; + +// Custom MSAA resolve using box filter (simple average) +// The sample count is queried from the bound image, so 2x, 4x and 8x sources all resolve correctly +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + ivec2 tex_size = textureSize(msaa_texture); + + // Clamp coordinates to texture bounds + coord = clamp(coord, ivec2(0), tex_size - ivec2(1)); + + vec4 accumulated_color = vec4(0.0); + int sample_count = textureSamples(msaa_texture); // Fetching past the real count is undefined + + // Box filter: simple average of all MSAA samples + for (int i = 0; i < sample_count; i++) { + accumulated_color += texelFetch(msaa_texture, 
coord, i); + } + + color_output = accumulated_color / float(sample_count); +} diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 68543bdd48..2608d32a70 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -40,6 +40,7 @@ #include "video_core/host_shaders/convert_rgba16f_to_rgba8_frag_spv.h" #include "video_core/host_shaders/dither_temporal_frag_spv.h" #include "video_core/host_shaders/dynamic_resolution_scale_comp_spv.h" +#include "video_core/host_shaders/vulkan_qcom_msaa_resolve_frag_spv.h" namespace Vulkan { @@ -545,6 +546,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, convert_rgba16f_to_rgba8_frag(BuildShader(device, CONVERT_RGBA16F_TO_RGBA8_FRAG_SPV)), dither_temporal_frag(BuildShader(device, DITHER_TEMPORAL_FRAG_SPV)), dynamic_resolution_scale_comp(BuildShader(device, DYNAMIC_RESOLUTION_SCALE_COMP_SPV)), + qcom_msaa_resolve_frag(BuildShader(device, VULKAN_QCOM_MSAA_RESOLVE_FRAG_SPV)), linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)), nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)) {} @@ -1240,4 +1242,30 @@ void BlitImageHelper::ApplyDynamicResolutionScale(const Framebuffer* dst_framebu Convert(*dynamic_resolution_scale_pipeline, dst_framebuffer, src_image_view); } +void BlitImageHelper::ResolveMSAAQcom(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + // VK_QCOM_render_pass_shader_resolve implementation. NOTE(review): qcom_msaa_resolve_frag is never passed to ConvertPipeline below - confirm which fragment shader this pipeline is built with + // This must be used within a render pass with VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM + ConvertPipeline(qcom_msaa_resolve_pipeline, + dst_framebuffer->RenderPass(), + false); + + RecordShaderReadBarrier(scheduler, src_image_view); + scheduler.RequestRenderpass(dst_framebuffer); + + const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); + const VkPipelineLayout layout = *one_texture_pipeline_layout; 
+ const VkPipeline pipeline = *qcom_msaa_resolve_pipeline; + + scheduler.Record([this, src_view, layout, pipeline](vk::CommandBuffer cmdbuf) { + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); + UpdateOneTextureDescriptorSet(device, descriptor_set, *nearest_sampler, src_view); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr); + cmdbuf.Draw(3, 1, 0, 0); + }); + + scheduler.InvalidateState(); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index bdb8cce883..28be64f88d 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -95,6 +95,8 @@ public: void ConvertRGBA16FtoRGBA8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); void ApplyDitherTemporal(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); void ApplyDynamicResolutionScale(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + + void ResolveMSAAQcom(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, @@ -159,6 +161,7 @@ private: vk::ShaderModule convert_rgba16f_to_rgba8_frag; vk::ShaderModule dither_temporal_frag; vk::ShaderModule dynamic_resolution_scale_comp; + vk::ShaderModule qcom_msaa_resolve_frag; vk::Sampler linear_sampler; vk::Sampler nearest_sampler; @@ -188,6 +191,7 @@ private: vk::Pipeline convert_rgba16f_to_rgba8_pipeline; vk::Pipeline dither_temporal_pipeline; vk::Pipeline dynamic_resolution_scale_pipeline; + vk::Pipeline qcom_msaa_resolve_pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index 9499690a85..b64aab8be0 100644 --- 
a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -156,8 +156,15 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples, key.tbdr_will_clear, key.tbdr_discard_after)); } + VkSubpassDescriptionFlags subpass_flags = 0; + if (key.qcom_shader_resolve) { + // VK_QCOM_render_pass_shader_resolve: enables custom shader resolve in fragment shader + // This must be the last subpass in the dependency chain + subpass_flags |= VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM; // 0x00000008 + } + const VkSubpassDescription subpass{ - .flags = 0, + .flags = subpass_flags, .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .inputAttachmentCount = 0, .pInputAttachments = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h index 4375327ca4..c375b1996b 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.h +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h @@ -25,6 +25,9 @@ struct RenderPassKey { // These flags indicate the expected usage pattern to optimize load/store operations bool tbdr_will_clear{false}; // Attachment will be cleared with vkCmdClearAttachments bool tbdr_discard_after{false}; // Attachment won't be read after render pass + + // VK_QCOM_render_pass_shader_resolve support + bool qcom_shader_resolve{false}; // Use shader resolve instead of fixed-function (last subpass) }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 51a4954df1..27ed8f8540 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -160,43 +160,39 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { }; } -/// Emergency fallback for MSAA with HDR 
formats: degrade to non-MSAA if driver doesn't support -/// shaderStorageImageMultisample (required for msaa_copy_pass) +/// Emergency fallback: degrade MSAA to non-MSAA for HDR formats when no resolve support exists [[nodiscard]] ImageInfo AdjustMSAAForHDRFormats(const Device& device, ImageInfo info) { - // Only apply emergency fallback if MSAA is requested if (info.num_samples <= 1) { return info; } - // Check if this is an HDR format that commonly fails with MSAA const auto vk_format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, info.format).format; const bool is_hdr_format = vk_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32; if (!is_hdr_format) { - return info; // Not an HDR format, no adjustment needed - } - - // If driver doesn't support shader storage image multisample, MSAACopyPass will fail - // Emergency fallback: degrade to non-MSAA (1 sample) to avoid texture corruption - if (!device.IsStorageImageMultisampleSupported()) { - LOG_ERROR(Render_Vulkan, - "EMERGENCY MSAA FALLBACK: Driver doesn't support shaderStorageImageMultisample. " - "Degrading HDR format {} from {}x MSAA to 1x (non-MSAA) to prevent texture corruption. " - "This will cause visual quality loss but prevents black textures.", - vk_format, info.num_samples); - - // Degrade to non-MSAA - // NOTE: We only change num_samples, NOT dimensions. The ImageInfo dimensions are already - // in "logical" space (full resolution), and MakeImageCreateInfo will handle the conversion - // to physical GPU dimensions based on num_samples automatically. 
- info.num_samples = 1; - return info; } - return info; // Driver supports MSAA storage images, no adjustment needed + // Qualcomm: VK_QCOM_render_pass_shader_resolve handles HDR+MSAA + if (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) { + if (device.IsQcomRenderPassShaderResolveSupported()) { + return info; + } + } + + // Other vendors: shaderStorageImageMultisample handles HDR+MSAA + if (device.IsStorageImageMultisampleSupported()) { + return info; + } + + // No suitable resolve method - degrade to non-MSAA + LOG_WARNING(Render_Vulkan, "HDR format {} with MSAA not supported, degrading to 1x samples", + vk_format); + info.num_samples = 1; + + return info; } [[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator, @@ -896,6 +892,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched astc_decoder_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue, memory_allocator); } + + // MSAA copy support via compute shader (only for non-Qualcomm with shaderStorageImageMultisample) + // Qualcomm uses VK_QCOM_render_pass_shader_resolve (fragment shader in render pass) if (device.IsStorageImageMultisampleSupported()) { msaa_copy_pass = std::make_unique( device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f4409ccd6a..5d48db5103 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -549,7 +549,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Shader Float Controls FORCE ENABLED by user (Eden Veil/Extensions)"); // Log driver capabilities - const auto& fc = float_control; + const auto& fc = properties.float_controls; LOG_INFO(Render_Vulkan, "Driver Float Controls Capabilities:"); 
LOG_INFO(Render_Vulkan, " - Denorm Flush FP32: {}", fc.shaderDenormFlushToZeroFloat32 ? "YES" : "NO"); LOG_INFO(Render_Vulkan, " - Denorm Preserve FP32: {}", fc.shaderDenormPreserveFloat32 ? "YES" : "NO"); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index ace7ee611f..3e319724e8 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -95,6 +95,7 @@ VK_DEFINE_HANDLE(VmaAllocator) EXTENSION(EXT, DESCRIPTOR_INDEXING, descriptor_indexing) \ EXTENSION(EXT, FILTER_CUBIC, filter_cubic) \ EXTENSION(QCOM, FILTER_CUBIC_WEIGHTS, filter_cubic_weights) \ + EXTENSION(QCOM, RENDER_PASS_SHADER_RESOLVE, render_pass_shader_resolve) \ EXTENSION(KHR, MAINTENANCE_1, maintenance1) \ EXTENSION(KHR, MAINTENANCE_2, maintenance2) \ EXTENSION(KHR, MAINTENANCE_3, maintenance3) \ @@ -582,6 +583,21 @@ public: return extensions.filter_cubic_weights; } + /// Returns true if the device supports VK_QCOM_render_pass_shader_resolve + bool IsQcomRenderPassShaderResolveSupported() const { + return extensions.render_pass_shader_resolve; + } + + /// Returns true if MSAA copy operations are supported via compute shader (upload/download) + /// Forwards IsStorageImageMultisampleSupported(); no vendor-specific check is made here + bool CanUploadMSAA() const { + return IsStorageImageMultisampleSupported(); + } + + bool CanDownloadMSAA() const { + return CanUploadMSAA(); + } + /// Returns true if the device supports VK_EXT_line_rasterization. bool IsExtLineRasterizationSupported() const { return extensions.line_rasterization;