Browse Source

[vk, host shaders, qcom] MSAA Handling by Native QCOM Shader Resolve

test-revert-gpu-optim
CamilleLaVey 1 month ago
parent
commit
aea945b671
  1. 1
      src/video_core/host_shaders/CMakeLists.txt
  2. 39
      src/video_core/host_shaders/vulkan_qcom_msaa_resolve.frag
  3. 28
      src/video_core/renderer_vulkan/blit_image.cpp
  4. 4
      src/video_core/renderer_vulkan/blit_image.h
  5. 9
      src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
  6. 3
      src/video_core/renderer_vulkan/vk_render_pass_cache.h
  7. 45
      src/video_core/renderer_vulkan/vk_texture_cache.cpp
  8. 2
      src/video_core/vulkan_common/vulkan_device.cpp
  9. 16
      src/video_core/vulkan_common/vulkan_device.h

1
src/video_core/host_shaders/CMakeLists.txt

@ -76,6 +76,7 @@ set(SHADER_FILES
vulkan_quad_indexed.comp
vulkan_turbo_mode.comp
vulkan_uint8.comp
vulkan_qcom_msaa_resolve.frag
convert_rgba8_to_bgra8.frag
convert_yuv420_to_rgb.comp
convert_rgb_to_yuv420.comp

39
src/video_core/host_shaders/vulkan_qcom_msaa_resolve.frag

@ -0,0 +1,39 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#version 450
// VK_QCOM_render_pass_shader_resolve fragment shader
// Resolves MSAA attachment to single-sample within render pass
// Requires VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM in subpass flags
// Use combined image sampler for MSAA texture instead of input attachment
// This allows us to sample MSAA textures from previous rendering
layout(set = 0, binding = 0) uniform sampler2DMS msaa_texture;
layout(location = 0) out vec4 color_output;
layout(push_constant) uniform PushConstants {
vec2 tex_scale;
vec2 tex_offset;
} push_constants;
// Custom MSAA resolve using a box filter (unweighted average of every sample).
// The sample count is queried from the bound multisampled image instead of being
// hard-coded, so 2x, 4x and 8x sources are all averaged over exactly their own samples.
void main() {
    // Integer texel coordinate of this fragment, clamped to the source image bounds
    // so texelFetch never addresses a texel outside the image.
    ivec2 tex_size = textureSize(msaa_texture);
    ivec2 coord = clamp(ivec2(gl_FragCoord.xy), ivec2(0), tex_size - ivec2(1));

    // Fetching a sample index >= the image's sample count is undefined, and stopping
    // early would drop samples; ask the image how many samples it really has.
    int sample_count = max(textureSamples(msaa_texture), 1);

    // Box filter: sum every sample, then divide by the number of samples.
    vec4 accumulated_color = vec4(0.0);
    for (int i = 0; i < sample_count; i++) {
        accumulated_color += texelFetch(msaa_texture, coord, i);
    }
    color_output = accumulated_color / float(sample_count);
}

28
src/video_core/renderer_vulkan/blit_image.cpp

@ -40,6 +40,7 @@
#include "video_core/host_shaders/convert_rgba16f_to_rgba8_frag_spv.h"
#include "video_core/host_shaders/dither_temporal_frag_spv.h"
#include "video_core/host_shaders/dynamic_resolution_scale_comp_spv.h"
#include "video_core/host_shaders/vulkan_qcom_msaa_resolve_frag_spv.h"
namespace Vulkan {
@ -545,6 +546,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
convert_rgba16f_to_rgba8_frag(BuildShader(device, CONVERT_RGBA16F_TO_RGBA8_FRAG_SPV)),
dither_temporal_frag(BuildShader(device, DITHER_TEMPORAL_FRAG_SPV)),
dynamic_resolution_scale_comp(BuildShader(device, DYNAMIC_RESOLUTION_SCALE_COMP_SPV)),
qcom_msaa_resolve_frag(BuildShader(device, VULKAN_QCOM_MSAA_RESOLVE_FRAG_SPV)),
linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {}
@ -1240,4 +1242,30 @@ void BlitImageHelper::ApplyDynamicResolutionScale(const Framebuffer* dst_framebu
Convert(*dynamic_resolution_scale_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ResolveMSAAQcom(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
// VK_QCOM_render_pass_shader_resolve implementation
// This must be used within a render pass with VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM
ConvertPipeline(qcom_msaa_resolve_pipeline,
dst_framebuffer->RenderPass(),
false);
RecordShaderReadBarrier(scheduler, src_image_view);
scheduler.RequestRenderpass(dst_framebuffer);
const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
const VkPipelineLayout layout = *one_texture_pipeline_layout;
const VkPipeline pipeline = *qcom_msaa_resolve_pipeline;
scheduler.Record([this, src_view, layout, pipeline](vk::CommandBuffer cmdbuf) {
const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
UpdateOneTextureDescriptorSet(device, descriptor_set, *nearest_sampler, src_view);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr);
cmdbuf.Draw(3, 1, 0, 0);
});
scheduler.InvalidateState();
}
} // namespace Vulkan

4
src/video_core/renderer_vulkan/blit_image.h

@ -95,6 +95,8 @@ public:
void ConvertRGBA16FtoRGBA8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ApplyDitherTemporal(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ApplyDynamicResolutionScale(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ResolveMSAAQcom(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
private:
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
@ -159,6 +161,7 @@ private:
vk::ShaderModule convert_rgba16f_to_rgba8_frag;
vk::ShaderModule dither_temporal_frag;
vk::ShaderModule dynamic_resolution_scale_comp;
vk::ShaderModule qcom_msaa_resolve_frag;
vk::Sampler linear_sampler;
vk::Sampler nearest_sampler;
@ -188,6 +191,7 @@ private:
vk::Pipeline convert_rgba16f_to_rgba8_pipeline;
vk::Pipeline dither_temporal_pipeline;
vk::Pipeline dynamic_resolution_scale_pipeline;
vk::Pipeline qcom_msaa_resolve_pipeline;
};
} // namespace Vulkan

9
src/video_core/renderer_vulkan/vk_render_pass_cache.cpp

@ -156,8 +156,15 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples,
key.tbdr_will_clear, key.tbdr_discard_after));
}
VkSubpassDescriptionFlags subpass_flags = 0;
if (key.qcom_shader_resolve) {
// VK_QCOM_render_pass_shader_resolve: enables custom shader resolve in fragment shader
// This must be the last subpass in the dependency chain
subpass_flags |= 0x00000004; // VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM
}
const VkSubpassDescription subpass{
.flags = 0,
.flags = subpass_flags,
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.inputAttachmentCount = 0,
.pInputAttachments = nullptr,

3
src/video_core/renderer_vulkan/vk_render_pass_cache.h

@ -25,6 +25,9 @@ struct RenderPassKey {
// These flags indicate the expected usage pattern to optimize load/store operations
bool tbdr_will_clear{false}; // Attachment will be cleared with vkCmdClearAttachments
bool tbdr_discard_after{false}; // Attachment won't be read after render pass
// VK_QCOM_render_pass_shader_resolve support
bool qcom_shader_resolve{false}; // Use shader resolve instead of fixed-function (last subpass)
};
} // namespace Vulkan

45
src/video_core/renderer_vulkan/vk_texture_cache.cpp

@ -160,43 +160,39 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
};
}
/// Emergency fallback: degrade MSAA to non-MSAA for HDR formats when no resolve support exists.
///
/// Returns `info` unchanged when it is not multisampled, or when its Vulkan surface format is
/// neither VK_FORMAT_B10G11R11_UFLOAT_PACK32 nor VK_FORMAT_E5B9G9R9_UFLOAT_PACK32. Otherwise
/// `num_samples` may be forced to 1 depending on the device capabilities queried below.
///
/// NOTE(review): this listing appears to contain two bodies of the function back to back
/// (a shaderStorageImageMultisample-only check followed by a Qualcomm-aware check), most
/// likely the removed and added sides of a diff whose markers were lost. As written, every
/// path through the first check returns, so the second check never executes - confirm which
/// body is intended before relying on this function.
[[nodiscard]] ImageInfo AdjustMSAAForHDRFormats(const Device& device, ImageInfo info) {
// Only apply emergency fallback if MSAA is requested
if (info.num_samples <= 1) {
return info;
}
// Check if this is an HDR format that commonly fails with MSAA
const auto vk_format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal,
false, info.format).format;
const bool is_hdr_format = vk_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32;
if (!is_hdr_format) {
return info; // Not an HDR format, no adjustment needed
}
// --- First check: shaderStorageImageMultisample only ---
// If driver doesn't support shader storage image multisample, MSAACopyPass will fail
// Emergency fallback: degrade to non-MSAA (1 sample) to avoid texture corruption
if (!device.IsStorageImageMultisampleSupported()) {
LOG_ERROR(Render_Vulkan,
"EMERGENCY MSAA FALLBACK: Driver doesn't support shaderStorageImageMultisample. "
"Degrading HDR format {} from {}x MSAA to 1x (non-MSAA) to prevent texture corruption. "
"This will cause visual quality loss but prevents black textures.",
vk_format, info.num_samples);
// Degrade to non-MSAA
// NOTE: We only change num_samples, NOT dimensions. The ImageInfo dimensions are already
// in "logical" space (full resolution), and MakeImageCreateInfo will handle the conversion
// to physical GPU dimensions based on num_samples automatically.
info.num_samples = 1;
return info;
}
return info; // Driver supports MSAA storage images, no adjustment needed
// --- Second check: Qualcomm shader resolve, then shaderStorageImageMultisample ---
// NOTE(review): unreachable as listed, because both branches above have already returned.
// Qualcomm: VK_QCOM_render_pass_shader_resolve handles HDR+MSAA
if (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
if (device.IsQcomRenderPassShaderResolveSupported()) {
return info;
}
}
// Other vendors: shaderStorageImageMultisample handles HDR+MSAA
// (a Qualcomm driver without the shader-resolve extension also falls through to this test)
if (device.IsStorageImageMultisampleSupported()) {
return info;
}
// No suitable resolve method - degrade to non-MSAA
LOG_WARNING(Render_Vulkan, "HDR format {} with MSAA not supported, degrading to 1x samples",
vk_format);
info.num_samples = 1;
return info;
}
[[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator,
@ -896,6 +892,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched
astc_decoder_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool,
compute_pass_descriptor_queue, memory_allocator);
}
// MSAA copy support via compute shader (only for non-Qualcomm with shaderStorageImageMultisample)
// Qualcomm uses VK_QCOM_render_pass_shader_resolve (fragment shader in render pass)
if (device.IsStorageImageMultisampleSupported()) {
msaa_copy_pass = std::make_unique<MSAACopyPass>(
device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue);

2
src/video_core/vulkan_common/vulkan_device.cpp

@ -549,7 +549,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_INFO(Render_Vulkan, "Shader Float Controls FORCE ENABLED by user (Eden Veil/Extensions)");
// Log driver capabilities
const auto& fc = float_control;
const auto& fc = properties.float_controls;
LOG_INFO(Render_Vulkan, "Driver Float Controls Capabilities:");
LOG_INFO(Render_Vulkan, " - Denorm Flush FP32: {}", fc.shaderDenormFlushToZeroFloat32 ? "YES" : "NO");
LOG_INFO(Render_Vulkan, " - Denorm Preserve FP32: {}", fc.shaderDenormPreserveFloat32 ? "YES" : "NO");

16
src/video_core/vulkan_common/vulkan_device.h

@ -95,6 +95,7 @@ VK_DEFINE_HANDLE(VmaAllocator)
EXTENSION(EXT, DESCRIPTOR_INDEXING, descriptor_indexing) \
EXTENSION(EXT, FILTER_CUBIC, filter_cubic) \
EXTENSION(QCOM, FILTER_CUBIC_WEIGHTS, filter_cubic_weights) \
EXTENSION(QCOM, RENDER_PASS_SHADER_RESOLVE, render_pass_shader_resolve) \
EXTENSION(KHR, MAINTENANCE_1, maintenance1) \
EXTENSION(KHR, MAINTENANCE_2, maintenance2) \
EXTENSION(KHR, MAINTENANCE_3, maintenance3) \
@ -582,6 +583,21 @@ public:
return extensions.filter_cubic_weights;
}
/// Reports whether the VK_QCOM_render_pass_shader_resolve extension flag is set for this device.
bool IsQcomRenderPassShaderResolveSupported() const {
    const bool has_shader_resolve = extensions.render_pass_shader_resolve;
    return has_shader_resolve;
}
/// Reports whether MSAA upload copies are available on this device.
/// The answer is exactly IsStorageImageMultisampleSupported(); no vendor is special-cased here.
/// NOTE(review): devices that lack that feature (presumably including Qualcomm drivers that
/// rely on render pass shader resolve instead - confirm) report false.
bool CanUploadMSAA() const {
    const bool has_multisample_storage = IsStorageImageMultisampleSupported();
    return has_multisample_storage;
}
/// Reports whether MSAA download copies are available; mirrors CanUploadMSAA().
bool CanDownloadMSAA() const {
    const bool upload_supported = CanUploadMSAA();
    return upload_supported;
}
/// Returns true if the device supports VK_EXT_line_rasterization.
bool IsExtLineRasterizationSupported() const {
return extensions.line_rasterization;

Loading…
Cancel
Save