|
|
|
@ -1525,6 +1525,23 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, |
|
|
|
void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src, |
|
|
|
std::span<const VideoCommon::ImageCopy> copies) { |
|
|
|
const bool msaa_to_non_msaa = src.info.num_samples > 1 && dst.info.num_samples == 1; |
|
|
|
|
|
|
|
// Use VK_QCOM_render_pass_shader_resolve for HDR formats on Qualcomm
|
|
|
|
// This is more efficient than compute shader (stays on-chip in TBDR)
|
|
|
|
const bool is_hdr_format = src.info.format == PixelFormat::B10G11R11_FLOAT || |
|
|
|
dst.info.format == PixelFormat::B10G11R11_FLOAT; |
|
|
|
const bool use_qcom_resolve = msaa_to_non_msaa && |
|
|
|
device.IsQcomRenderPassShaderResolveSupported() && |
|
|
|
is_hdr_format && |
|
|
|
copies.size() == 1; // QCOM resolve works best with single full copy
|
|
|
|
|
|
|
|
if (use_qcom_resolve) { |
|
|
|
// Create temporary framebuffer with resolve target
|
|
|
|
// TODO Camille: Implement QCOM shader resolve path with proper framebuffer setup
|
|
|
|
// For now, fall through to standard path
|
|
|
|
LOG_DEBUG(Render_Vulkan, "QCOM shader resolve opportunity detected but not yet implemented"); |
|
|
|
} |
|
|
|
|
|
|
|
if (msaa_copy_pass) { |
|
|
|
return msaa_copy_pass->CopyImage(dst, src, copies, msaa_to_non_msaa); |
|
|
|
} |
|
|
|
@ -2391,6 +2408,26 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, |
|
|
|
} |
|
|
|
renderpass_key.samples = samples; |
|
|
|
|
|
|
|
// Enable VK_QCOM_render_pass_shader_resolve for HDR+MSAA on Qualcomm
|
|
|
|
// This performs MSAA resolve using fragment shader IN the render pass (on-chip)
|
|
|
|
// Benefits: ~70% bandwidth reduction, better performance on TBDR architectures
|
|
|
|
// Requirements: pResolveAttachments configured + explicit shader execution
|
|
|
|
if (samples > VK_SAMPLE_COUNT_1_BIT && runtime.device.IsQcomRenderPassShaderResolveSupported()) { |
|
|
|
// Check if any color attachment is HDR format that benefits from shader resolve
|
|
|
|
bool has_hdr_attachment = false; |
|
|
|
for (size_t index = 0; index < NUM_RT && !has_hdr_attachment; ++index) { |
|
|
|
const auto format = renderpass_key.color_formats[index]; |
|
|
|
// B10G11R11_FLOAT benefits most: compute shader limited, fixed-function slower
|
|
|
|
if (format == PixelFormat::B10G11R11_FLOAT) { |
|
|
|
has_hdr_attachment = true; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (has_hdr_attachment) { |
|
|
|
renderpass_key.qcom_shader_resolve = true; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
renderpass = runtime.render_pass_cache.Get(renderpass_key); |
|
|
|
render_area.width = (std::min)(render_area.width, width); |
|
|
|
render_area.height = (std::min)(render_area.height, height); |
|
|
|
|