From 4bbde3e5ecb03db5d0abfd9157f527d749cefbab Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Wed, 5 Nov 2025 13:59:47 -0400 Subject: [PATCH] Improve float operations for ARM and Qualcomm drivers and other extensions workarounds --- .../renderer_vulkan/maxwell_to_vk.cpp | 7 +++- .../renderer_vulkan/vk_pipeline_cache.cpp | 36 +++++++++++-------- .../vulkan_common/vulkan_device.cpp | 36 ++++++++++++++++--- src/video_core/vulkan_common/vulkan_device.h | 5 +++ 4 files changed, 65 insertions(+), 19 deletions(-) diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index a7a878f18c..1d27f454dd 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -77,7 +77,12 @@ VkSamplerAddressMode WrapMode(const Device& device, Tegra::Texture::WrapMode wra ASSERT(false); return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; case Tegra::Texture::WrapMode::MirrorOnceClampToEdge: - return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + if (device.IsKhrSamplerMirrorClampToEdgeSupported()) { + return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + } + // Fallback when the sampler mirror clamp extension isn't present. + // Use CLAMP_TO_EDGE as the safest approximation. 
+ return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; case Tegra::Texture::WrapMode::MirrorOnceBorder: UNIMPLEMENTED(); return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 10ee14773f..fbcad44d5f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -327,20 +327,28 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, .support_int64 = device.IsShaderInt64Supported(), .support_vertex_instance_id = false, .support_float_controls = device.IsKhrShaderFloatControlsSupported(), - .support_separate_denorm_behavior = - float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, - .support_separate_rounding_mode = - float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, - .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, - .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, - .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, - .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, - .support_fp16_signed_zero_nan_preserve = - float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, - .support_fp32_signed_zero_nan_preserve = - float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, - .support_fp64_signed_zero_nan_preserve = - float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, + // Only enable per-size float control capabilities when the KHR_shader_float_controls + // extension is actually enabled on the device and the driver reports explicit support + // for the individual properties. This avoids enabling functionality when the extension + // was removed due to driver workarounds. 
+ .support_separate_denorm_behavior = device.IsKhrShaderFloatControlsSupported() && + (float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL), + .support_separate_rounding_mode = device.IsKhrShaderFloatControlsSupported() && + (float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL), + .support_fp16_denorm_preserve = device.IsKhrShaderFloatControlsSupported() && + (float_control.shaderDenormPreserveFloat16 == VK_TRUE), + .support_fp32_denorm_preserve = device.IsKhrShaderFloatControlsSupported() && + (float_control.shaderDenormPreserveFloat32 == VK_TRUE), + .support_fp16_denorm_flush = device.IsKhrShaderFloatControlsSupported() && + (float_control.shaderDenormFlushToZeroFloat16 == VK_TRUE), + .support_fp32_denorm_flush = device.IsKhrShaderFloatControlsSupported() && + (float_control.shaderDenormFlushToZeroFloat32 == VK_TRUE), + .support_fp16_signed_zero_nan_preserve = device.IsKhrShaderFloatControlsSupported() && + (float_control.shaderSignedZeroInfNanPreserveFloat16 == VK_TRUE), + .support_fp32_signed_zero_nan_preserve = device.IsKhrShaderFloatControlsSupported() && + (float_control.shaderSignedZeroInfNanPreserveFloat32 == VK_TRUE), + .support_fp64_signed_zero_nan_preserve = device.IsKhrShaderFloatControlsSupported() && + (float_control.shaderSignedZeroInfNanPreserveFloat64 == VK_TRUE), .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), .support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT), .support_viewport_index_layer_non_geometry = diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 170627ea1b..8dbdc8f241 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -507,7 +507,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); } - if 
(is_qualcomm) { + if (is_qualcomm || is_arm) { if (!force_extensions) { LOG_WARNING(Render_Vulkan, "Qualcomm drivers have a slow VK_KHR_push_descriptor implementation"); @@ -515,7 +515,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } LOG_WARNING(Render_Vulkan, - "Disabling shader float controls and 64-bit integer features on Qualcomm proprietary drivers"); + "Disabling shader float controls and 64-bit integer features on Qualcomm and ARM Mali proprietary drivers"); RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME); RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); @@ -585,13 +585,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); } } - if (extensions.extended_dynamic_state2 && is_qualcomm) { + if (extensions.extended_dynamic_state2 && (is_qualcomm || is_arm)) { const u32 version = (properties.properties.driverVersion << 3) >> 3; if (version >= VK_MAKE_API_VERSION(0, 0, 676, 0) && version < VK_MAKE_API_VERSION(0, 0, 680, 0) && !force_extensions) { + // Arm Mali Immortalis drivers have broken extendedDynamicState2LogicOp. // Qualcomm Adreno 7xx drivers do not properly support extended_dynamic_state2. LOG_WARNING(Render_Vulkan, - "Qualcomm Adreno 7xx drivers have broken VK_EXT_extended_dynamic_state2"); + "Qualcomm Adreno 7xx and Arm Mali Inmortalis drivers have broken VK_EXT_extended_dynamic_state2"); RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2, VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); @@ -1173,9 +1174,36 @@ bool Device::GetSuitability(bool requires_swapchain) { // Store base properties properties.properties = properties2.properties; + // Diagnostic logging for shader float controls on Qualcomm/ARM drivers. 
+ // Print the reported per-float-size properties so we can debug denorm/flush issues. + { + const auto driver_id = properties.driver.driverID; + if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_ARM_PROPRIETARY) { + const auto& fc = properties.float_controls; + LOG_INFO(Render_Vulkan, + "Driver '{}' id={} reports VK_KHR_shader_float_controls extension present={} -- " + "denormPreserveF16={} denormPreserveF32={} flushToZeroF16={} flushToZeroF32={} " + "denormBehaviorIndependence={} roundingModeIndependence={}", + properties.driver.driverName, driver_id, extensions.shader_float_controls, + (fc.shaderDenormPreserveFloat16 == VK_TRUE), (fc.shaderDenormPreserveFloat32 == VK_TRUE), + (fc.shaderDenormFlushToZeroFloat16 == VK_TRUE), (fc.shaderDenormFlushToZeroFloat32 == VK_TRUE), + fc.denormBehaviorIndependence, fc.roundingModeIndependence); + } + } + // Unload extensions if feature support is insufficient. RemoveUnsuitableExtensions(); + // Log final state of shader float controls extension on Qualcomm/ARM for diagnostics. + { + const auto driver_id = properties.driver.driverID; + if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_ARM_PROPRIETARY) { + LOG_INFO(Render_Vulkan, + "Final shader float controls extension enabled={} after suitability checks for driver '{}' id={}", + extensions.shader_float_controls, properties.driver.driverName, driver_id); + } + } + // Check limits. struct Limit { u32 minimum; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index cb13f28523..98059c5707 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -445,6 +445,11 @@ public: return extensions.shader_float_controls; } + /// Returns true if VK_KHR_sampler_mirror_clamp_to_edge is enabled. 
+ bool IsKhrSamplerMirrorClampToEdgeSupported() const { + return extensions.sampler_mirror_clamp_to_edge; + } + /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { return extensions.workgroup_memory_explicit_layout;