From 92fb89cbf0c18d314520275b714d8a7a3e2d18a4 Mon Sep 17 00:00:00 2001
From: lizzie
Date: Sun, 9 Nov 2025 06:59:04 +0000
Subject: [PATCH] [vk] use RTE for FMZ mode; be more strict on missing FTZ-defaultness

---
 .../backend/spirv/emit_spirv.cpp              | 72 ++++++++++++-------
 .../spirv/emit_spirv_context_get_set.cpp      |  4 +-
 .../ir_opt/collect_shader_info_pass.cpp       | 51 ++++++-------
 src/shader_recompiler/profile.h               |  5 ++
 src/shader_recompiler/shader_info.h           | 15 ++--
 .../renderer_opengl/gl_shader_cache.cpp       |  3 +
 .../renderer_vulkan/vk_pipeline_cache.cpp     | 12 ++--
 7 files changed, 99 insertions(+), 63 deletions(-)

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 313a1deb30..0be5ce0dcc 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -338,55 +338,73 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
     ctx.AddEntryPoint(execution_model, main, "main", interfaces);
 }
 
-void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx,
-                        Id main_func) {
-    const Info& info{program.info};
-    if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) {
-        LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader");
-    } else if (info.uses_fp32_denorms_flush) {
+void SetupDenormControl(const Profile& profile, IR::Program const& program, EmitContext& ctx, Id main_func) {
+    Info const& info = program.info;
+    switch (info.fp32_denorm) {
+    case Shader::FloatDenormKind::None:
+    default:
+        break;
+    case Shader::FloatDenormKind::DenormFlushToZero:
         if (profile.support_fp32_denorm_flush) {
             ctx.AddCapability(spv::Capability::DenormFlushToZero);
             ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U);
+        } else if (!profile.uses_ftz_as_default) {
+            LOG_WARNING(Shader_SPIRV, "f32.ftz requested but not supported");
+        }
+        break;
+    case Shader::FloatDenormKind::RoundingModeRTE:
+        if (profile.support_fp32_round_rte) {
+            ctx.AddCapability(spv::Capability::RoundingModeRTE);
+            ctx.AddExecutionMode(main_func, spv::ExecutionMode::RoundingModeRTE, 32U);
         } else {
-            // Drivers will most likely flush denorms by default, no need to warn
+            LOG_WARNING(Shader_SPIRV, "f32.rte requested but not supported");
         }
-    } else if (info.uses_fp32_denorms_preserve) {
+        break;
+    case Shader::FloatDenormKind::DenormPreserve:
         if (profile.support_fp32_denorm_preserve) {
             ctx.AddCapability(spv::Capability::DenormPreserve);
             ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U);
         } else {
-            LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support");
+            LOG_WARNING(Shader_SPIRV, "f32.pre requested but not supported");
         }
+        break;
     }
-    if (!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls) {
-        // No separate denorm behavior
-        return;
-    }
-    if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) {
-        LOG_DEBUG(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader");
-    } else if (info.uses_fp16_denorms_flush) {
-        if (profile.support_fp16_denorm_flush) {
+
+    // fp16 modes are only emitted with separate denorm behavior and working fp16 float controls
+    const bool can_fp16 = profile.support_separate_denorm_behavior && !profile.has_broken_fp16_float_controls;
+    switch (info.fp16_denorm) {
+    case Shader::FloatDenormKind::None:
+    default:
+        break;
+    case Shader::FloatDenormKind::DenormFlushToZero:
+        if (can_fp16 && profile.support_fp16_denorm_flush) {
             ctx.AddCapability(spv::Capability::DenormFlushToZero);
             ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 16U);
+        } else if (!profile.uses_ftz_as_default) {
+            LOG_WARNING(Shader_SPIRV, "f16.ftz requested but not supported");
+        }
+        break;
+    case Shader::FloatDenormKind::RoundingModeRTE:
+        if (can_fp16 && profile.support_fp16_round_rte) {
+            ctx.AddCapability(spv::Capability::RoundingModeRTE);
+            ctx.AddExecutionMode(main_func, spv::ExecutionMode::RoundingModeRTE, 16U);
         } else {
-            // Same as fp32, no need to warn as most drivers will flush by default
+            LOG_WARNING(Shader_SPIRV, "f16.rte requested but not supported");
         }
-    } else if (info.uses_fp16_denorms_preserve) {
-        if (profile.support_fp16_denorm_preserve) {
+        break;
+    case Shader::FloatDenormKind::DenormPreserve:
+        if (can_fp16 && profile.support_fp16_denorm_preserve) {
             ctx.AddCapability(spv::Capability::DenormPreserve);
             ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U);
         } else {
-            LOG_DEBUG(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support");
+            LOG_WARNING(Shader_SPIRV, "f16.pre requested but not supported");
         }
+        break;
     }
 }
 
-void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program,
-                                EmitContext& ctx, Id main_func) {
-    if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) {
-        return;
-    }
-    if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) {
+void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program, EmitContext& ctx, Id main_func) {
+    if (!profile.has_broken_fp16_float_controls && program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) {
         ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
         ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U);
     }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 1de5709394..553e487514 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -76,8 +76,8 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
     case IR::Attribute::ClipDistance5:
     case IR::Attribute::ClipDistance6:
     case IR::Attribute::ClipDistance7: {
-        const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)};
-        const u32 index{static_cast<u32>(attr) - base};
+        const u32 base{u32(IR::Attribute::ClipDistance0)};
+        const u32 index{u32(attr) - base};
         if (index >= ctx.profile.max_user_clip_distances) {
             LOG_WARNING(Shader, "Ignoring clip distance store {} >= {} supported", index,
                         ctx.profile.max_user_clip_distances);
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 2bfa3227a8..6e37593d3f 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -5,6 +5,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include "common/alignment.h"
+#include "common/assert.h"
 #include "shader_recompiler/environment.h"
 #include "shader_recompiler/frontend/ir/modifiers.h"
 #include "shader_recompiler/frontend/ir/program.h"
@@ -769,6 +770,18 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     }
 }
 
+/// Maps the FMZ mode of an instruction to the float control kind recorded in the shader info
+constexpr Shader::FloatDenormKind FloatDenormModeToShaderMode(IR::FmzMode const mode) noexcept {
+    switch (mode) {
+    case IR::FmzMode::DontCare: return Shader::FloatDenormKind::None;
+    case IR::FmzMode::FTZ: return Shader::FloatDenormKind::DenormFlushToZero;
+    case IR::FmzMode::FMZ: return Shader::FloatDenormKind::RoundingModeRTE;
+    case IR::FmzMode::None: return Shader::FloatDenormKind::DenormPreserve;
+    }
+    // Out-of-range values must not flow off the end of a non-void function
+    return Shader::FloatDenormKind::None;
+}
+
 void VisitFpModifiers(Info& info, IR::Inst& inst) {
     switch (inst.GetOpcode()) {
     case IR::Opcode::FPAdd16:
@@ -778,18 +791,13 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) {
     case IR::Opcode::FPFloor16:
     case IR::Opcode::FPCeil16:
     case IR::Opcode::FPTrunc16: {
-        const auto control{inst.Flags<IR::FpControl>()};
-        switch (control.fmz_mode) {
-        case IR::FmzMode::DontCare:
-            break;
-        case IR::FmzMode::FTZ:
-        case IR::FmzMode::FMZ:
-            info.uses_fp16_denorms_flush = true;
-            break;
-        case IR::FmzMode::None:
-            info.uses_fp16_denorms_preserve = true;
-            break;
-        }
+        auto const control = inst.Flags<IR::FpControl>();
+        auto const denorm = FloatDenormModeToShaderMode(control.fmz_mode);
+        // DontCare must neither trip the assert nor overwrite an already collected mode
+        if (denorm != FloatDenormKind::None) {
+            ASSERT(info.fp16_denorm == FloatDenormKind::None || info.fp16_denorm == denorm);
+            info.fp16_denorm = denorm;
+        }
         break;
     }
     case IR::Opcode::FPAdd32:
@@ -813,18 +821,13 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) {
     case IR::Opcode::FPUnordGreaterThanEqual32:
     case IR::Opcode::ConvertF16F32:
     case IR::Opcode::ConvertF64F32: {
-        const auto control{inst.Flags<IR::FpControl>()};
-        switch (control.fmz_mode) {
-        case IR::FmzMode::DontCare:
-            break;
-        case IR::FmzMode::FTZ:
-        case IR::FmzMode::FMZ:
-            info.uses_fp32_denorms_flush = true;
-            break;
-        case IR::FmzMode::None:
-            info.uses_fp32_denorms_preserve = true;
-            break;
-        }
+        const auto control = inst.Flags<IR::FpControl>();
+        auto const denorm = FloatDenormModeToShaderMode(control.fmz_mode);
+        // DontCare must neither trip the assert nor overwrite an already collected mode
+        if (denorm != FloatDenormKind::None) {
+            ASSERT(info.fp32_denorm == FloatDenormKind::None || info.fp32_denorm == denorm);
+            info.fp32_denorm = denorm;
+        }
         break;
     }
     default:
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 90e46bb1ba..7549e59174 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -22,6 +22,8 @@ struct Profile {
     bool support_fp32_denorm_preserve{};
     bool support_fp16_denorm_flush{};
     bool support_fp32_denorm_flush{};
+    bool support_fp16_round_rte{};
+    bool support_fp32_round_rte{};
     bool support_fp16_signed_zero_nan_preserve{};
     bool support_fp32_signed_zero_nan_preserve{};
     bool support_fp64_signed_zero_nan_preserve{};
@@ -46,6 +48,9 @@ struct Profile {
     bool support_multi_viewport{};
     bool support_geometry_streams{};
 
+    /// Host flushes denorms by default, so an unsupported FTZ request is not warned about (QCOM)
+    bool uses_ftz_as_default{};
+
     bool warp_size_potentially_larger_than_guest{};
 
     bool lower_left_origin_mode{};
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index ed13e68209..ef22c84337 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -235,6 +235,15 @@ struct ImageDescriptor {
 };
 using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
 
+enum class FloatDenormKind : u32 {
+    None = 0,
+    DenormPreserve,
+    DenormFlushToZero,
+    SignedZeroInfNanPreserve,
+    RoundingModeRTE,
+    RoundingModeRTZ
+};
+
 struct Info {
     static constexpr size_t MAX_INDIRECT_CBUFS{14};
     static constexpr size_t MAX_CBUFS{18};
@@ -273,10 +282,8 @@ struct Info {
 
     bool uses_fp16{};
     bool uses_fp64{};
-    bool uses_fp16_denorms_flush{};
-    bool uses_fp16_denorms_preserve{};
-    bool uses_fp32_denorms_flush{};
-    bool uses_fp32_denorms_preserve{};
+    FloatDenormKind fp16_denorm{};
+    FloatDenormKind fp32_denorm{};
     bool uses_int8{};
     bool uses_int16{};
     bool uses_int64{};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 45f729698e..851a462aeb 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -198,6 +198,8 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
           .support_fp32_denorm_preserve = false,
           .support_fp16_denorm_flush = false,
           .support_fp32_denorm_flush = false,
+          .support_fp16_round_rte = false,
+          .support_fp32_round_rte = false,
           .support_fp16_signed_zero_nan_preserve = false,
           .support_fp32_signed_zero_nan_preserve = false,
           .support_fp64_signed_zero_nan_preserve = false,
@@ -221,6 +223,7 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
           .support_gl_derivative_control = device.HasDerivativeControl(),
           .support_geometry_streams = true,
 
+          .uses_ftz_as_default = false,
           .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(),
 
           .lower_left_origin_mode = true,
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index f75398da65..b82a7ed805 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -335,12 +335,11 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
         .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
         .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
         .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
-        .support_fp16_signed_zero_nan_preserve =
-            float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
-        .support_fp32_signed_zero_nan_preserve =
-            float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
-        .support_fp64_signed_zero_nan_preserve =
-            float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
+        .support_fp16_round_rte = float_control.shaderRoundingModeRTEFloat16 != VK_FALSE,
+        .support_fp32_round_rte = float_control.shaderRoundingModeRTEFloat32 != VK_FALSE,
+        .support_fp16_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
+        .support_fp32_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
+        .support_fp64_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
         .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
         .support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT),
         .support_viewport_index_layer_non_geometry =
@@ -357,6 +356,7 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
         .support_multi_viewport = device.SupportsMultiViewport(),
         .support_geometry_streams = device.AreTransformFeedbackGeometryStreamsSupported(),
 
+        .uses_ftz_as_default = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
         .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
 
         .lower_left_origin_mode = false,