Browse Source

Add FTZ optimizations and adjust pipeline float-control handling for Qualcomm drivers

flatopsfixes23485
CamilleLaVey 2 months ago
parent
commit
1fc9c3f6ff
  1. 4
      src/common/settings.h
  2. 24
      src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
  3. 6
      src/video_core/renderer_vulkan/vk_graphics_pipeline.h
  4. 30
      src/video_core/renderer_vulkan/vk_pipeline_cache.cpp

4
src/common/settings.h

@ -547,6 +547,10 @@ struct Values {
Specialization::Scalar}; Specialization::Scalar};
SwitchableSetting<bool> force_unsupported_extensions{linkage, false, "force_unsupported_extensions", Category::RendererExtensions}; SwitchableSetting<bool> force_unsupported_extensions{linkage, false, "force_unsupported_extensions", Category::RendererExtensions};
// Developer toggle that enables FTZ (flush-to-zero) optimizations for testing
// on Qualcomm devices. Defaults to false for safety; when true, pipeline code
// may emit FTZ-friendly SPIR-V and allow fast-math transforms on Adreno GPUs.
SwitchableSetting<bool> enable_ftz{linkage, false, "enable_ftz", Category::RendererExtensions};
SwitchableSetting<bool> provoking_vertex{linkage, false, "provoking_vertex", Category::RendererExtensions}; SwitchableSetting<bool> provoking_vertex{linkage, false, "provoking_vertex", Category::RendererExtensions};
SwitchableSetting<bool> descriptor_indexing{linkage, false, "descriptor_indexing", Category::RendererExtensions}; SwitchableSetting<bool> descriptor_indexing{linkage, false, "descriptor_indexing", Category::RendererExtensions};
SwitchableSetting<bool> sample_shading{linkage, false, "sample_shading", Category::RendererExtensions, Specialization::Paired}; SwitchableSetting<bool> sample_shading{linkage, false, "sample_shading", Category::RendererExtensions, Specialization::Paired};

24
src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp

@ -73,15 +73,33 @@ Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
} }
// Emits a 16-bit floating-point multiply-add computing a * b + c.
//
// When the profile does not request fast-math to be turned off, a single
// OpFma is emitted. Otherwise the operation is emitted as a separate OpFMul
// followed by an OpFAdd so the result does not rely on fused behavior.
// The final result is passed through Decorate() in both cases.
Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
    if (ctx.profile.need_fastmath_off) {
        // Conservative path: explicit multiply, then add.
        // NOTE(review): only the final OpFAdd goes through Decorate(); confirm
        // whether the intermediate OpFMul needs the same decoration.
        const Id product = ctx.OpFMul(ctx.F16[1], a, b);
        return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], product, c));
    }
    return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c));
}
// Emits a 32-bit floating-point multiply-add computing a * b + c.
//
// When the profile does not request fast-math to be turned off, a single
// OpFma is emitted. Otherwise the operation is emitted as a separate OpFMul
// followed by an OpFAdd so the result does not rely on fused behavior.
// The final result is passed through Decorate() in both cases.
Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
    if (ctx.profile.need_fastmath_off) {
        // Conservative path: explicit multiply, then add.
        // NOTE(review): only the final OpFAdd goes through Decorate(); confirm
        // whether the intermediate OpFMul needs the same decoration.
        const Id product = ctx.OpFMul(ctx.F32[1], a, b);
        return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], product, c));
    }
    return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c));
}
// Emits a 64-bit floating-point multiply-add computing a * b + c.
//
// When the profile does not request fast-math to be turned off, a single
// OpFma is emitted. Otherwise the operation is emitted as a separate OpFMul
// followed by an OpFAdd so the result does not rely on fused behavior.
// The final result is passed through Decorate() in both cases.
Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
    if (ctx.profile.need_fastmath_off) {
        // Conservative path: explicit multiply, then add.
        // NOTE(review): only the final OpFAdd goes through Decorate(); confirm
        // whether the intermediate OpFMul needs the same decoration.
        const Id product = ctx.OpFMul(ctx.F64[1], a, b);
        return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], product, c));
    }
    return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c));
}
Id EmitFPMax32(EmitContext& ctx, Id a, Id b) { Id EmitFPMax32(EmitContext& ctx, Id a, Id b) {

6
src/video_core/renderer_vulkan/vk_graphics_pipeline.h

@ -31,6 +31,10 @@ namespace Vulkan {
struct GraphicsPipelineCacheKey { struct GraphicsPipelineCacheKey {
std::array<u64, 6> unique_hashes; std::array<u64, 6> unique_hashes;
FixedPipelineState state; FixedPipelineState state;
// Per-pipeline float control choices (selected at pipeline key time).
// 0 = disabled, 1 = enabled
uint8_t use_ftz_f32{};
uint8_t use_ftz_f16{};
size_t Hash() const noexcept; size_t Hash() const noexcept;
@ -41,7 +45,7 @@ struct GraphicsPipelineCacheKey {
} }
size_t Size() const noexcept { size_t Size() const noexcept {
return sizeof(unique_hashes) + state.Size();
return sizeof(unique_hashes) + state.Size() + sizeof(use_ftz_f32) + sizeof(use_ftz_f16);
} }
}; };
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);

30
src/video_core/renderer_vulkan/vk_pipeline_cache.cpp

@ -446,6 +446,26 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
} }
graphics_key.state.Refresh(*maxwell3d, dynamic_features); graphics_key.state.Refresh(*maxwell3d, dynamic_features);
// Decide per-pipeline FTZ (flush-to-zero) usage based on device float-controls
// properties and a dedicated enable_ftz developer toggle. FTZ is gated to
// Qualcomm drivers for initial testing to avoid widespread regressions.
const bool enable_ftz_setting = Settings::values.enable_ftz.GetValue();
const auto& float_control = device.FloatControlProperties();
const bool has_khr_float_controls = device.IsKhrShaderFloatControlsSupported();
const bool denorm_indep_all = float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
const bool denorm_indep_32 = denorm_indep_all || float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
const bool is_qualcomm = device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
const bool allow_ftz = enable_ftz_setting && is_qualcomm;
graphics_key.use_ftz_f32 = (has_khr_float_controls && (float_control.shaderDenormFlushToZeroFloat32 == VK_TRUE) && denorm_indep_32 && allow_ftz) ? 1 : 0;
graphics_key.use_ftz_f16 = (has_khr_float_controls && (float_control.shaderDenormFlushToZeroFloat16 == VK_TRUE) && denorm_indep_all && allow_ftz) ? 1 : 0;
if (allow_ftz && (graphics_key.use_ftz_f32 || graphics_key.use_ftz_f16)) {
LOG_INFO(Render_Vulkan, "Enabling per-pipeline FTZ (fast-math) for Qualcomm device: f32={} f16={}",
graphics_key.use_ftz_f32, graphics_key.use_ftz_f16);
}
if (current_pipeline) { if (current_pipeline) {
GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; GraphicsPipeline* const next{current_pipeline->Next(graphics_key)};
if (next) { if (next) {
@ -690,7 +710,15 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
ConvertLegacyToGeneric(program, runtime_info); ConvertLegacyToGeneric(program, runtime_info);
const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding, this->optimize_spirv_output)};
// Forward a pipeline-specific copy of the profile to the SPIR-V emitter so
// it can enable or disable fast-math (FTZ) optimizations per pipeline. The
// decision is taken from the GraphicsPipelineCacheKey's use_ftz_f32 flag
// only: when that flag is set we keep need_fastmath_off=false (allow
// optimizations); when it is zero we set need_fastmath_off=true to prevent
// unsafe transformations.
Shader::Profile emit_profile = profile;
emit_profile.need_fastmath_off = (key.use_ftz_f32 == 0);
const std::vector<u32> code{EmitSPIRV(emit_profile, runtime_info, program, binding, this->optimize_spirv_output)};
device.SaveShader(code); device.SaveShader(code);
modules[stage_index] = BuildShader(device, code); modules[stage_index] = BuildShader(device, code);
if (device.HasDebuggingToolAttached()) { if (device.HasDebuggingToolAttached()) {

Loading…
Cancel
Save