Browse Source

[vk] use RTE for FMZ mode; be more strict on missing FTZ-defaultness

qcom-weird-vk-ftz
lizzie 1 month ago
committed by crueter
parent
commit
92fb89cbf0
  1. 72
      src/shader_recompiler/backend/spirv/emit_spirv.cpp
  2. 4
      src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
  3. 42
      src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
  4. 5
      src/shader_recompiler/profile.h
  5. 15
      src/shader_recompiler/shader_info.h
  6. 3
      src/video_core/renderer_opengl/gl_shader_cache.cpp
  7. 12
      src/video_core/renderer_vulkan/vk_pipeline_cache.cpp

72
src/shader_recompiler/backend/spirv/emit_spirv.cpp

@ -338,55 +338,73 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
ctx.AddEntryPoint(execution_model, main, "main", interfaces);
}
void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx,
Id main_func) {
const Info& info{program.info};
if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) {
LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader");
} else if (info.uses_fp32_denorms_flush) {
void SetupDenormControl(const Profile& profile, IR::Program const& program, EmitContext& ctx, Id main_func) {
Info const& info = program.info;
switch (info.fp32_denorm) {
case Shader::FloatDenormKind::None:
default:
break;
case Shader::FloatDenormKind::DenormFlushToZero:
if (profile.support_fp32_denorm_flush) {
ctx.AddCapability(spv::Capability::DenormFlushToZero);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U);
} else if(!profile.uses_ftz_as_default) {
LOG_WARNING(Shader_SPIRV, "f32.ftz requested but not supported");
}
break;
case Shader::FloatDenormKind::RoundingModeRTE:
if (profile.support_fp32_round_rte) {
ctx.AddCapability(spv::Capability::RoundingModeRTE);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::RoundingModeRTE, 32U);
} else {
// No RTE execution mode is emitted in this case, so the driver's default behavior applies; report it
LOG_WARNING(Shader_SPIRV, "f32.rte requested but not supported");
}
} else if (info.uses_fp32_denorms_preserve) {
break;
case Shader::FloatDenormKind::DenormPreserve:
if (profile.support_fp32_denorm_preserve) {
ctx.AddCapability(spv::Capability::DenormPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U);
} else {
LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support");
LOG_WARNING(Shader_SPIRV, "f32.pre requested but not supported");
}
break;
}
if (!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls) {
// No separate denorm behavior
return;
}
if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) {
LOG_DEBUG(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader");
} else if (info.uses_fp16_denorms_flush) {
if (profile.support_fp16_denorm_flush) {
// No separate denorm behavior
bool can_fp16 = !(!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls);
switch (info.fp16_denorm) {
case Shader::FloatDenormKind::None:
default:
break;
case Shader::FloatDenormKind::DenormFlushToZero:
if (can_fp16 && profile.support_fp16_denorm_flush) {
ctx.AddCapability(spv::Capability::DenormFlushToZero);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 16U);
} else if(!profile.uses_ftz_as_default) {
LOG_WARNING(Shader_SPIRV, "f16.ftz requested but not supported");
}
break;
case Shader::FloatDenormKind::RoundingModeRTE:
if (can_fp16 && profile.support_fp16_round_rte) {
ctx.AddCapability(spv::Capability::RoundingModeRTE);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::RoundingModeRTE, 16U);
} else {
// Same as fp32: no RTE execution mode is emitted, so the driver's default behavior applies; report it
LOG_WARNING(Shader_SPIRV, "f16.rte requested but not supported");
}
} else if (info.uses_fp16_denorms_preserve) {
if (profile.support_fp16_denorm_preserve) {
break;
case Shader::FloatDenormKind::DenormPreserve:
if (can_fp16 && profile.support_fp16_denorm_preserve) {
ctx.AddCapability(spv::Capability::DenormPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U);
} else {
LOG_DEBUG(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support");
LOG_WARNING(Shader_SPIRV, "f16.pre requested but not supported");
}
break;
}
}
void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program,
EmitContext& ctx, Id main_func) {
if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) {
return;
}
if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) {
void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program, EmitContext& ctx, Id main_func) {
if (!profile.has_broken_fp16_float_controls && program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) {
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U);
}

4
src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp

@ -76,8 +76,8 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
case IR::Attribute::ClipDistance5:
case IR::Attribute::ClipDistance6:
case IR::Attribute::ClipDistance7: {
const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)};
const u32 index{static_cast<u32>(attr) - base};
const u32 base{u32(IR::Attribute::ClipDistance0)};
const u32 index{u32(attr) - base};
if (index >= ctx.profile.max_user_clip_distances) {
LOG_WARNING(Shader, "Ignoring clip distance store {} >= {} supported", index,
ctx.profile.max_user_clip_distances);

42
src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp

@ -5,6 +5,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/alignment.h"
#include "common/assert.h"
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/ir/program.h"
@ -769,6 +770,15 @@ void VisitUsages(Info& info, IR::Inst& inst) {
}
}
/// Translates the FMZ mode carried by an instruction's FpControl flags into the
/// float-control kind stored per shader in Info.
///
/// @param mode FMZ mode read from the instruction flags.
/// @return The matching FloatDenormKind. FloatDenormKind::None is returned for
///         DontCare and for any value that is not one of the enumerators below.
constexpr Shader::FloatDenormKind FloatDenormModeToShaderMode(IR::FmzMode const mode) noexcept {
    // Deliberately no `default:` label, so the compiler can still flag an
    // enumerator added to IR::FmzMode that is not handled here.
    switch (mode) {
    case IR::FmzMode::DontCare:
        return Shader::FloatDenormKind::None;
    case IR::FmzMode::FTZ:
        return Shader::FloatDenormKind::DenormFlushToZero;
    case IR::FmzMode::FMZ:
        return Shader::FloatDenormKind::RoundingModeRTE;
    case IR::FmzMode::None:
        return Shader::FloatDenormKind::DenormPreserve;
    }
    // A scoped enum may hold a value outside its enumerators; flowing off the end
    // of a value-returning function is undefined behavior, so fall back to the
    // "nothing requested" kind instead.
    return Shader::FloatDenormKind::None;
}
void VisitFpModifiers(Info& info, IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::FPAdd16:
@ -778,18 +788,10 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) {
case IR::Opcode::FPFloor16:
case IR::Opcode::FPCeil16:
case IR::Opcode::FPTrunc16: {
const auto control{inst.Flags<IR::FpControl>()};
switch (control.fmz_mode) {
case IR::FmzMode::DontCare:
break;
case IR::FmzMode::FTZ:
case IR::FmzMode::FMZ:
info.uses_fp16_denorms_flush = true;
break;
case IR::FmzMode::None:
info.uses_fp16_denorms_preserve = true;
break;
}
auto const control = inst.Flags<IR::FpControl>();
auto const denorm = FloatDenormModeToShaderMode(control.fmz_mode);
ASSERT(info.fp16_denorm == FloatDenormKind::None || info.fp16_denorm == denorm);
info.fp16_denorm = denorm;
break;
}
case IR::Opcode::FPAdd32:
@ -813,18 +815,10 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) {
case IR::Opcode::FPUnordGreaterThanEqual32:
case IR::Opcode::ConvertF16F32:
case IR::Opcode::ConvertF64F32: {
const auto control{inst.Flags<IR::FpControl>()};
switch (control.fmz_mode) {
case IR::FmzMode::DontCare:
break;
case IR::FmzMode::FTZ:
case IR::FmzMode::FMZ:
info.uses_fp32_denorms_flush = true;
break;
case IR::FmzMode::None:
info.uses_fp32_denorms_preserve = true;
break;
}
const auto control = inst.Flags<IR::FpControl>();
auto const denorm = FloatDenormModeToShaderMode(control.fmz_mode);
ASSERT(info.fp32_denorm == FloatDenormKind::None || info.fp32_denorm == denorm);
info.fp32_denorm = denorm;
break;
}
default:

5
src/shader_recompiler/profile.h

@ -22,6 +22,8 @@ struct Profile {
bool support_fp32_denorm_preserve{};
bool support_fp16_denorm_flush{};
bool support_fp32_denorm_flush{};
bool support_fp16_round_rte{};
bool support_fp32_round_rte{};
bool support_fp16_signed_zero_nan_preserve{};
bool support_fp32_signed_zero_nan_preserve{};
bool support_fp64_signed_zero_nan_preserve{};
@ -46,6 +48,9 @@ struct Profile {
bool support_multi_viewport{};
bool support_geometry_streams{};
/// FTZ is default mode so no need to specify it again (QCOM)
bool uses_ftz_as_default{};
bool warp_size_potentially_larger_than_guest{};
bool lower_left_origin_mode{};

15
src/shader_recompiler/shader_info.h

@ -235,6 +235,15 @@ struct ImageDescriptor {
};
using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
/// Float-control behavior a shader requests for one floating-point width
/// (stored per width in Info as fp16_denorm / fp32_denorm).
/// NOTE(review): enumerator names appear to mirror the SPIR-V execution modes of
/// the same name — confirm before relying on a 1:1 mapping.
enum class FloatDenormKind : u32 {
    /// Nothing requested; this is the value-initialized state.
    None = 0,
    DenormPreserve = 1,
    DenormFlushToZero = 2,
    SignedZeroInfNanPreserve = 3,
    RoundingModeRTE = 4,
    RoundingModeRTZ = 5,
};
struct Info {
static constexpr size_t MAX_INDIRECT_CBUFS{14};
static constexpr size_t MAX_CBUFS{18};
@ -273,10 +282,8 @@ struct Info {
bool uses_fp16{};
bool uses_fp64{};
bool uses_fp16_denorms_flush{};
bool uses_fp16_denorms_preserve{};
bool uses_fp32_denorms_flush{};
bool uses_fp32_denorms_preserve{};
FloatDenormKind fp16_denorm{};
FloatDenormKind fp32_denorm{};
bool uses_int8{};
bool uses_int16{};
bool uses_int64{};

3
src/video_core/renderer_opengl/gl_shader_cache.cpp

@ -198,6 +198,8 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_fp32_denorm_preserve = false,
.support_fp16_denorm_flush = false,
.support_fp32_denorm_flush = false,
.support_fp16_round_rte = false,
.support_fp32_round_rte = false,
.support_fp16_signed_zero_nan_preserve = false,
.support_fp32_signed_zero_nan_preserve = false,
.support_fp64_signed_zero_nan_preserve = false,
@ -221,6 +223,7 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_gl_derivative_control = device.HasDerivativeControl(),
.support_geometry_streams = true,
.uses_ftz_as_default = false,
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(),
.lower_left_origin_mode = true,

12
src/video_core/renderer_vulkan/vk_pipeline_cache.cpp

@ -335,12 +335,11 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
.support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
.support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
.support_fp16_signed_zero_nan_preserve =
float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
.support_fp32_signed_zero_nan_preserve =
float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
.support_fp64_signed_zero_nan_preserve =
float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
.support_fp16_round_rte = float_control.shaderRoundingModeRTEFloat16 != VK_FALSE,
.support_fp32_round_rte = float_control.shaderRoundingModeRTEFloat32 != VK_FALSE,
.support_fp16_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
.support_fp32_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
.support_fp64_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
.support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
.support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT),
.support_viewport_index_layer_non_geometry =
@ -357,6 +356,7 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_multi_viewport = device.SupportsMultiViewport(),
.support_geometry_streams = device.AreTransformFeedbackGeometryStreamsSupported(),
.uses_ftz_as_default = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
.lower_left_origin_mode = false,

Loading…
Cancel
Save