diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp index 2bf7f4de13..579b6ceeeb 100644 --- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp @@ -380,13 +380,14 @@ void EmitContext::SetupExtensions() { if (info.uses_int64 && profile.support_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } - if (info.uses_int64_bit_atomics) { + if (info.uses_int64_bit_atomics && profile.support_gl_shader_atomic_int64) { header += "#extension GL_NV_shader_atomic_int64 : enable\n"; } - if (info.uses_atomic_f32_add) { + if (info.uses_atomic_f32_add && profile.support_gl_shader_atomic_float) { header += "#extension GL_NV_shader_atomic_float : enable\n"; } - if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { + if ((info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) && + profile.support_gl_shader_atomic_fp16_vector) { header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n"; } if (info.uses_fp16) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 90e46bb1ba..c6851959f0 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -38,6 +38,9 @@ struct Profile { bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; bool support_gl_texture_shadow_lod{}; + bool support_gl_shader_atomic_float{}; + bool support_gl_shader_atomic_fp16_vector{}; + bool support_gl_shader_atomic_int64{}; bool support_gl_warp_intrinsics{}; bool support_gl_variable_aoffi{}; bool support_gl_sparse_textures{}; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index f5bf995d00..131808c25a 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -225,6 +225,9 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; has_draw_texture = GLAD_GL_NV_draw_texture; + has_shader_atomic_float = GLAD_GL_NV_shader_atomic_float; + has_shader_atomic_fp16_vector = GLAD_GL_NV_shader_atomic_fp16_vector; + has_shader_atomic_int64 = GLAD_GL_NV_shader_atomic_int64; warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; need_fastmath_off = is_nvidia; can_report_memory = GLAD_GL_NVX_gpu_memory_info; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index a5a6bbbba7..a25daba8eb 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -152,6 +152,18 @@ public: return has_draw_texture; } + bool HasShaderAtomicFloat() const { + return has_shader_atomic_float; + } + + bool HasShaderAtomicFp16Vector() const { + return has_shader_atomic_fp16_vector; + } + + bool HasShaderAtomicInt64() const { + return has_shader_atomic_int64; + } + bool IsWarpSizePotentiallyLargerThanGuest() const { return warp_size_potentially_larger_than_guest; } @@ -235,6 +247,9 @@ private: bool has_amd_shader_half_float{}; bool has_sparse_texture_2{}; bool has_draw_texture{}; + bool has_shader_atomic_float{}; + bool has_shader_atomic_fp16_vector{}; + bool has_shader_atomic_int64{}; bool warp_size_potentially_larger_than_guest{}; bool need_fastmath_off{}; bool has_cbuf_ftou_bug{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 45f729698e..881c906b79 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -215,6 +215,9 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), + .support_gl_shader_atomic_float = device.HasShaderAtomicFloat(), + .support_gl_shader_atomic_fp16_vector = device.HasShaderAtomicFp16Vector(), + .support_gl_shader_atomic_int64 = device.HasShaderAtomicInt64(), .support_gl_warp_intrinsics = false, .support_gl_variable_aoffi = device.HasVariableAoffi(), .support_gl_sparse_textures = device.HasSparseTexture2(), diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 5d59fdbc4d..44cd114bd3 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1304,6 +1304,43 @@ void Device::RemoveUnsuitableExtensions() { VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); } + // VK_KHR_shader_float16_int8 + const bool float16_int8_requested = extensions.shader_float16_int8; + const bool float16_int8_usable = + features.shader_float16_int8.shaderFloat16 || features.shader_float16_int8.shaderInt8; + if (float16_int8_requested && !float16_int8_usable) { + LOG_WARNING(Render_Vulkan, + "Disabling VK_KHR_shader_float16_int8 — no shaderFloat16/shaderInt8 features reported"); + } + extensions.shader_float16_int8 = float16_int8_requested && float16_int8_usable; + RemoveExtensionFeatureIfUnsuitable(float16_int8_usable, features.shader_float16_int8, + VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); + + // VK_EXT_shader_atomic_float + const bool atomic_float_requested = extensions.shader_atomic_float; + const auto& atomic_float_features = features.shader_atomic_float; + const bool supports_buffer_f32 = atomic_float_features.shaderBufferFloat32Atomics || + atomic_float_features.shaderBufferFloat32AtomicAdd; + const bool supports_shared_f32 = atomic_float_features.shaderSharedFloat32Atomics || + atomic_float_features.shaderSharedFloat32AtomicAdd; + const bool supports_image_f32 = atomic_float_features.shaderImageFloat32Atomics || + atomic_float_features.shaderImageFloat32AtomicAdd; + const bool supports_sparse_f32 = atomic_float_features.sparseImageFloat32Atomics || + atomic_float_features.sparseImageFloat32AtomicAdd; + const bool supports_buffer_f64 = atomic_float_features.shaderBufferFloat64Atomics || + atomic_float_features.shaderBufferFloat64AtomicAdd; + const bool supports_shared_f64 = atomic_float_features.shaderSharedFloat64Atomics || + atomic_float_features.shaderSharedFloat64AtomicAdd; + const bool atomic_float_usable = supports_buffer_f32 || supports_shared_f32 || supports_image_f32 || + supports_sparse_f32 || supports_buffer_f64 || supports_shared_f64; + if (atomic_float_requested && !atomic_float_usable) { + LOG_WARNING(Render_Vulkan, + "Disabling VK_EXT_shader_atomic_float — no usable atomic float feature bits reported"); + } + extensions.shader_atomic_float = atomic_float_requested && atomic_float_usable; + RemoveExtensionFeatureIfUnsuitable(atomic_float_usable, features.shader_atomic_float, + VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME); + // VK_KHR_shader_atomic_int64 extensions.shader_atomic_int64 = features.shader_atomic_int64.shaderBufferInt64Atomics && features.shader_atomic_int64.shaderSharedInt64Atomics; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 71b431c9fc..a6f7d5fb07 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -49,6 +49,7 @@ VK_DEFINE_HANDLE(VmaAllocator) FEATURE(EXT, ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, extended_dynamic_state) \ FEATURE(EXT, ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, extended_dynamic_state2) \ FEATURE(EXT, ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, extended_dynamic_state3) \ + FEATURE(EXT, ShaderAtomicFloat, SHADER_ATOMIC_FLOAT, shader_atomic_float) \ FEATURE(EXT, 4444Formats, 4444_FORMATS, format_a4b4g4r4) \ FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \ FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \ @@ -611,6 +612,11 @@ public: return extensions.shader_atomic_int64; } + /// Returns true if the device supports VK_EXT_shader_atomic_float. + bool IsExtShaderAtomicFloatSupported() const { + return extensions.shader_atomic_float; + } + bool IsExtConditionalRendering() const { return extensions.conditional_rendering; }