Browse Source

[gl, vk] Extend implementation for atomic float operations

test-revert-gpu-optim
CamilleLaVey 1 month ago
committed by lizzie
parent
commit
5f88deeebf
  1. 7
      src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
  2. 3
      src/shader_recompiler/profile.h
  3. 3
      src/video_core/renderer_opengl/gl_device.cpp
  4. 15
      src/video_core/renderer_opengl/gl_device.h
  5. 3
      src/video_core/renderer_opengl/gl_shader_cache.cpp
  6. 37
      src/video_core/vulkan_common/vulkan_device.cpp
  7. 6
      src/video_core/vulkan_common/vulkan_device.h

7
src/shader_recompiler/backend/glsl/glsl_emit_context.cpp

@ -380,13 +380,14 @@ void EmitContext::SetupExtensions() {
if (info.uses_int64 && profile.support_int64) {
header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
}
if (info.uses_int64_bit_atomics) {
if (info.uses_int64_bit_atomics && profile.support_gl_shader_atomic_int64) {
header += "#extension GL_NV_shader_atomic_int64 : enable\n";
}
if (info.uses_atomic_f32_add) {
if (info.uses_atomic_f32_add && profile.support_gl_shader_atomic_float) {
header += "#extension GL_NV_shader_atomic_float : enable\n";
}
if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
if ((info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) &&
profile.support_gl_shader_atomic_fp16_vector) {
header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n";
}
if (info.uses_fp16) {

3
src/shader_recompiler/profile.h

@ -38,6 +38,9 @@ struct Profile {
bool support_gl_nv_gpu_shader_5{};
bool support_gl_amd_gpu_shader_half_float{};
bool support_gl_texture_shadow_lod{};
bool support_gl_shader_atomic_float{};
bool support_gl_shader_atomic_fp16_vector{};
bool support_gl_shader_atomic_int64{};
bool support_gl_warp_intrinsics{};
bool support_gl_variable_aoffi{};
bool support_gl_sparse_textures{};

3
src/video_core/renderer_opengl/gl_device.cpp

@ -225,6 +225,9 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
has_draw_texture = GLAD_GL_NV_draw_texture;
has_shader_atomic_float = GLAD_GL_NV_shader_atomic_float;
has_shader_atomic_fp16_vector = GLAD_GL_NV_shader_atomic_fp16_vector;
has_shader_atomic_int64 = GLAD_GL_NV_shader_atomic_int64;
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
need_fastmath_off = is_nvidia;
can_report_memory = GLAD_GL_NVX_gpu_memory_info;

15
src/video_core/renderer_opengl/gl_device.h

@ -152,6 +152,18 @@ public:
return has_draw_texture;
}
/// Returns true if GL_NV_shader_atomic_float is available on this device.
/// The flag mirrors GLAD_GL_NV_shader_atomic_float as captured in the Device constructor and is
/// forwarded to the shader profile so the GLSL backend only enables the extension when present.
bool HasShaderAtomicFloat() const {
return has_shader_atomic_float;
}
/// Returns true if GL_NV_shader_atomic_fp16_vector is available on this device.
/// The flag mirrors GLAD_GL_NV_shader_atomic_fp16_vector as captured in the Device constructor and
/// is forwarded to the shader profile to gate the matching GLSL `#extension` directive.
bool HasShaderAtomicFp16Vector() const {
return has_shader_atomic_fp16_vector;
}
/// Returns true if GL_NV_shader_atomic_int64 is available on this device.
/// The flag mirrors GLAD_GL_NV_shader_atomic_int64 as captured in the Device constructor and is
/// forwarded to the shader profile to gate the matching GLSL `#extension` directive.
bool HasShaderAtomicInt64() const {
return has_shader_atomic_int64;
}
/// Returns the cached warp-size flag.
/// The Device constructor sets it to true when the driver is neither NVIDIA nor Intel.
bool IsWarpSizePotentiallyLargerThanGuest() const {
return warp_size_potentially_larger_than_guest;
}
@ -235,6 +247,9 @@ private:
bool has_amd_shader_half_float{};
bool has_sparse_texture_2{};
bool has_draw_texture{};
bool has_shader_atomic_float{};
bool has_shader_atomic_fp16_vector{};
bool has_shader_atomic_int64{};
bool warp_size_potentially_larger_than_guest{};
bool need_fastmath_off{};
bool has_cbuf_ftou_bug{};

3
src/video_core/renderer_opengl/gl_shader_cache.cpp

@ -215,6 +215,9 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(),
.support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(),
.support_gl_texture_shadow_lod = device.HasTextureShadowLod(),
.support_gl_shader_atomic_float = device.HasShaderAtomicFloat(),
.support_gl_shader_atomic_fp16_vector = device.HasShaderAtomicFp16Vector(),
.support_gl_shader_atomic_int64 = device.HasShaderAtomicInt64(),
.support_gl_warp_intrinsics = false,
.support_gl_variable_aoffi = device.HasVariableAoffi(),
.support_gl_sparse_textures = device.HasSparseTexture2(),

37
src/video_core/vulkan_common/vulkan_device.cpp

@ -1304,6 +1304,43 @@ void Device::RemoveUnsuitableExtensions() {
VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
}
// VK_KHR_shader_float16_int8
const bool float16_int8_requested = extensions.shader_float16_int8;
const bool float16_int8_usable =
features.shader_float16_int8.shaderFloat16 || features.shader_float16_int8.shaderInt8;
if (float16_int8_requested && !float16_int8_usable) {
LOG_WARNING(Render_Vulkan,
"Disabling VK_KHR_shader_float16_int8 — no shaderFloat16/shaderInt8 features reported");
}
extensions.shader_float16_int8 = float16_int8_requested && float16_int8_usable;
RemoveExtensionFeatureIfUnsuitable(float16_int8_usable, features.shader_float16_int8,
VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
// VK_EXT_shader_atomic_float
const bool atomic_float_requested = extensions.shader_atomic_float;
const auto& atomic_float_features = features.shader_atomic_float;
const bool supports_buffer_f32 = atomic_float_features.shaderBufferFloat32Atomics ||
atomic_float_features.shaderBufferFloat32AtomicAdd;
const bool supports_shared_f32 = atomic_float_features.shaderSharedFloat32Atomics ||
atomic_float_features.shaderSharedFloat32AtomicAdd;
const bool supports_image_f32 = atomic_float_features.shaderImageFloat32Atomics ||
atomic_float_features.shaderImageFloat32AtomicAdd;
const bool supports_sparse_f32 = atomic_float_features.sparseImageFloat32Atomics ||
atomic_float_features.sparseImageFloat32AtomicAdd;
const bool supports_buffer_f64 = atomic_float_features.shaderBufferFloat64Atomics ||
atomic_float_features.shaderBufferFloat64AtomicAdd;
const bool supports_shared_f64 = atomic_float_features.shaderSharedFloat64Atomics ||
atomic_float_features.shaderSharedFloat64AtomicAdd;
const bool atomic_float_usable = supports_buffer_f32 || supports_shared_f32 || supports_image_f32 ||
supports_sparse_f32 || supports_buffer_f64 || supports_shared_f64;
if (atomic_float_requested && !atomic_float_usable) {
LOG_WARNING(Render_Vulkan,
"Disabling VK_EXT_shader_atomic_float — no usable atomic float feature bits reported");
}
extensions.shader_atomic_float = atomic_float_requested && atomic_float_usable;
RemoveExtensionFeatureIfUnsuitable(atomic_float_usable, features.shader_atomic_float,
VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME);
// VK_KHR_shader_atomic_int64
extensions.shader_atomic_int64 = features.shader_atomic_int64.shaderBufferInt64Atomics &&
features.shader_atomic_int64.shaderSharedInt64Atomics;

6
src/video_core/vulkan_common/vulkan_device.h

@ -49,6 +49,7 @@ VK_DEFINE_HANDLE(VmaAllocator)
FEATURE(EXT, ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, extended_dynamic_state) \
FEATURE(EXT, ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, extended_dynamic_state2) \
FEATURE(EXT, ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, extended_dynamic_state3) \
FEATURE(EXT, ShaderAtomicFloat, SHADER_ATOMIC_FLOAT, shader_atomic_float) \
FEATURE(EXT, 4444Formats, 4444_FORMATS, format_a4b4g4r4) \
FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \
FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \
@ -611,6 +612,11 @@ public:
return extensions.shader_atomic_int64;
}
/// Returns true if VK_EXT_shader_atomic_float is enabled for this device.
/// RemoveUnsuitableExtensions() keeps this flag set only when the extension was requested and at
/// least one of the atomic float feature bits it checks (float32 or float64) is reported.
bool IsExtShaderAtomicFloatSupported() const {
return extensions.shader_atomic_float;
}
/// Returns the `conditional_rendering` flag from the device's extension table.
/// NOTE(review): presumably tracks VK_EXT_conditional_rendering - confirm against the extension list.
bool IsExtConditionalRendering() const {
return extensions.conditional_rendering;
}

Loading…
Cancel
Save