diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
index df05dad74a..b3cc896dd4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
@@ -12,10 +15,17 @@ Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
     return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
 }
 
+Id WorkgroupWordPointer(EmitContext& ctx, Id index) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        return ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
+    }
+    return ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
+}
+
 Id Word(EmitContext& ctx, Id offset) {
     const Id shift_id{ctx.Const(2U)};
     const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
-    const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+    const Id pointer{WorkgroupWordPointer(ctx, index)};
     return ctx.OpLoad(ctx.U32[1], pointer);
 }
 
@@ -28,7 +38,9 @@ std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count)
 } // Anonymous namespace
 
 Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    const bool use_explicit = ctx.profile.support_explicit_workgroup_layout &&
+                              ctx.profile.support_explicit_workgroup_layout_u8;
+    if (use_explicit) {
         const Id pointer{
             ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
         return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
@@ -39,7 +51,9 @@ Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
 }
 
 Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    const bool use_explicit = ctx.profile.support_explicit_workgroup_layout &&
+                              ctx.profile.support_explicit_workgroup_layout_u8;
+    if (use_explicit) {
         const Id pointer{
             ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
         return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
@@ -50,7 +64,9 @@ Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
 }
 
 Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    const bool use_explicit = ctx.profile.support_explicit_workgroup_layout &&
+                              ctx.profile.support_explicit_workgroup_layout_u16;
+    if (use_explicit) {
         const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
         return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
     } else {
@@ -60,7 +76,9 @@ Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
 }
 
 Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    const bool use_explicit = ctx.profile.support_explicit_workgroup_layout &&
+                              ctx.profile.support_explicit_workgroup_layout_u16;
+    if (use_explicit) {
         const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
         return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
     } else {
@@ -86,8 +104,8 @@ Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
     const Id shift_id{ctx.Const(2U)};
     const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
     const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(1U))};
-    const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
-    const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
+    const Id lhs_pointer{WorkgroupWordPointer(ctx, base_index)};
+    const Id rhs_pointer{WorkgroupWordPointer(ctx, next_index)};
     return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
                                     ctx.OpLoad(ctx.U32[1], rhs_pointer));
 }
@@ -103,14 +121,16 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
     std::array<Id, 4> values{};
     for (u32 i = 0; i < 4; ++i) {
         const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))};
-        const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+        const Id pointer{WorkgroupWordPointer(ctx, index)};
         values[i] = ctx.OpLoad(ctx.U32[1], pointer);
     }
     return ctx.OpCompositeConstruct(ctx.U32[4], values);
 }
 
 void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    const bool use_explicit = ctx.profile.support_explicit_workgroup_layout &&
+                              ctx.profile.support_explicit_workgroup_layout_u8;
+    if (use_explicit) {
         const Id pointer{
             ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
         ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
@@ -120,7 +140,9 @@ void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
 }
 
 void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
-    if (ctx.profile.support_explicit_workgroup_layout) {
+    const bool use_explicit = ctx.profile.support_explicit_workgroup_layout &&
+                              ctx.profile.support_explicit_workgroup_layout_u16;
+    if (use_explicit) {
         const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
         ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
     } else {
@@ -135,7 +157,7 @@ void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
     } else {
         const Id shift{ctx.Const(2U)};
         const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
-        pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
+        pointer = WorkgroupWordPointer(ctx, word_offset);
     }
     ctx.OpStore(pointer, value);
 }
@@ -149,8 +171,8 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
     const Id shift{ctx.Const(2U)};
     const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
     const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Const(1U))};
-    const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
-    const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
+    const Id lhs_pointer{WorkgroupWordPointer(ctx, word_offset)};
+    const Id rhs_pointer{WorkgroupWordPointer(ctx, next_offset)};
     ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
     ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
 }
@@ -165,7 +187,7 @@ void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
     const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
     for (u32 i = 0; i < 4; ++i) {
         const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))};
-        const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+        const Id pointer{WorkgroupWordPointer(ctx, index)};
         ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
     }
 }
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 8d4aff315a..598b0cbc58 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -635,14 +635,66 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
         return std::make_tuple(variable, element_pointer, pointer);
     }};
 
+    const auto define_bitfield_stores{[&](bool define_u8, bool define_u16) {
+        if (!define_u8 && !define_u16) {
+            return;
+        }
+        const Id func_type{TypeFunction(void_id, U32[1], U32[1])};
+        const auto make_function{[&](u32 mask, u32 size) {
+            const Id loop_header{OpLabel()};
+            const Id continue_block{OpLabel()};
+            const Id merge_block{OpLabel()};
+
+            const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)};
+            const Id offset{OpFunctionParameter(U32[1])};
+            const Id insert_value{OpFunctionParameter(U32[1])};
+            AddLabel();
+            OpBranch(loop_header);
+
+            AddLabel(loop_header);
+            const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Const(2U))};
+            const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Const(3U))};
+            const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Const(mask))};
+            const Id count{Const(size)};
+            OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
+            OpBranch(continue_block);
+
+            AddLabel(continue_block);
+            const Id word_pointer{profile.support_explicit_workgroup_layout
+                                      ? OpAccessChain(shared_u32, shared_memory_u32,
+                                                      u32_zero_value, word_offset)
+                                      : OpAccessChain(shared_u32, shared_memory_u32, word_offset)};
+            const Id old_value{OpLoad(U32[1], word_pointer)};
+            const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset,
+                                                count)};
+            const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Const(1U),
+                                                        u32_zero_value, u32_zero_value, new_value,
+                                                        old_value)};
+            const Id success{OpIEqual(U1, atomic_res, old_value)};
+            OpBranchConditional(success, merge_block, loop_header);
+
+            AddLabel(merge_block);
+            OpReturn();
+            OpFunctionEnd();
+            return func;
+        }};
+        if (define_u8) {
+            shared_store_u8_func = make_function(24, 8);
+        }
+        if (define_u16) {
+            shared_store_u16_func = make_function(16, 16);
+        }
+    }};
+    const bool uses_int8 = program.info.uses_int8;
+    const bool uses_int16 = program.info.uses_int16;
     if (profile.support_explicit_workgroup_layout) {
         AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
         AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
-        if (program.info.uses_int8) {
+        if (uses_int8 && profile.support_explicit_workgroup_layout_u8) {
             AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
             std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1);
         }
-        if (program.info.uses_int16) {
+        if (uses_int16 && profile.support_explicit_workgroup_layout_u16) {
             AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
             std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2);
         }
@@ -652,6 +704,9 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
         std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4);
         std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8);
         std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16);
+        const bool need_u8_fallback = uses_int8 && !profile.support_explicit_workgroup_layout_u8;
+        const bool need_u16_fallback = uses_int16 && !profile.support_explicit_workgroup_layout_u16;
+        define_bitfield_stores(need_u8_fallback, need_u16_fallback);
         return;
     }
     const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)};
@@ -661,47 +716,7 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
     shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
     shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
     interfaces.push_back(shared_memory_u32);
-
-    const Id func_type{TypeFunction(void_id, U32[1], U32[1])};
-    const auto make_function{[&](u32 mask, u32 size) {
-        const Id loop_header{OpLabel()};
-        const Id continue_block{OpLabel()};
-        const Id merge_block{OpLabel()};
-
-        const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)};
-        const Id offset{OpFunctionParameter(U32[1])};
-        const Id insert_value{OpFunctionParameter(U32[1])};
-        AddLabel();
-        OpBranch(loop_header);
-
-        AddLabel(loop_header);
-        const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Const(2U))};
-        const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Const(3U))};
-        const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Const(mask))};
-        const Id count{Const(size)};
-        OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
-        OpBranch(continue_block);
-
-        AddLabel(continue_block);
-        const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)};
-        const Id old_value{OpLoad(U32[1], word_pointer)};
-        const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)};
-        const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Const(1U), u32_zero_value,
-                                                    u32_zero_value, new_value, old_value)};
-        const Id success{OpIEqual(U1, atomic_res, old_value)};
-        OpBranchConditional(success, merge_block, loop_header);
-
-        AddLabel(merge_block);
-        OpReturn();
-        OpFunctionEnd();
-        return func;
-    }};
-    if (program.info.uses_int8) {
-        shared_store_u8_func = make_function(24, 8);
-    }
-    if (program.info.uses_int16) {
-        shared_store_u16_func = make_function(16, 16);
-    }
+    define_bitfield_stores(uses_int8, uses_int16);
 }
 
 void EmitContext::DefineSharedMemoryFunctions(const IR::Program& program) {
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index ea7f5cc76f..8a87374013 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -32,6 +32,8 @@ struct Profile {
     bool support_fp32_signed_zero_nan_preserve{};
    bool support_fp64_signed_zero_nan_preserve{};
     bool support_explicit_workgroup_layout{};
+    bool support_explicit_workgroup_layout_u8{};
+    bool support_explicit_workgroup_layout_u16{};
     bool support_vote{};
     bool support_viewport_index_layer_non_geometry{};
     bool support_viewport_mask{};
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 1be7f8f641..a4384738ea 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -387,6 +387,10 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
         .support_fp64_signed_zero_nan_preserve = float_controls_supported &&
            float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
         .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
+        .support_explicit_workgroup_layout_u8 =
+            device.SupportsWorkgroupExplicitLayout8Bit(),
+        .support_explicit_workgroup_layout_u16 =
+            device.SupportsWorkgroupExplicitLayout16Bit(),
         .support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT),
         .support_viewport_index_layer_non_geometry =
             device.IsExtShaderViewportIndexLayerSupported(),
diff --git a/src/video_core/vulkan_common/vulkan.h b/src/video_core/vulkan_common/vulkan.h
index 8d2e8e2a37..ccdec45b6f 100644
--- a/src/video_core/vulkan_common/vulkan.h
+++ b/src/video_core/vulkan_common/vulkan.h
@@ -29,9 +29,6 @@
 #ifndef VK_KHR_MAINTENANCE_8_EXTENSION_NAME
 #define VK_KHR_MAINTENANCE_8_EXTENSION_NAME "VK_KHR_maintenance8"
 #endif
-#ifndef VK_KHR_MAINTENANCE_9_EXTENSION_NAME
-#define VK_KHR_MAINTENANCE_9_EXTENSION_NAME "VK_KHR_maintenance9"
-#endif
 
 // Sanitize macros
 #undef CreateEvent
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 4d33d07fc8..7c840aa115 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -1530,12 +1530,22 @@ void Device::RemoveUnsuitableExtensions() {
     }
 
     // VK_KHR_workgroup_memory_explicit_layout
-    extensions.workgroup_memory_explicit_layout =
-        features.features.shaderInt16 &&
+    workgroup_memory_explicit_layout_caps.supports_8bit =
+        features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout8BitAccess;
+    workgroup_memory_explicit_layout_caps.supports_16bit =
+        features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout16BitAccess;
+    const bool has_workgroup_base =
         features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout &&
-        features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout8BitAccess &&
-        features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout16BitAccess &&
         features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayoutScalarBlockLayout;
+    if (is_qualcomm) {
+        extensions.workgroup_memory_explicit_layout =
+            features.features.shaderInt16 && has_workgroup_base;
+    } else {
+        extensions.workgroup_memory_explicit_layout =
+            features.features.shaderInt16 && has_workgroup_base &&
+            workgroup_memory_explicit_layout_caps.supports_8bit &&
+            workgroup_memory_explicit_layout_caps.supports_16bit;
+    }
     RemoveExtensionFeatureIfUnsuitable(extensions.workgroup_memory_explicit_layout,
                                        features.workgroup_memory_explicit_layout,
                                        VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
@@ -1606,9 +1616,6 @@ void Device::RemoveUnsuitableExtensions() {
     extensions.maintenance8 = loaded_extensions.contains(VK_KHR_MAINTENANCE_8_EXTENSION_NAME);
     RemoveExtensionIfUnsuitable(extensions.maintenance8, VK_KHR_MAINTENANCE_8_EXTENSION_NAME);
-
-    // VK_KHR_maintenance9 (proposed for Vulkan 1.4, no features)
-    extensions.maintenance9 = loaded_extensions.contains(VK_KHR_MAINTENANCE_9_EXTENSION_NAME);
-    RemoveExtensionIfUnsuitable(extensions.maintenance9, VK_KHR_MAINTENANCE_9_EXTENSION_NAME);
 }
 
 bool Device::SupportsSubgroupStage(VkShaderStageFlags stage_mask) const {
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 06666a4190..a9cd7f4c34 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -100,7 +100,6 @@ VK_DEFINE_HANDLE(VmaAllocator)
     EXTENSION(KHR, MAINTENANCE_3, maintenance3)                                 \
     EXTENSION(KHR, MAINTENANCE_7, maintenance7)                                 \
     EXTENSION(KHR, MAINTENANCE_8, maintenance8)                                 \
-    EXTENSION(KHR, MAINTENANCE_9, maintenance9)                                 \
     EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config)         \
     EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough)     \
     EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2)                             \
@@ -511,6 +510,16 @@ public:
         return extensions.workgroup_memory_explicit_layout;
     }
 
+    /// Returns true if explicit workgroup layout supports 8-bit access.
+    bool SupportsWorkgroupExplicitLayout8Bit() const {
+        return workgroup_memory_explicit_layout_caps.supports_8bit;
+    }
+
+    /// Returns true if explicit workgroup layout supports 16-bit access.
+    bool SupportsWorkgroupExplicitLayout16Bit() const {
+        return workgroup_memory_explicit_layout_caps.supports_16bit;
+    }
+
     /// Returns true if the device supports VK_KHR_image_format_list.
     bool IsKhrImageFormatListSupported() const {
         return extensions.image_format_list || instance_version >= VK_API_VERSION_1_2;
     }
@@ -948,11 +957,6 @@ public:
         return extensions.maintenance8;
     }
 
-    /// Returns true if the device supports VK_KHR_maintenance9.
-    bool IsKhrMaintenance9Supported() const {
-        return extensions.maintenance9;
-    }
-
     [[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id,
                                                            u32 driver_version) {
         if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
@@ -1080,6 +1084,13 @@ private:
     Features features{};
     Properties properties{};
 
+    struct WorkgroupExplicitLayoutCaps {
+        bool supports_8bit{};
+        bool supports_16bit{};
+    };
+
+    WorkgroupExplicitLayoutCaps workgroup_memory_explicit_layout_caps{};
+
     VkPhysicalDeviceFeatures2 features2{};
     VkPhysicalDeviceProperties2 properties2{};
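
For reference only, and not part of the patch: a minimal host-side C++ sketch of what the CAS loop emitted by `make_function(mask, size)` does for byte and halfword stores into 32-bit shared-memory words. The names `shared_words` and `StoreSubWord` are hypothetical stand-ins, not code from the repository.

```cpp
#include <atomic>
#include <cstdint>

// Stand-in for the u32-typed workgroup shared memory array the shader declares.
std::atomic<std::uint32_t> shared_words[1024]{};

// mask = 24, size = 8 mirrors the u8 store path; mask = 16, size = 16 the u16 path.
void StoreSubWord(std::uint32_t offset, std::uint32_t value, std::uint32_t mask,
                  std::uint32_t size) {
    const std::uint32_t word_offset = offset >> 2;          // index of the containing 32-bit word
    const std::uint32_t bit_offset = (offset << 3) & mask;  // bit position of the field in that word
    const std::uint32_t field_mask = ((1u << size) - 1u) << bit_offset;
    std::atomic<std::uint32_t>& word = shared_words[word_offset];
    std::uint32_t old_value = word.load();
    std::uint32_t new_value;
    do {
        // Equivalent of OpBitFieldInsert on the most recently observed word value.
        new_value = (old_value & ~field_mask) | ((value << bit_offset) & field_mask);
        // Equivalent of OpAtomicCompareExchange; on failure old_value is refreshed
        // and the loop retries, mirroring the emitted SPIR-V loop.
    } while (!word.compare_exchange_weak(old_value, new_value));
}
```

The retry is what keeps two invocations writing adjacent bytes of the same word from losing each other's stores, which is why the fallback builds `shared_store_u8_func` / `shared_store_u16_func` even on the explicit-layout path when only the 8-/16-bit access features are missing, rather than doing a plain load-modify-store of the containing word.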