diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp index b3cc896dd4..df05dad74a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp @@ -1,6 +1,3 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -15,17 +12,10 @@ Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) { return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index); } -Id WorkgroupWordPointer(EmitContext& ctx, Id index) { - if (ctx.profile.support_explicit_workgroup_layout) { - return ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index); - } - return ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index); -} - Id Word(EmitContext& ctx, Id offset) { const Id shift_id{ctx.Const(2U)}; const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; - const Id pointer{WorkgroupWordPointer(ctx, index)}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; return ctx.OpLoad(ctx.U32[1], pointer); } @@ -38,9 +28,7 @@ std::pair ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) } // Anonymous namespace Id EmitLoadSharedU8(EmitContext& ctx, Id offset) { - const bool use_explicit = ctx.profile.support_explicit_workgroup_layout && - ctx.profile.support_explicit_workgroup_layout_u8; - if (use_explicit) { + if (ctx.profile.support_explicit_workgroup_layout) { const Id pointer{ ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); @@ -51,9 +39,7 @@ Id EmitLoadSharedU8(EmitContext& ctx, Id offset) { } Id EmitLoadSharedS8(EmitContext& ctx, Id offset) { - const bool use_explicit = ctx.profile.support_explicit_workgroup_layout && - ctx.profile.support_explicit_workgroup_layout_u8; - if (use_explicit) { + if (ctx.profile.support_explicit_workgroup_layout) { const Id pointer{ ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); @@ -64,9 +50,7 @@ Id EmitLoadSharedS8(EmitContext& ctx, Id offset) { } Id EmitLoadSharedU16(EmitContext& ctx, Id offset) { - const bool use_explicit = ctx.profile.support_explicit_workgroup_layout && - ctx.profile.support_explicit_workgroup_layout_u16; - if (use_explicit) { + if (ctx.profile.support_explicit_workgroup_layout) { const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); } else { @@ -76,9 +60,7 @@ Id EmitLoadSharedU16(EmitContext& ctx, Id offset) { } Id EmitLoadSharedS16(EmitContext& ctx, Id offset) { - const bool use_explicit = ctx.profile.support_explicit_workgroup_layout && - ctx.profile.support_explicit_workgroup_layout_u16; - if (use_explicit) { + if (ctx.profile.support_explicit_workgroup_layout) { const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); } else { @@ -104,8 +86,8 @@ Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { const Id shift_id{ctx.Const(2U)}; const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(1U))}; - const Id lhs_pointer{WorkgroupWordPointer(ctx, base_index)}; - const Id rhs_pointer{WorkgroupWordPointer(ctx, next_index)}; + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), ctx.OpLoad(ctx.U32[1], rhs_pointer)); } @@ -121,16 +103,14 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { std::array values{}; for (u32 i = 0; i < 4; ++i) { const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))}; - const Id pointer{WorkgroupWordPointer(ctx, index)}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; values[i] = ctx.OpLoad(ctx.U32[1], pointer); } return ctx.OpCompositeConstruct(ctx.U32[4], values); } void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) { - const bool use_explicit = ctx.profile.support_explicit_workgroup_layout && - ctx.profile.support_explicit_workgroup_layout_u8; - if (use_explicit) { + if (ctx.profile.support_explicit_workgroup_layout) { const Id pointer{ ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value)); @@ -140,9 +120,7 @@ void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) { } void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) { - const bool use_explicit = ctx.profile.support_explicit_workgroup_layout && - ctx.profile.support_explicit_workgroup_layout_u16; - if (use_explicit) { + if (ctx.profile.support_explicit_workgroup_layout) { const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value)); } else { @@ -157,7 +135,7 @@ void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { } else { const Id shift{ctx.Const(2U)}; const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; - pointer = WorkgroupWordPointer(ctx, word_offset); + pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); } ctx.OpStore(pointer, value); } @@ -171,8 +149,8 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { const Id shift{ctx.Const(2U)}; const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Const(1U))}; - const Id lhs_pointer{WorkgroupWordPointer(ctx, word_offset)}; - const Id rhs_pointer{WorkgroupWordPointer(ctx, next_offset)}; + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)}; ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); } @@ -187,7 +165,7 @@ void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; for (u32 i = 0; i < 4; ++i) { const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))}; - const Id pointer{WorkgroupWordPointer(ctx, index)}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i)); } } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 598b0cbc58..8d4aff315a 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -635,66 +635,14 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { return std::make_tuple(variable, element_pointer, pointer); }}; - const auto define_bitfield_stores{[&](bool define_u8, bool define_u16) { - if (!define_u8 && !define_u16) { - return; - } - const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; - const auto make_function{[&](u32 mask, u32 size) { - const Id loop_header{OpLabel()}; - const Id continue_block{OpLabel()}; - const Id merge_block{OpLabel()}; - - const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)}; - const Id offset{OpFunctionParameter(U32[1])}; - const Id insert_value{OpFunctionParameter(U32[1])}; - AddLabel(); - OpBranch(loop_header); - - AddLabel(loop_header); - const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; - const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Const(3U))}; - const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Const(mask))}; - const Id count{Const(size)}; - OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); - OpBranch(continue_block); - - AddLabel(continue_block); - const Id word_pointer{profile.support_explicit_workgroup_layout - ? OpAccessChain(shared_u32, shared_memory_u32, - u32_zero_value, word_offset) - : OpAccessChain(shared_u32, shared_memory_u32, word_offset)}; - const Id old_value{OpLoad(U32[1], word_pointer)}; - const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, - count)}; - const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Const(1U), - u32_zero_value, u32_zero_value, new_value, - old_value)}; - const Id success{OpIEqual(U1, atomic_res, old_value)}; - OpBranchConditional(success, merge_block, loop_header); - - AddLabel(merge_block); - OpReturn(); - OpFunctionEnd(); - return func; - }}; - if (define_u8) { - shared_store_u8_func = make_function(24, 8); - } - if (define_u16) { - shared_store_u16_func = make_function(16, 16); - } - }}; - const bool uses_int8 = program.info.uses_int8; - const bool uses_int16 = program.info.uses_int16; if (profile.support_explicit_workgroup_layout) { AddExtension("SPV_KHR_workgroup_memory_explicit_layout"); AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR); - if (uses_int8 && profile.support_explicit_workgroup_layout_u8) { + if (program.info.uses_int8) { AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR); std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1); } - if (uses_int16 && profile.support_explicit_workgroup_layout_u16) { + if (program.info.uses_int16) { AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR); std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2); } @@ -704,9 +652,6 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4); std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8); std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16); - const bool need_u8_fallback = uses_int8 && !profile.support_explicit_workgroup_layout_u8; - const bool need_u16_fallback = uses_int16 && !profile.support_explicit_workgroup_layout_u16; - define_bitfield_stores(need_u8_fallback, need_u16_fallback); return; } const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; @@ -716,7 +661,47 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); interfaces.push_back(shared_memory_u32); - define_bitfield_stores(uses_int8, uses_int16); + + const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; + const auto make_function{[&](u32 mask, u32 size) { + const Id loop_header{OpLabel()}; + const Id continue_block{OpLabel()}; + const Id merge_block{OpLabel()}; + + const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)}; + const Id offset{OpFunctionParameter(U32[1])}; + const Id insert_value{OpFunctionParameter(U32[1])}; + AddLabel(); + OpBranch(loop_header); + + AddLabel(loop_header); + const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; + const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Const(3U))}; + const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Const(mask))}; + const Id count{Const(size)}; + OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); + OpBranch(continue_block); + + AddLabel(continue_block); + const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)}; + const Id old_value{OpLoad(U32[1], word_pointer)}; + const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)}; + const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Const(1U), u32_zero_value, + u32_zero_value, new_value, old_value)}; + const Id success{OpIEqual(U1, atomic_res, old_value)}; + OpBranchConditional(success, merge_block, loop_header); + + AddLabel(merge_block); + OpReturn(); + OpFunctionEnd(); + return func; + }}; + if (program.info.uses_int8) { + shared_store_u8_func = make_function(24, 8); + } + if (program.info.uses_int16) { + shared_store_u16_func = make_function(16, 16); + } } void EmitContext::DefineSharedMemoryFunctions(const IR::Program& program) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 8a87374013..ea7f5cc76f 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -32,8 +32,6 @@ struct Profile { bool support_fp32_signed_zero_nan_preserve{}; bool support_fp64_signed_zero_nan_preserve{}; bool support_explicit_workgroup_layout{}; - bool support_explicit_workgroup_layout_u8{}; - bool support_explicit_workgroup_layout_u16{}; bool support_vote{}; bool support_viewport_index_layer_non_geometry{}; bool support_viewport_mask{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a4384738ea..1be7f8f641 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -387,10 +387,6 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, .support_fp64_signed_zero_nan_preserve = float_controls_supported && float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), - .support_explicit_workgroup_layout_u8 = - device.SupportsWorkgroupExplicitLayout8Bit(), - .support_explicit_workgroup_layout_u16 = - device.SupportsWorkgroupExplicitLayout16Bit(), .support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT), .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), diff --git a/src/video_core/vulkan_common/vulkan.h b/src/video_core/vulkan_common/vulkan.h index ccdec45b6f..8d2e8e2a37 100644 --- a/src/video_core/vulkan_common/vulkan.h +++ b/src/video_core/vulkan_common/vulkan.h @@ -29,6 +29,9 @@ #ifndef VK_KHR_MAINTENANCE_8_EXTENSION_NAME #define VK_KHR_MAINTENANCE_8_EXTENSION_NAME "VK_KHR_maintenance8" #endif +#ifndef VK_KHR_MAINTENANCE_9_EXTENSION_NAME +#define VK_KHR_MAINTENANCE_9_EXTENSION_NAME "VK_KHR_maintenance9" +#endif // Sanitize macros #undef CreateEvent diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index c0a48f5f1a..515a0fc85e 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1528,22 +1528,12 @@ void Device::RemoveUnsuitableExtensions() { } // VK_KHR_workgroup_memory_explicit_layout - workgroup_memory_explicit_layout_caps.supports_8bit = - features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout8BitAccess; - workgroup_memory_explicit_layout_caps.supports_16bit = - features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout16BitAccess; - const bool has_workgroup_base = + extensions.workgroup_memory_explicit_layout = + features.features.shaderInt16 && features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout && + features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout8BitAccess && + features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout16BitAccess && features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayoutScalarBlockLayout; - if (is_qualcomm) { - extensions.workgroup_memory_explicit_layout = - features.features.shaderInt16 && has_workgroup_base; - } else { - extensions.workgroup_memory_explicit_layout = - features.features.shaderInt16 && has_workgroup_base && - workgroup_memory_explicit_layout_caps.supports_8bit && - workgroup_memory_explicit_layout_caps.supports_16bit; - } RemoveExtensionFeatureIfUnsuitable(extensions.workgroup_memory_explicit_layout, features.workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); @@ -1614,6 +1604,9 @@ void Device::RemoveUnsuitableExtensions() { extensions.maintenance8 = loaded_extensions.contains(VK_KHR_MAINTENANCE_8_EXTENSION_NAME); RemoveExtensionIfUnsuitable(extensions.maintenance8, VK_KHR_MAINTENANCE_8_EXTENSION_NAME); + // VK_KHR_maintenance9 (proposed for Vulkan 1.4, no features) + extensions.maintenance9 = loaded_extensions.contains(VK_KHR_MAINTENANCE_9_EXTENSION_NAME); + RemoveExtensionIfUnsuitable(extensions.maintenance9, VK_KHR_MAINTENANCE_9_EXTENSION_NAME); } bool Device::SupportsSubgroupStage(VkShaderStageFlags stage_mask) const { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index a9cd7f4c34..06666a4190 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -100,6 +100,7 @@ VK_DEFINE_HANDLE(VmaAllocator) EXTENSION(KHR, MAINTENANCE_3, maintenance3) \ EXTENSION(KHR, MAINTENANCE_7, maintenance7) \ EXTENSION(KHR, MAINTENANCE_8, maintenance8) \ + EXTENSION(KHR, MAINTENANCE_9, maintenance9) \ EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \ EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \ EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \ @@ -510,16 +511,6 @@ public: return extensions.workgroup_memory_explicit_layout; } - /// Returns true if explicit workgroup layout supports 8-bit access. - bool SupportsWorkgroupExplicitLayout8Bit() const { - return workgroup_memory_explicit_layout_caps.supports_8bit; - } - - /// Returns true if explicit workgroup layout supports 16-bit access. - bool SupportsWorkgroupExplicitLayout16Bit() const { - return workgroup_memory_explicit_layout_caps.supports_16bit; - } - /// Returns true if the device supports VK_KHR_image_format_list. bool IsKhrImageFormatListSupported() const { return extensions.image_format_list || instance_version >= VK_API_VERSION_1_2; @@ -957,6 +948,11 @@ public: return extensions.maintenance8; } + /// Returns true if the device supports VK_KHR_maintenance9. + bool IsKhrMaintenance9Supported() const { + return extensions.maintenance9; + } + [[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id, u32 driver_version) { if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { @@ -1084,13 +1080,6 @@ private: Features features{}; Properties properties{}; - struct WorkgroupExplicitLayoutCaps { - bool supports_8bit{}; - bool supports_16bit{}; - }; - - WorkgroupExplicitLayoutCaps workgroup_memory_explicit_layout_caps{}; - VkPhysicalDeviceFeatures2 features2{}; VkPhysicalDeviceProperties2 properties2{};