From ee5565077cfa4c37fdb449ba03e944823d683f12 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 14 Nov 2025 01:32:02 -0400 Subject: [PATCH] [memory, vk] TEST: Tiled GPU optimization try #1 --- src/common/settings.h | 54 +++++++++++++ src/common/settings_enums.h | 10 +++ .../backend/spirv/emit_spirv.cpp | 37 +++++++-- src/shader_recompiler/profile.h | 8 ++ .../renderer_vulkan/pipeline_helper.h | 17 ++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 14 ++++ .../renderer_vulkan/vk_render_pass_cache.cpp | 73 +++++++++++++++--- .../renderer_vulkan/vk_render_pass_cache.h | 7 ++ .../renderer_vulkan/vk_texture_cache.cpp | 75 +++++++++++++------ .../vulkan_common/vulkan_device.cpp | 65 ++++++++++++++++ .../vulkan_common/vulkan_memory_allocator.cpp | 22 +++++- 11 files changed, 343 insertions(+), 39 deletions(-) diff --git a/src/common/settings.h b/src/common/settings.h index 2e16e4bc59..31e54e3be1 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -560,6 +560,60 @@ struct Values { false, &sample_shading}; +#ifdef ANDROID + // Shader Float Controls (Android only) - Eden Veil / Extensions + // Force enable VK_KHR_shader_float_controls even if driver has known issues + // Allows fine-tuning float behavior to match Switch/Maxwell or optimize performance + SwitchableSetting shader_float_controls_force_enable{linkage, + false, + "shader_float_controls_force_enable", + Category::RendererExtensions, + Specialization::Paired}; + + // Individual float behavior controls (visible only when force_enable is true) + // Multiple can be active simultaneously EXCEPT FTZ and DenormPreserve (mutually exclusive) + // + // Recommended configurations: + // Switch-native: FTZ=ON, RTE=ON, SignedZero=ON (matches Maxwell behavior) + // Performance: FTZ=ON only (fastest) + // Accuracy: DenormPreserve=ON, RTE=ON, SignedZero=ON (slowest, highest precision) + SwitchableSetting shader_float_ftz{linkage, + false, + "shader_float_ftz", + Category::RendererExtensions, + Specialization::Default, + true, + false, + &shader_float_controls_force_enable}; + + SwitchableSetting shader_float_denorm_preserve{linkage, + false, + "shader_float_denorm_preserve", + Category::RendererExtensions, + Specialization::Default, + true, + false, + &shader_float_controls_force_enable}; + + SwitchableSetting shader_float_rte{linkage, + false, + "shader_float_rte", + Category::RendererExtensions, + Specialization::Default, + true, + false, + &shader_float_controls_force_enable}; + + SwitchableSetting shader_float_signed_zero_inf_nan{linkage, + false, + "shader_float_signed_zero_inf_nan", + Category::RendererExtensions, + Specialization::Default, + true, + false, + &shader_float_controls_force_enable}; +#endif + Setting renderer_debug{linkage, false, "debug", Category::RendererDebug}; Setting renderer_shader_feedback{linkage, false, "shader_feedback", Category::RendererDebug}; diff --git a/src/common/settings_enums.h b/src/common/settings_enums.h index 3ba2144efc..33daa55519 100644 --- a/src/common/settings_enums.h +++ b/src/common/settings_enums.h @@ -152,6 +152,16 @@ ENUM(SpirvOptimizeMode, Never, OnLoad, Always); ENUM(GpuOverclock, Low, Medium, High) ENUM(TemperatureUnits, Celsius, Fahrenheit) +// Shader Float Controls behavior modes +// These control how floating-point denormals and special values are handled in shaders +ENUM(ShaderFloatBehavior, + DriverDefault, // Let driver choose (safest, may not match Switch behavior) + SwitchNative, // Emulate Switch/Maxwell behavior (FTZ + RTE + SignedZero) + FlushToZero, // FTZ only - flush denorms to zero (fastest, some precision loss) + PreserveDenorms, // Preserve denorms (slowest, highest precision) + RoundToEven, // RTE rounding mode (IEEE 754 compliant) + SignedZeroInfNan); // Preserve signed zero, inf, nan (accuracy for edge cases) + template inline std::string_view CanonicalizeEnum(Type id) { const auto group = EnumMetadata::Canonicalizations(); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 313a1deb30..d3faf4341b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -341,19 +341,35 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, Id main_func) { const Info& info{program.info}; + + // User-forced behavior overrides (Android Eden Veil/Extensions) + // When force flags are active, they take precedence over shader-declared behavior + const bool force_flush = profile.force_fp32_denorm_flush; + const bool force_preserve = profile.force_fp32_denorm_preserve; + + if (force_flush && force_preserve) { + LOG_WARNING(Shader_SPIRV, "Both FTZ and Preserve forced simultaneously - FTZ takes precedence"); + } + if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader"); - } else if (info.uses_fp32_denorms_flush) { + } else if (force_flush || info.uses_fp32_denorms_flush) { if (profile.support_fp32_denorm_flush) { ctx.AddCapability(spv::Capability::DenormFlushToZero); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U); + if (force_flush) { + LOG_DEBUG(Shader_SPIRV, "Fp32 DenormFlushToZero FORCED by user setting"); + } } else { // Drivers will most likely flush denorms by default, no need to warn } - } else if (info.uses_fp32_denorms_preserve) { + } else if (force_preserve || info.uses_fp32_denorms_preserve) { if (profile.support_fp32_denorm_preserve) { ctx.AddCapability(spv::Capability::DenormPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); + if (force_preserve) { + LOG_DEBUG(Shader_SPIRV, "Fp32 DenormPreserve FORCED by user setting"); + } } else { LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support"); } @@ -386,13 +402,24 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) { return; } + + // User-forced behavior (Android Eden Veil/Extensions) + const bool force_signed_zero_inf_nan = profile.force_fp32_signed_zero_inf_nan; + if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); } - if (profile.support_fp32_signed_zero_nan_preserve) { - ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); - ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U); + if (force_signed_zero_inf_nan || profile.support_fp32_signed_zero_nan_preserve) { + if (profile.support_fp32_signed_zero_nan_preserve) { + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U); + if (force_signed_zero_inf_nan) { + LOG_DEBUG(Shader_SPIRV, "Fp32 SignedZeroInfNanPreserve FORCED by user setting"); + } + } else if (force_signed_zero_inf_nan) { + LOG_WARNING(Shader_SPIRV, "SignedZeroInfNanPreserve forced but driver doesn't support it"); + } } if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) { ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 48db64e162..6014221e8f 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -28,6 +28,14 @@ struct Profile { bool support_fp16_signed_zero_nan_preserve{}; bool support_fp32_signed_zero_nan_preserve{}; bool support_fp64_signed_zero_nan_preserve{}; + + // User-forced float behavior overrides (Android Eden Veil/Extensions) + // When shader_float_controls_force_enable is true, these override shader-declared behavior + bool force_fp32_denorm_flush{}; // Force FTZ for all FP32 ops + bool force_fp32_denorm_preserve{}; // Force denorm preservation for all FP32 ops + bool force_fp32_rte_rounding{}; // Force Round-To-Even for all FP32 ops + bool force_fp32_signed_zero_inf_nan{}; // Force signed zero/inf/nan preservation + bool support_explicit_workgroup_layout{}; bool support_vote{}; bool support_viewport_index_layer_non_geometry{}; diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 910e07a606..f8152f5add 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -24,8 +24,21 @@ public: DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} bool CanUsePushDescriptor() const noexcept { - return device->IsKhrPushDescriptorSupported() && - num_descriptors <= device->MaxPushDescriptors(); + if (!device->IsKhrPushDescriptorSupported()) { + return false; + } + if (num_descriptors > device->MaxPushDescriptors()) { + return false; + } + + // Qualcomm has slow push descriptor implementation - use conservative threshold + // Prefer descriptor pools for complex shaders (>8 descriptors) + const bool is_qualcomm = device->GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY; + if (is_qualcomm && num_descriptors > 8) { + return false; + } + + return true; } // TODO(crueter): utilize layout binding flags diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0532df05d8..15805f8480 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -341,6 +341,20 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, .support_fp64_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, + +#ifdef ANDROID + // User-forced float behavior overrides (Eden Veil/Extensions) + .force_fp32_denorm_flush = Settings::values.shader_float_ftz.GetValue(), + .force_fp32_denorm_preserve = Settings::values.shader_float_denorm_preserve.GetValue(), + .force_fp32_rte_rounding = Settings::values.shader_float_rte.GetValue(), + .force_fp32_signed_zero_inf_nan = Settings::values.shader_float_signed_zero_inf_nan.GetValue(), +#else + .force_fp32_denorm_flush = false, + .force_fp32_denorm_preserve = false, + .force_fp32_rte_rounding = false, + .force_fp32_signed_zero_inf_nan = false, +#endif + .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), .support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT), .support_viewport_index_layer_non_geometry = diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index 80ff75e3b9..9499690a85 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -8,6 +8,7 @@ #include +#include "common/logging/log.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/surface.h" @@ -19,6 +20,23 @@ namespace { using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceType; + // Check if the driver uses tile-based deferred rendering (TBDR) architecture + // These GPUs benefit from optimized load/store operations to keep data on-chip + // + // TBDR GPUs supported in Eden: + // - Qualcomm Adreno (Snapdragon): Most Android flagship/midrange devices + // - ARM Mali: Android devices (Samsung Exynos, MediaTek, etc.) + // - Imagination PowerVR: Older iOS devices, some Android tablets + // - Samsung Xclipse: Galaxy S22+ (AMD RDNA2-based, but uses TBDR mode) + // - Broadcom VideoCore: Raspberry Pi + [[nodiscard]] constexpr bool IsTBDRGPU(VkDriverId driver_id) { + return driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || + driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || + driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY || + driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY || + driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY; + } + constexpr SurfaceType GetSurfaceType(PixelFormat format) { switch (format) { // Depth formats @@ -44,23 +62,51 @@ using VideoCore::Surface::SurfaceType; } VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format, - VkSampleCountFlagBits samples) { + VkSampleCountFlagBits samples, + bool tbdr_will_clear, + bool tbdr_discard_after) { using MaxwellToVK::SurfaceFormat; const SurfaceType surface_type = GetSurfaceType(format); const bool has_stencil = surface_type == SurfaceType::DepthStencil || surface_type == SurfaceType::Stencil; + // TBDR optimization: Apply hints only on tile-based GPUs + // Desktop GPUs (NVIDIA/AMD/Intel) ignore these hints and use standard behavior + const bool is_tbdr = IsTBDRGPU(device.GetDriverID()); + + // On TBDR: Use DONT_CARE if clear is guaranteed (avoids loading from main memory) + // On Desktop: Always LOAD to preserve existing content (safer default) + VkAttachmentLoadOp load_op = VK_ATTACHMENT_LOAD_OP_LOAD; + if (is_tbdr && tbdr_will_clear) { + load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + } + + // On TBDR: Use DONT_CARE if content won't be read (avoids storing to main memory) + // On Desktop: Always STORE (safer default) + VkAttachmentStoreOp store_op = VK_ATTACHMENT_STORE_OP_STORE; + if (is_tbdr && tbdr_discard_after) { + store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE; + } + + // Stencil operations follow same logic + VkAttachmentLoadOp stencil_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + VkAttachmentStoreOp stencil_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE; + if (has_stencil) { + stencil_load_op = (is_tbdr && tbdr_will_clear) ? VK_ATTACHMENT_LOAD_OP_DONT_CARE + : VK_ATTACHMENT_LOAD_OP_LOAD; + stencil_store_op = (is_tbdr && tbdr_discard_after) ? VK_ATTACHMENT_STORE_OP_DONT_CARE + : VK_ATTACHMENT_STORE_OP_STORE; + } + return { .flags = {}, .format = SurfaceFormat(device, FormatType::Optimal, true, format).format, .samples = samples, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = has_stencil ? VK_ATTACHMENT_LOAD_OP_LOAD - : VK_ATTACHMENT_LOAD_OP_DONT_CARE, - .stencilStoreOp = has_stencil ? VK_ATTACHMENT_STORE_OP_STORE - : VK_ATTACHMENT_STORE_OP_DONT_CARE, + .loadOp = load_op, + .storeOp = store_op, + .stencilLoadOp = stencil_load_op, + .stencilStoreOp = stencil_store_op, .initialLayout = VK_IMAGE_LAYOUT_GENERAL, .finalLayout = VK_IMAGE_LAYOUT_GENERAL, }; @@ -75,6 +121,13 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { if (!is_new) { return *pair->second; } + + const bool is_tbdr = IsTBDRGPU(device->GetDriverID()); + if (is_tbdr && (key.tbdr_will_clear || key.tbdr_discard_after)) { + LOG_DEBUG(Render_Vulkan, "Creating TBDR-optimized render pass (driver={}, clear={}, discard={})", + static_cast(device->GetDriverID()), key.tbdr_will_clear, key.tbdr_discard_after); + } + boost::container::static_vector descriptions; std::array references{}; u32 num_attachments{}; @@ -87,7 +140,8 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { .layout = VK_IMAGE_LAYOUT_GENERAL, }; if (is_valid) { - descriptions.push_back(AttachmentDescription(*device, format, key.samples)); + descriptions.push_back(AttachmentDescription(*device, format, key.samples, + key.tbdr_will_clear, key.tbdr_discard_after)); num_attachments = static_cast(index + 1); ++num_colors; } @@ -99,7 +153,8 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { .attachment = num_colors, .layout = VK_IMAGE_LAYOUT_GENERAL, }; - descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples)); + descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples, + key.tbdr_will_clear, key.tbdr_discard_after)); } const VkSubpassDescription subpass{ .flags = 0, diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h index 91ad4bf577..76302b5117 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.h +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h @@ -17,6 +17,11 @@ struct RenderPassKey { std::array color_formats; VideoCore::Surface::PixelFormat depth_format; VkSampleCountFlagBits samples; + + // TBDR optimization hints - only affect tile-based GPUs (Qualcomm, ARM, Imagination) + // These flags indicate the expected usage pattern to optimize load/store operations + bool tbdr_will_clear{false}; // Attachment will be cleared with vkCmdClearAttachments + bool tbdr_discard_after{false}; // Attachment won't be read after render pass }; } // namespace Vulkan @@ -27,6 +32,8 @@ struct hash { [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { size_t value = static_cast(key.depth_format) << 48; value ^= static_cast(key.samples) << 52; + value ^= (static_cast(key.tbdr_will_clear) << 56); + value ^= (static_cast(key.tbdr_discard_after) << 57); for (size_t i = 0; i < key.color_formats.size(); ++i) { value ^= static_cast(key.color_formats[i]) << (i * 6); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 1b0619afad..51a4954df1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -160,6 +160,45 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { }; } +/// Emergency fallback for MSAA with HDR formats: degrade to non-MSAA if driver doesn't support +/// shaderStorageImageMultisample (required for msaa_copy_pass) +[[nodiscard]] ImageInfo AdjustMSAAForHDRFormats(const Device& device, ImageInfo info) { + // Only apply emergency fallback if MSAA is requested + if (info.num_samples <= 1) { + return info; + } + + // Check if this is an HDR format that commonly fails with MSAA + const auto vk_format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, + false, info.format).format; + const bool is_hdr_format = vk_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || + vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32; + + if (!is_hdr_format) { + return info; // Not an HDR format, no adjustment needed + } + + // If driver doesn't support shader storage image multisample, MSAACopyPass will fail + // Emergency fallback: degrade to non-MSAA (1 sample) to avoid texture corruption + if (!device.IsStorageImageMultisampleSupported()) { + LOG_ERROR(Render_Vulkan, + "EMERGENCY MSAA FALLBACK: Driver doesn't support shaderStorageImageMultisample. " + "Degrading HDR format {} from {}x MSAA to 1x (non-MSAA) to prevent texture corruption. " + "This will cause visual quality loss but prevents black textures.", + vk_format, info.num_samples); + + // Degrade to non-MSAA + // NOTE: We only change num_samples, NOT dimensions. The ImageInfo dimensions are already + // in "logical" space (full resolution), and MakeImageCreateInfo will handle the conversion + // to physical GPU dimensions based on num_samples automatically. + info.num_samples = 1; + + return info; + } + + return info; // Driver supports MSAA storage images, no adjustment needed +} + [[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator, const ImageInfo& info, std::span view_formats) { if (info.type == ImageType::Buffer) { @@ -1510,10 +1549,20 @@ void TextureCacheRuntime::TickFrame() {} Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler}, - runtime{&runtime_}, original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info, - runtime->ViewFormats(info.format))), - aspect_mask(ImageAspectMask(info.format)) { - if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { + runtime{&runtime_} { + // CRITICAL: Adjust MSAA for HDR formats if driver doesn't support shaderStorageImageMultisample + // This prevents texture corruption by degrading to non-MSAA when msaa_copy_pass would fail + const ImageInfo adjusted_info = AdjustMSAAForHDRFormats(runtime_.device, info_); + + // Update our stored info with adjusted values (may have num_samples=1 now) + info = adjusted_info; + + // Create image with adjusted info + original_image = MakeImage(runtime_.device, runtime_.memory_allocator, adjusted_info, + runtime->ViewFormats(adjusted_info.format)); + aspect_mask = ImageAspectMask(adjusted_info.format); + + if (IsPixelFormatASTC(adjusted_info.format) && !runtime->device.IsOptimalAstcSupported()) { switch (Settings::values.accelerate_astc.GetValue()) { case Settings::AstcDecodeMode::Gpu: if (Settings::values.astc_recompression.GetValue() == @@ -1549,24 +1598,6 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu MakeStorageView(device, level, *original_image, VK_FORMAT_A8B8G8R8_UNORM_PACK32); } } - - // Proactive warning for problematic HDR format + MSAA combinations on Android - // These combinations commonly cause texture flickering/black screens across multiple game engines - // Note: MSAA is native Switch rendering technique, cannot be disabled by emulator - if (info.num_samples > 1) { - const auto vk_format = MaxwellToVK::SurfaceFormat(runtime->device, FormatType::Optimal, - false, info.format).format; - const bool is_hdr_format = vk_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || - vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32; - - if (is_hdr_format) { - LOG_WARNING(Render_Vulkan, - "Creating MSAA image ({}x samples) with HDR format {} (Maxwell: {}). " - "Driver support may be limited on Android (Qualcomm < 800, Mali pre-maintenance5). " - "Format fallback to RGBA16F should prevent issues.", - info.num_samples, vk_format, info.format); - } - } } Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {} diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 84dfef6c5b..0f75f0b6cf 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -540,9 +540,74 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR "Qualcomm drivers have a slow VK_KHR_push_descriptor implementation"); //RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); +#ifdef ANDROID + // Shader Float Controls handling for Qualcomm Adreno + // Default: DISABLED due to historical issues with binning precision causing visual glitches + const bool force_enable = Settings::values.shader_float_controls_force_enable.GetValue(); + + if (force_enable) { + // User explicitly enabled float controls - log detected capabilities and user config + LOG_INFO(Render_Vulkan, "Shader Float Controls FORCE ENABLED by user (Eden Veil/Extensions)"); + + // Log driver capabilities + const auto& fc = float_control; + LOG_INFO(Render_Vulkan, "Driver Float Controls Capabilities:"); + LOG_INFO(Render_Vulkan, " - Denorm Flush FP32: {}", fc.shaderDenormFlushToZeroFloat32 ? "YES" : "NO"); + LOG_INFO(Render_Vulkan, " - Denorm Preserve FP32: {}", fc.shaderDenormPreserveFloat32 ? "YES" : "NO"); + LOG_INFO(Render_Vulkan, " - RTE Rounding FP32: {}", fc.shaderRoundingModeRTEFloat32 ? "YES" : "NO"); + LOG_INFO(Render_Vulkan, " - Signed Zero/Inf/Nan FP32: {}", fc.shaderSignedZeroInfNanPreserveFloat32 ? "YES" : "NO"); + LOG_INFO(Render_Vulkan, " - Independence: {}", + fc.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL ? "ALL" : "LIMITED"); + + // Log user selections + bool ftz = Settings::values.shader_float_ftz.GetValue(); + bool preserve = Settings::values.shader_float_denorm_preserve.GetValue(); + const bool rte = Settings::values.shader_float_rte.GetValue(); + const bool signed_zero = Settings::values.shader_float_signed_zero_inf_nan.GetValue(); + + // Validate mutually exclusive options + if (ftz && preserve) { + LOG_WARNING(Render_Vulkan, + "CONFLICT: FTZ and DenormPreserve are mutually exclusive!"); + LOG_WARNING(Render_Vulkan, + " -> DenormPreserve will take precedence (accuracy over speed)"); + ftz = false; // Preserve takes priority for correctness + } + + LOG_INFO(Render_Vulkan, "User Float Behavior Selection:"); + LOG_INFO(Render_Vulkan, " - Flush To Zero (FTZ): {}", ftz ? "ENABLED" : "disabled"); + LOG_INFO(Render_Vulkan, " - Denorm Preserve: {}", preserve ? "ENABLED" : "disabled"); + LOG_INFO(Render_Vulkan, " - Round To Even (RTE): {}", rte ? "ENABLED" : "disabled"); + LOG_INFO(Render_Vulkan, " - Signed Zero/Inf/Nan: {}", signed_zero ? "ENABLED" : "disabled"); + + // Analyze configuration vs Switch native behavior + const bool matches_switch = ftz && !preserve && rte && signed_zero; + if (matches_switch) { + LOG_INFO(Render_Vulkan, "Configuration MATCHES Switch/Maxwell native behavior (FTZ+RTE+SignedZero)"); + } else if (!ftz && !preserve && !rte && !signed_zero) { + LOG_WARNING(Render_Vulkan, "No float behaviors selected - using driver default (may cause glitches)"); + } else { + LOG_INFO(Render_Vulkan, "Configuration is CUSTOM - testing mode active"); + } + + // Extension stays enabled + LOG_INFO(Render_Vulkan, "VK_KHR_shader_float_controls: ENABLED"); + } else { + // Default behavior - disable float controls + LOG_WARNING(Render_Vulkan, + "Disabling shader float controls on Qualcomm (historical binning precision issues)"); + LOG_INFO(Render_Vulkan, + "To enable: Eden Veil -> Extensions -> Shader Float Controls (Force Enable)"); + RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME); + } +#else + // Non-Android: keep original behavior LOG_WARNING(Render_Vulkan, "Disabling shader float controls and 64-bit integer features on Qualcomm proprietary drivers"); RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME); +#endif + + // Int64 atomics - genuinely broken, always disable RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); features.shader_atomic_int64.shaderBufferInt64Atomics = false; diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index 4cd3442d97..ef41132d41 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -226,11 +226,24 @@ namespace Vulkan { vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const { + // Qualcomm uses unified memory architecture - prefer DEVICE_LOCAL + HOST_VISIBLE + // for zero-copy access without staging buffers + const bool is_qualcomm = device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY; + const bool prefer_unified = is_qualcomm && (usage == MemoryUsage::Upload || + usage == MemoryUsage::Download || + usage == MemoryUsage::Stream); + + VkMemoryPropertyFlags preferred_flags = MemoryUsagePreferredVmaFlags(usage); + if (prefer_unified) { + // Request DEVICE_LOCAL + HOST_VISIBLE for zero-copy on unified memory architectures + preferred_flags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + } + const VmaAllocationCreateInfo alloc_ci = { .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage), .usage = MemoryUsageVma(usage), .requiredFlags = 0, - .preferredFlags = MemoryUsagePreferredVmaFlags(usage), + .preferredFlags = preferred_flags, .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types, .pool = VK_NULL_HANDLE, .pUserData = nullptr, @@ -245,6 +258,13 @@ namespace Vulkan { vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info)); vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags); + if (is_qualcomm && prefer_unified) { + const bool got_unified = (property_flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && + (property_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + LOG_DEBUG(Render_Vulkan, "Qualcomm buffer allocation: usage={}, unified={}, flags=0x{:X}", + static_cast(usage), got_unified, property_flags); + } + u8 *data = reinterpret_cast(alloc_info.pMappedData); const std::span mapped_data = data ? std::span{data, ci.size} : std::span{}; const bool is_coherent = (property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;