From e9d84d098d59667edd3828fd7c4914f37717455b Mon Sep 17 00:00:00 2001 From: lizzie Date: Fri, 14 Nov 2025 15:07:13 +0100 Subject: [PATCH 01/34] [dynarmic] attempt fix totk regression from #358 (#3013) Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3013 Reviewed-by: MaranBr Reviewed-by: Caio Oliveira Co-authored-by: lizzie Co-committed-by: lizzie --- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 7 +++++-- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 7 +++++-- src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp | 7 +++---- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 163772d8d5..21641744d5 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -210,9 +210,12 @@ std::shared_ptr ArmDynarmic32::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB - // Solaris doesn't support kPageSize >= 512MiB + // Code cache size +#if defined(ARCHITECTURE_arm64) || defined(__sun__) config.code_cache_size = std::uint32_t(128_MiB); +#else + config.code_cache_size = std::uint32_t(512_MiB); +#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 1d74215971..b00a0d4346 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -269,9 +269,12 @@ std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB - // Solaris doesn't support kPageSize >= 512MiB + // Code cache size +#if defined(ARCHITECTURE_arm64) || defined(__sun__) 
config.code_cache_size = std::uint32_t(128_MiB); +#else + config.code_cache_size = std::uint32_t(512_MiB); +#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index b895e42251..0fe738e212 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -80,16 +80,15 @@ public: }; // TODO: Check code alignment - const CodePtr aligned_code_ptr = CodePtr((uintptr_t(GetCurrentBlock()) + 15) & ~uintptr_t(15)); - const CodePtr current_code_ptr = [this, aligned_code_ptr] { + + const CodePtr current_code_ptr = [this] { // RSB optimization const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask; if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) { jit_state.rsb_ptr = new_rsb_ptr; return CodePtr(jit_state.rsb_codeptrs[new_rsb_ptr]); } - return aligned_code_ptr; - //return GetCurrentBlock(); + return CodePtr((uintptr_t(GetCurrentBlock()) + 15) & ~uintptr_t(15)); }(); const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr); From 3f226678dd293f41d48edbef43eee528e129ae46 Mon Sep 17 00:00:00 2001 From: Shinmegumi Date: Fri, 14 Nov 2025 15:13:29 +0100 Subject: [PATCH 02/34] [vk] Fix fallback viewport/scissor origin handling (#294) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When viewport_scale_offset_enabled is disabled, the fallback path previously assumed a top-left origin for both viewport and scissor. This caused incorrect positioning or inverted geometry when the GPU state expected a lower-left origin. This change: - Adjusts viewport setup: if window_origin is lower-left, shift Y and flip height negative to emulate lower-left in Vulkan’s top-left space. 
- Updates scissor setup: recalculates Y for lower-left origin and ensures width/height fall back to 1 if zero, avoiding invalid extents. This aligns Vulkan’s viewport/scissor behavior with Maxwell state, fixing rendering issues in paths without scale/offset enabled. Co-authored-by: MaranBr Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/294 Co-authored-by: Shinmegumi Co-committed-by: Shinmegumi --- .../renderer_vulkan/vk_graphics_pipeline.h | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 55 +++++++++++-------- 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 650c8e07ed..c8e89d60a4 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -80,7 +80,7 @@ public: PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key, std::array stages, const std::array& infos); - // True if this pipeline was created with VK_DYNAMIC_STATE_VERTEX_INPUT_EXT + bool HasDynamicVertexInput() const noexcept { return key.state.dynamic_vertex_input; } GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 134327fa8d..44fe42ce9e 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -984,11 +984,9 @@ void RasterizerVulkan::UpdateDynamicStates() { auto has_float = std::any_of(regs.vertex_attrib_format.begin(), regs.vertex_attrib_format.end(), In(Maxwell3D::Regs::VertexAttribute::Type::Float)); if (regs.logic_op.enable) { regs.logic_op.enable = static_cast(!has_float); - } - UpdateLogicOpEnable(regs); - } else { - UpdateLogicOpEnable(regs); - } + } + } + UpdateLogicOpEnable(regs); 
UpdateDepthClampEnable(regs); UpdateLineStippleEnable(regs); UpdateConservativeRasterizationMode(regs); @@ -1031,19 +1029,25 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg return; } if (!regs.viewport_scale_offset_enabled) { - const auto x = static_cast(regs.surface_clip.x); - const auto y = static_cast(regs.surface_clip.y); - const auto width = static_cast(regs.surface_clip.width); - const auto height = static_cast(regs.surface_clip.height); + float x = static_cast(regs.surface_clip.x); + float y = static_cast(regs.surface_clip.y); + float width = std::max(1.0f, static_cast(regs.surface_clip.width)); + float height = std::max(1.0f, static_cast(regs.surface_clip.height)); + if (regs.window_origin.mode != Maxwell::WindowOrigin::Mode::UpperLeft) { + y += height; + height = -height; + } VkViewport viewport{ .x = x, .y = y, - .width = width != 0.0f ? width : 1.0f, - .height = height != 0.0f ? height : 1.0f, + .width = width, + .height = height, .minDepth = 0.0f, .maxDepth = 1.0f, }; - scheduler.Record([viewport](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewport); }); + scheduler.Record([viewport](vk::CommandBuffer cmdbuf) { + cmdbuf.SetViewport(0, viewport); + }); return; } const bool is_rescaling{texture_cache.IsRescaling()}; @@ -1070,16 +1074,21 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs return; } if (!regs.viewport_scale_offset_enabled) { - const auto x = static_cast(regs.surface_clip.x); - const auto y = static_cast(regs.surface_clip.y); - const auto width = static_cast(regs.surface_clip.width); - const auto height = static_cast(regs.surface_clip.height); - VkRect2D scissor; - scissor.offset.x = static_cast(x); - scissor.offset.y = static_cast(y); - scissor.extent.width = static_cast(width != 0.0f ? width : 1.0f); - scissor.extent.height = static_cast(height != 0.0f ? 
height : 1.0f); - scheduler.Record([scissor](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissor); }); + u32 x = regs.surface_clip.x; + u32 y = regs.surface_clip.y; + u32 width = std::max(1u, static_cast(regs.surface_clip.width)); + u32 height = std::max(1u, static_cast(regs.surface_clip.height)); + if (regs.window_origin.mode != Maxwell::WindowOrigin::Mode::UpperLeft) { + y = regs.surface_clip.height - (y + height); + } + VkRect2D scissor{}; + scissor.offset.x = static_cast(x); + scissor.offset.y = static_cast(y); + scissor.extent.width = width; + scissor.extent.height = height; + scheduler.Record([scissor](vk::CommandBuffer cmdbuf) { + cmdbuf.SetScissor(0, scissor); + }); return; } u32 up_scale = 1; @@ -1607,7 +1616,7 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) highest_dirty_attr = index; } } - for (size_t index = 0; index < highest_dirty_attr; ++index) { + for (size_t index = 0; index <= highest_dirty_attr; ++index) { const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]}; const u32 binding{attribute.buffer}; dirty[Dirty::VertexAttribute0 + index] = false; From e820f304a521837250405203f1e7e33c2a059fc8 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 8 Nov 2025 01:09:05 -0400 Subject: [PATCH 03/34] Giving maintance to driver features and unused extensions --- .../renderer_vulkan/vk_swapchain.cpp | 36 +++++++++++++++- src/video_core/renderer_vulkan/vk_swapchain.h | 2 + src/video_core/vulkan_common/vulkan.h | 28 +++++++++++++ .../vulkan_common/vulkan_device.cpp | 42 +++++++++++++++++-- src/video_core/vulkan_common/vulkan_device.h | 17 ++++++++ 5 files changed, 121 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index fdd2de2379..cb2b3d7520 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -171,6 +171,10 @@ void Swapchain::Create( 
resource_ticks.clear(); resource_ticks.resize(image_count); + + // Initialize incremental-present probe flags for this swapchain. + incremental_present_usable = device.IsKhrIncrementalPresentSupported(); + incremental_present_probed = false; } bool Swapchain::AcquireNextImage() { @@ -202,7 +206,13 @@ bool Swapchain::AcquireNextImage() { void Swapchain::Present(VkSemaphore render_semaphore) { const auto present_queue{device.GetPresentQueue()}; - const VkPresentInfoKHR present_info{ + // If the device advertises VK_KHR_incremental_present, we attempt a one-time probe + // on the first present to validate the driver/compositor accepts present-region info. + VkPresentRegionsKHR present_regions{}; + VkPresentRegionKHR region{}; + VkRect2D rect{}; + + VkPresentInfoKHR present_info{ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .pNext = nullptr, .waitSemaphoreCount = render_semaphore ? 1U : 0U, @@ -212,6 +222,20 @@ void Swapchain::Present(VkSemaphore render_semaphore) { .pImageIndices = &image_index, .pResults = nullptr, }; + + if (incremental_present_usable && !incremental_present_probed) { + // Build a minimal present-region describing a single 1x1 dirty rect at (0,0). + rect.offset = {0, 0}; + rect.extent = {1, 1}; + region.rectangleCount = 1; + region.pRectangles = &rect; + present_regions.sType = VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR; + present_regions.pNext = nullptr; + present_regions.swapchainCount = 1; + present_regions.pRegions = &region; + + present_info.pNext = &present_regions; + } std::scoped_lock lock{scheduler.submit_mutex}; switch (const VkResult result = present_queue.Present(present_info)) { case VK_SUCCESS: @@ -227,8 +251,18 @@ void Swapchain::Present(VkSemaphore render_semaphore) { break; default: LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", string_VkResult(result)); + // If the first present with incremental-present pNext failed, disable future use. 
+ if (incremental_present_usable && !incremental_present_probed) { + incremental_present_usable = false; + LOG_WARNING(Render_Vulkan, "Disabling VK_KHR_incremental_present for this swapchain due to present failure: {}", string_VkResult(result)); + } break; } + if (incremental_present_usable && !incremental_present_probed) { + // Mark probe as completed if we reached here (success or handled failure above). + incremental_present_probed = true; + LOG_INFO(Render_Vulkan, "VK_KHR_incremental_present probe completed: usable={}", incremental_present_usable); + } ++frame_index; if (frame_index >= image_count) { frame_index = 0; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index b3e1c4f025..2c6055fba3 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -158,6 +158,8 @@ private: bool is_outdated{}; bool is_suboptimal{}; + bool incremental_present_usable{}; + bool incremental_present_probed{}; }; } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan.h b/src/video_core/vulkan_common/vulkan.h index 13f679ff54..7092243f7f 100644 --- a/src/video_core/vulkan_common/vulkan.h +++ b/src/video_core/vulkan_common/vulkan.h @@ -22,6 +22,34 @@ #include +#ifndef VK_KHR_MAINTENANCE_1_EXTENSION_NAME +# define VK_KHR_MAINTENANCE_1_EXTENSION_NAME "VK_KHR_maintenance1" +#endif +#ifndef VK_KHR_MAINTENANCE_2_EXTENSION_NAME +# define VK_KHR_MAINTENANCE_2_EXTENSION_NAME "VK_KHR_maintenance2" +#endif +#ifndef VK_KHR_MAINTENANCE_3_EXTENSION_NAME +# define VK_KHR_MAINTENANCE_3_EXTENSION_NAME "VK_KHR_maintenance3" +#endif +#ifndef VK_KHR_MAINTENANCE_4_EXTENSION_NAME +# define VK_KHR_MAINTENANCE_4_EXTENSION_NAME "VK_KHR_maintenance4" +#endif +#ifndef VK_KHR_MAINTENANCE_5_EXTENSION_NAME +# define VK_KHR_MAINTENANCE_5_EXTENSION_NAME "VK_KHR_maintenance5" +#endif +#ifndef VK_KHR_MAINTENANCE_6_EXTENSION_NAME +# define VK_KHR_MAINTENANCE_6_EXTENSION_NAME 
"VK_KHR_maintenance6" +#endif +#ifndef VK_KHR_MAINTENANCE_7_EXTENSION_NAME +# define VK_KHR_MAINTENANCE_7_EXTENSION_NAME "VK_KHR_maintenance7" +#endif +#ifndef VK_KHR_MAINTENANCE_8_EXTENSION_NAME +# define VK_KHR_MAINTENANCE_8_EXTENSION_NAME "VK_KHR_maintenance8" +#endif +#ifndef VK_KHR_MAINTENANCE_9_EXTENSION_NAME +# define VK_KHR_MAINTENANCE_9_EXTENSION_NAME "VK_KHR_maintenance9" +#endif + // Sanitize macros #undef CreateEvent #undef CreateSemaphore diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 81e60f1c6a..dd794af70f 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -416,7 +416,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR const bool is_suitable = GetSuitability(surface != nullptr); const VkDriverId driver_id = properties.driver.driverID; - const auto device_id = properties.properties.deviceID; const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV; const bool is_amd_driver = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE; @@ -681,9 +680,31 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR has_broken_compute = CheckBrokenCompute(properties.driver.driverID, properties.properties.driverVersion) && !Settings::values.enable_compute_pipelines.GetValue(); - if (is_intel_anv || (is_qualcomm && !is_s8gen2)) { - LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format"); + must_emulate_bgr565 = false; // Default: assume emulation isn't required + + if (is_intel_anv) { + LOG_WARNING(Render_Vulkan, "Intel ANV driver does not support native BGR format"); must_emulate_bgr565 = true; + } else if (is_qualcomm) { + // Qualcomm driver version where VK_KHR_maintenance5 and A1B5G5R5 become reliable + constexpr uint32_t QUALCOMM_FIXED_DRIVER_VERSION = VK_MAKE_VERSION(512, 800, 1); + // Check if VK_KHR_maintenance5 is 
supported + if (extensions.maintenance5 && properties.properties.driverVersion >= QUALCOMM_FIXED_DRIVER_VERSION) { + LOG_INFO(Render_Vulkan, "Qualcomm driver supports VK_KHR_maintenance5, disabling BGR emulation"); + must_emulate_bgr565 = false; + } else { + LOG_WARNING(Render_Vulkan, "Qualcomm driver doesn't support native BGR, emulating formats"); + must_emulate_bgr565 = true; + } + } else if (is_turnip) { + // Mesa Turnip added support for maintenance5 in Mesa 25.0 + if (extensions.maintenance5) { + LOG_INFO(Render_Vulkan, "Turnip driver supports VK_KHR_maintenance5, disabling BGR emulation"); + must_emulate_bgr565 = false; + } else { + LOG_WARNING(Render_Vulkan, "Turnip driver doesn't support native BGR, emulating formats"); + must_emulate_bgr565 = true; + } } if (extensions.push_descriptor && is_intel_anv) { const u32 version = (properties.properties.driverVersion << 3) >> 3; @@ -1300,6 +1321,21 @@ void Device::RemoveUnsuitableExtensions() { RemoveExtensionFeatureIfUnsuitable(extensions.transform_feedback, features.transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); + // VK_EXT_robustness2 + extensions.robustness_2 = + features.robustness2.robustBufferAccess2 && features.robustness2.robustImageAccess2; + RemoveExtensionFeatureIfUnsuitable(extensions.robustness_2, features.robustness2, + VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); + + // VK_EXT_image_robustness + extensions.image_robustness = features.image_robustness.robustImageAccess; + RemoveExtensionFeatureIfUnsuitable(extensions.image_robustness, features.image_robustness, + VK_EXT_IMAGE_ROBUSTNESS_EXTENSION_NAME); + + // VK_EXT_swapchain_maintenance1 + extensions.swapchain_maintenance1 = loaded_extensions.contains(VK_EXT_SWAPCHAIN_MAINTENANCE_1_EXTENSION_NAME); + RemoveExtensionIfUnsuitable(extensions.swapchain_maintenance1, VK_EXT_SWAPCHAIN_MAINTENANCE_1_EXTENSION_NAME); + // VK_EXT_vertex_input_dynamic_state extensions.vertex_input_dynamic_state = 
features.vertex_input_dynamic_state.vertexInputDynamicState; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index cb13f28523..6f60741fea 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -52,6 +52,7 @@ VK_DEFINE_HANDLE(VmaAllocator) FEATURE(EXT, 4444Formats, 4444_FORMATS, format_a4b4g4r4) \ FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \ FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \ + FEATURE(EXT, ImageRobustness, IMAGE_ROBUSTNESS, image_robustness) \ FEATURE(EXT, PrimitiveTopologyListRestart, PRIMITIVE_TOPOLOGY_LIST_RESTART, \ primitive_topology_list_restart) \ FEATURE(EXT, ProvokingVertex, PROVOKING_VERTEX, provoking_vertex) \ @@ -82,7 +83,9 @@ VK_DEFINE_HANDLE(VmaAllocator) EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \ EXTENSION(KHR, SPIRV_1_4, spirv_1_4) \ EXTENSION(KHR, SWAPCHAIN, swapchain) \ + EXTENSION(KHR, INCREMENTAL_PRESENT, incremental_present) \ EXTENSION(KHR, SWAPCHAIN_MUTABLE_FORMAT, swapchain_mutable_format) \ + EXTENSION(EXT, SWAPCHAIN_MAINTENANCE_1, swapchain_maintenance1) \ EXTENSION(KHR, IMAGE_FORMAT_LIST, image_format_list) \ EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \ EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \ @@ -91,6 +94,15 @@ VK_DEFINE_HANDLE(VmaAllocator) EXTENSION(EXT, DESCRIPTOR_INDEXING, descriptor_indexing) \ EXTENSION(EXT, FILTER_CUBIC, filter_cubic) \ EXTENSION(QCOM, FILTER_CUBIC_WEIGHTS, filter_cubic_weights) + EXTENSION(KHR, MAINTENANCE_1, maintenance1) \ + EXTENSION(KHR, MAINTENANCE_2, maintenance2) \ + EXTENSION(KHR, MAINTENANCE_3, maintenance3) \ + EXTENSION(KHR, MAINTENANCE_4, maintenance4) \ + EXTENSION(KHR, MAINTENANCE_5, maintenance5) \ + EXTENSION(KHR, MAINTENANCE_6, maintenance6) \ + EXTENSION(KHR, MAINTENANCE_7, maintenance7) \ + EXTENSION(KHR, MAINTENANCE_8, maintenance8) \ + 
EXTENSION(KHR, MAINTENANCE_9, maintenance9) \ // Define extensions which must be supported. #define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \ @@ -455,6 +467,11 @@ public: return extensions.image_format_list || instance_version >= VK_API_VERSION_1_2; } + /// Returns true if the device supports VK_KHR_incremental_present. + bool IsKhrIncrementalPresentSupported() const { + return extensions.incremental_present; + } + /// Returns true if the device supports VK_EXT_primitive_topology_list_restart. bool IsTopologyListPrimitiveRestartSupported() const { return features.primitive_topology_list_restart.primitiveTopologyListRestart; From 5f501d6ec0f2e1b2f444b8dff9998a3d4efbac8b Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 8 Nov 2025 01:28:35 -0400 Subject: [PATCH 04/34] attempt to fix building issues --- src/video_core/renderer_vulkan/present/fsr.cpp | 3 +++ src/video_core/renderer_vulkan/present/fxaa.cpp | 3 +++ src/video_core/renderer_vulkan/present/layer.cpp | 1 + src/video_core/renderer_vulkan/present/util.cpp | 1 + src/video_core/renderer_vulkan/vk_swapchain.h | 3 +++ 5 files changed, 11 insertions(+) diff --git a/src/video_core/renderer_vulkan/present/fsr.cpp b/src/video_core/renderer_vulkan/present/fsr.cpp index 3f708be704..8422a00204 100644 --- a/src/video_core/renderer_vulkan/present/fsr.cpp +++ b/src/video_core/renderer_vulkan/present/fsr.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/video_core/renderer_vulkan/present/fxaa.cpp b/src/video_core/renderer_vulkan/present/fxaa.cpp index bdafd1f4d0..d53fd29b9e 100644 --- a/src/video_core/renderer_vulkan/present/fxaa.cpp +++ b/src/video_core/renderer_vulkan/present/fxaa.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: 
GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/video_core/renderer_vulkan/present/layer.cpp b/src/video_core/renderer_vulkan/present/layer.cpp index 5676dfe62a..a3bb5739cf 100644 --- a/src/video_core/renderer_vulkan/present/layer.cpp +++ b/src/video_core/renderer_vulkan/present/layer.cpp @@ -17,6 +17,7 @@ #include "video_core/renderer_vulkan/present/util.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/textures/decoders.h" +#include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/present/util.cpp b/src/video_core/renderer_vulkan/present/util.cpp index 29a1c34976..76ccc6c0e0 100644 --- a/src/video_core/renderer_vulkan/present/util.cpp +++ b/src/video_core/renderer_vulkan/present/util.cpp @@ -7,6 +7,7 @@ #include "common/assert.h" #include #include "video_core/renderer_vulkan/present/util.h" +#include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 2c6055fba3..2d619959b0 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later From b5f7735dba24e3f1321d7cd8acdbf61957360c54 Mon Sep 17 00:00:00 2001 From: crueter Date: Sat, 8 Nov 2025 12:41:15 -0500 Subject: [PATCH 05/34] build Signed-off-by: crueter --- src/CMakeLists.txt | 2 +- .../renderer_vulkan/present/fsr.cpp | 3 --- .../renderer_vulkan/present/fxaa.cpp | 3 --- .../renderer_vulkan/present/layer.cpp | 1 - .../renderer_vulkan/present/util.cpp | 2 -- .../renderer_vulkan/vk_swapchain.cpp | 8 +++---- 
.../vulkan_common/vulkan_device.cpp | 4 +++- src/video_core/vulkan_common/vulkan_device.h | 22 +++++++++---------- 8 files changed, 19 insertions(+), 26 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2510458812..5387de6191 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -121,7 +121,7 @@ else() -Werror=unused -Wno-attributes - -Wno-invalid-offsetof + $<$:-Wno-invalid-offsetof> -Wno-unused-parameter -Wno-missing-field-initializers ) diff --git a/src/video_core/renderer_vulkan/present/fsr.cpp b/src/video_core/renderer_vulkan/present/fsr.cpp index 8422a00204..3f708be704 100644 --- a/src/video_core/renderer_vulkan/present/fsr.cpp +++ b/src/video_core/renderer_vulkan/present/fsr.cpp @@ -1,6 +1,3 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/video_core/renderer_vulkan/present/fxaa.cpp b/src/video_core/renderer_vulkan/present/fxaa.cpp index d53fd29b9e..bdafd1f4d0 100644 --- a/src/video_core/renderer_vulkan/present/fxaa.cpp +++ b/src/video_core/renderer_vulkan/present/fxaa.cpp @@ -1,6 +1,3 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - // SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/video_core/renderer_vulkan/present/layer.cpp b/src/video_core/renderer_vulkan/present/layer.cpp index a3bb5739cf..5676dfe62a 100644 --- a/src/video_core/renderer_vulkan/present/layer.cpp +++ b/src/video_core/renderer_vulkan/present/layer.cpp @@ -17,7 +17,6 @@ #include "video_core/renderer_vulkan/present/util.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/textures/decoders.h" -#include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { diff --git 
a/src/video_core/renderer_vulkan/present/util.cpp b/src/video_core/renderer_vulkan/present/util.cpp index 76ccc6c0e0..148e99b477 100644 --- a/src/video_core/renderer_vulkan/present/util.cpp +++ b/src/video_core/renderer_vulkan/present/util.cpp @@ -5,9 +5,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" -#include #include "video_core/renderer_vulkan/present/util.h" -#include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index cb2b3d7520..d3fd0c340b 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -210,7 +210,7 @@ void Swapchain::Present(VkSemaphore render_semaphore) { // on the first present to validate the driver/compositor accepts present-region info. VkPresentRegionsKHR present_regions{}; VkPresentRegionKHR region{}; - VkRect2D rect{}; + VkRectLayerKHR layer{}; VkPresentInfoKHR present_info{ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, @@ -225,10 +225,10 @@ void Swapchain::Present(VkSemaphore render_semaphore) { if (incremental_present_usable && !incremental_present_probed) { // Build a minimal present-region describing a single 1x1 dirty rect at (0,0). 
- rect.offset = {0, 0}; - rect.extent = {1, 1}; + layer.offset = {0, 0}; + layer.extent = {1, 1}; region.rectangleCount = 1; - region.pRectangles = &rect; + region.pRectangles = &layer; present_regions.sType = VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR; present_regions.pNext = nullptr; present_regions.swapchainCount = 1; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index dd794af70f..1f70093379 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -416,6 +416,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR const bool is_suitable = GetSuitability(surface != nullptr); const VkDriverId driver_id = properties.driver.driverID; + // uncomment this if you want per-device overrides :P + // const u32 device_id = properties.properties.deviceID; + const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV; const bool is_amd_driver = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE; @@ -426,7 +429,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR const bool is_mvk = driver_id == VK_DRIVER_ID_MOLTENVK; const bool is_qualcomm = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY; const bool is_turnip = driver_id == VK_DRIVER_ID_MESA_TURNIP; - const bool is_s8gen2 = device_id == 0x43050a01; const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY; if ((is_mvk || is_qualcomm || is_turnip || is_arm) && !is_suitable) { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 6f60741fea..71b431c9fc 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -83,7 +83,7 @@ VK_DEFINE_HANDLE(VmaAllocator) EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \ EXTENSION(KHR, SPIRV_1_4, spirv_1_4) \ EXTENSION(KHR, SWAPCHAIN, swapchain) \ - EXTENSION(KHR, 
INCREMENTAL_PRESENT, incremental_present) \ + EXTENSION(KHR, INCREMENTAL_PRESENT, incremental_present) \ EXTENSION(KHR, SWAPCHAIN_MUTABLE_FORMAT, swapchain_mutable_format) \ EXTENSION(EXT, SWAPCHAIN_MAINTENANCE_1, swapchain_maintenance1) \ EXTENSION(KHR, IMAGE_FORMAT_LIST, image_format_list) \ @@ -93,16 +93,16 @@ VK_DEFINE_HANDLE(VmaAllocator) EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle) \ EXTENSION(EXT, DESCRIPTOR_INDEXING, descriptor_indexing) \ EXTENSION(EXT, FILTER_CUBIC, filter_cubic) \ - EXTENSION(QCOM, FILTER_CUBIC_WEIGHTS, filter_cubic_weights) - EXTENSION(KHR, MAINTENANCE_1, maintenance1) \ - EXTENSION(KHR, MAINTENANCE_2, maintenance2) \ - EXTENSION(KHR, MAINTENANCE_3, maintenance3) \ - EXTENSION(KHR, MAINTENANCE_4, maintenance4) \ - EXTENSION(KHR, MAINTENANCE_5, maintenance5) \ - EXTENSION(KHR, MAINTENANCE_6, maintenance6) \ - EXTENSION(KHR, MAINTENANCE_7, maintenance7) \ - EXTENSION(KHR, MAINTENANCE_8, maintenance8) \ - EXTENSION(KHR, MAINTENANCE_9, maintenance9) \ + EXTENSION(QCOM, FILTER_CUBIC_WEIGHTS, filter_cubic_weights) \ + EXTENSION(KHR, MAINTENANCE_1, maintenance1) \ + EXTENSION(KHR, MAINTENANCE_2, maintenance2) \ + EXTENSION(KHR, MAINTENANCE_3, maintenance3) \ + EXTENSION(KHR, MAINTENANCE_4, maintenance4) \ + EXTENSION(KHR, MAINTENANCE_5, maintenance5) \ + EXTENSION(KHR, MAINTENANCE_6, maintenance6) \ + EXTENSION(KHR, MAINTENANCE_7, maintenance7) \ + EXTENSION(KHR, MAINTENANCE_8, maintenance8) \ + EXTENSION(KHR, MAINTENANCE_9, maintenance9) // Define extensions which must be supported. 
#define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \ From ec9e0f37ea81a57794833a61dbd939d3d04a90e0 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 8 Nov 2025 16:13:24 -0400 Subject: [PATCH 06/34] Implement handling for texture cache flickering --- src/video_core/texture_cache/texture_cache.h | 27 +++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2a44a5e8b2..3926fd87ac 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1491,7 +1491,32 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA for (const ImageId overlap_id : join_ignore_textures) { Image& overlap = slot_images[overlap_id]; if (True(overlap.flags & ImageFlagBits::GpuModified)) { - UNIMPLEMENTED(); + // Merge GPU-modified contents from the overlapping image into the newly + // created image to preserve guest-visible data. Compute shrink/scale + // copies and dispatch a GPU-side copy. This mirrors the behavior used + // for overlaps handled in join_copies_to_do above. + new_image.flags |= ImageFlagBits::GpuModified; + const auto& resolution = Settings::values.resolution_info; + const auto base_opt = new_image.TryFindBase(overlap.gpu_addr); + if (base_opt) { + const SubresourceBase base = base_opt.value(); + const u32 up_scale = can_rescale ? resolution.up_scale : 1; + const u32 down_shift = can_rescale ? resolution.down_shift : 0; + auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift); + if (overlap.info.num_samples != new_image.info.num_samples) { + runtime.CopyImageMSAA(new_image, overlap, FixSmallVectorADL(copies)); + } else { + runtime.CopyImage(new_image, overlap, FixSmallVectorADL(copies)); + } + new_image.modification_tick = overlap.modification_tick; + } else { + // If we cannot determine a base mapping, fallback to preserving the + // overlap (avoid deleting GPU-modified data) and log the event so + // it can be investigated, we're trying to pinpoint the issue of texture flickering. 
+ LOG_WARNING(HW_GPU, "Could not map overlap gpu_addr {:#x} into new image; preserving overlap {}", + overlap.gpu_addr, overlap_id); + continue; + } } if (True(overlap.flags & ImageFlagBits::Tracked)) { UntrackImage(overlap, overlap_id); From 4f3e4bf9cb7b5ec52f25fc42323cbeda32e22d57 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 8 Nov 2025 22:48:36 +0000 Subject: [PATCH 07/34] fix --- src/video_core/texture_cache/texture_cache.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 3926fd87ac..8ac025f1df 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1513,8 +1513,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA // If we cannot determine a base mapping, fallback to preserving the // overlap (avoid deleting GPU-modified data) and log the event so // it can be investigated, we're trying to pinpoint the issue of texture flickering. - LOG_WARNING(HW_GPU, "Could not map overlap gpu_addr {:#x} into new image; preserving overlap {}", - overlap.gpu_addr, overlap_id); + LOG_WARNING(HW_GPU, "Could not map overlap gpu_addr {:#x} into new image; preserving overlap", u64(overlap.gpu_addr)); continue; } } From 8133d4a8b4eba3759e158a89dce582539d8d8ad0 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 9 Nov 2025 01:22:15 -0400 Subject: [PATCH 08/34] Improved handling for Custom Border Color buggy impl on ARM/ QCOM and Turnip --- .../vulkan_common/vulkan_device.cpp | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 1f70093379..5d3fabbd43 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -495,11 +495,23 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectPhysicalMemoryInfo(); CollectToolingInfo(); - if (is_qualcomm || is_turnip) { - LOG_WARNING(Render_Vulkan, - "Qualcomm and Turnip drivers have broken VK_EXT_custom_border_color"); - //RemoveExtensionFeature(extensions.custom_border_color, features.custom_border_color, - //VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); + // Driver-specific handling for VK_EXT_custom_border_color + // On some Qualcomm/Turnip/ARM drivers the extension may be partially implemented. + // Enable it if ANY useful feature bit is reported; otherwise, let the removal pass drop it. 
+ if (is_qualcomm || is_turnip || is_arm) { + const bool has_any_custom_border_color = + features.custom_border_color.customBorderColors || + features.custom_border_color.customBorderColorWithoutFormat; + if (!has_any_custom_border_color) { + LOG_WARNING(Render_Vulkan, + "Disabling VK_EXT_custom_border_color on '{}' — no usable custom border color features reported", + properties.driver.driverName); + // Do not clear here; final removal happens in RemoveUnsuitableExtensions based on bits. + } else { + LOG_INFO(Render_Vulkan, + "Partial VK_EXT_custom_border_color support detected on '{}' — enabling available features", + properties.driver.driverName); + } } if (is_qualcomm) { @@ -707,6 +719,15 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_WARNING(Render_Vulkan, "Turnip driver doesn't support native BGR, emulating formats"); must_emulate_bgr565 = true; } + } else if (is_arm) { + // ARM Mali: stop emulating BGR5 formats when VK_KHR_maintenance5 is available + if (extensions.maintenance5) { + LOG_INFO(Render_Vulkan, "ARM driver supports VK_KHR_maintenance5, disabling BGR emulation"); + must_emulate_bgr565 = false; + } else { + LOG_WARNING(Render_Vulkan, "ARM driver doesn't support native BGR, emulating formats"); + must_emulate_bgr565 = true; + } } if (extensions.push_descriptor && is_intel_anv) { const u32 version = (properties.properties.driverVersion << 3) >> 3; From ec274a855eadcff04c2846012c5ceb774e4780b8 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 9 Nov 2025 02:23:53 -0400 Subject: [PATCH 09/34] TEST: Enabling TimelineSemaphores for QCOM and Turnip --- src/video_core/vulkan_common/vulkan_device.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 5d3fabbd43..5d59fdbc4d 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1017,13 +1017,6 @@ 
bool Device::ShouldBoostClocks() const { } bool Device::HasTimelineSemaphore() const { - if (GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || - GetDriverID() == VK_DRIVER_ID_MESA_TURNIP) { - // Timeline semaphores do not work properly on all Qualcomm drivers. - // They generally work properly with Turnip drivers, but are problematic on some devices - // (e.g. ZTE handsets with Snapdragon 870). - return false; - } return features.timeline_semaphore.timelineSemaphore; } From d25da944eddd8a41ceb49b1a199a2c0a9151f325 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 9 Nov 2025 16:18:01 -0400 Subject: [PATCH 10/34] Changing checks in HostMemor for virtual memory mapping --- src/common/host_memory.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index 5400b97018..ff7859a1f7 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -730,7 +730,9 @@ void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length, ASSERT(virtual_offset % PageAlignment == 0); ASSERT(host_offset % PageAlignment == 0); ASSERT(length % PageAlignment == 0); - ASSERT(virtual_offset + length <= virtual_size); + if (impl && virtual_base) { + ASSERT(virtual_offset + length <= virtual_size); + } ASSERT(host_offset + length <= backing_size); if (length == 0 || !virtual_base || !impl) { return; @@ -741,7 +743,9 @@ void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length, void HostMemory::Unmap(size_t virtual_offset, size_t length, bool separate_heap) { ASSERT(virtual_offset % PageAlignment == 0); ASSERT(length % PageAlignment == 0); - ASSERT(virtual_offset + length <= virtual_size); + if (impl && virtual_base) { + ASSERT(virtual_offset + length <= virtual_size); + } if (length == 0 || !virtual_base || !impl) { return; } @@ -751,7 +755,9 @@ void HostMemory::Unmap(size_t virtual_offset, size_t length, bool separate_heap) void HostMemory::Protect(size_t 
virtual_offset, size_t length, MemoryPermission perm) { ASSERT(virtual_offset % PageAlignment == 0); ASSERT(length % PageAlignment == 0); - ASSERT(virtual_offset + length <= virtual_size); + if (impl && virtual_base) { + ASSERT(virtual_offset + length <= virtual_size); + } if (length == 0 || !virtual_base || !impl) { return; } From 5f88deeebfd8c5d5b78744c96e92482ae93702d8 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 9 Nov 2025 19:54:03 -0400 Subject: [PATCH 11/34] [gl. vk] Extending impl for atomic floats operations --- .../backend/glsl/glsl_emit_context.cpp | 7 ++-- src/shader_recompiler/profile.h | 3 ++ src/video_core/renderer_opengl/gl_device.cpp | 3 ++ src/video_core/renderer_opengl/gl_device.h | 15 ++++++++ .../renderer_opengl/gl_shader_cache.cpp | 3 ++ .../vulkan_common/vulkan_device.cpp | 37 +++++++++++++++++++ src/video_core/vulkan_common/vulkan_device.h | 6 +++ 7 files changed, 71 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp index 2bf7f4de13..579b6ceeeb 100644 --- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp @@ -380,13 +380,14 @@ void EmitContext::SetupExtensions() { if (info.uses_int64 && profile.support_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } - if (info.uses_int64_bit_atomics) { + if (info.uses_int64_bit_atomics && profile.support_gl_shader_atomic_int64) { header += "#extension GL_NV_shader_atomic_int64 : enable\n"; } - if (info.uses_atomic_f32_add) { + if (info.uses_atomic_f32_add && profile.support_gl_shader_atomic_float) { header += "#extension GL_NV_shader_atomic_float : enable\n"; } - if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { + if ((info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) && + profile.support_gl_shader_atomic_fp16_vector) { 
header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n"; } if (info.uses_fp16) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 90e46bb1ba..c6851959f0 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -38,6 +38,9 @@ struct Profile { bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; bool support_gl_texture_shadow_lod{}; + bool support_gl_shader_atomic_float{}; + bool support_gl_shader_atomic_fp16_vector{}; + bool support_gl_shader_atomic_int64{}; bool support_gl_warp_intrinsics{}; bool support_gl_variable_aoffi{}; bool support_gl_sparse_textures{}; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index f5bf995d00..131808c25a 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -225,6 +225,9 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; has_draw_texture = GLAD_GL_NV_draw_texture; + has_shader_atomic_float = GLAD_GL_NV_shader_atomic_float; + has_shader_atomic_fp16_vector = GLAD_GL_NV_shader_atomic_fp16_vector; + has_shader_atomic_int64 = GLAD_GL_NV_shader_atomic_int64; warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; need_fastmath_off = is_nvidia; can_report_memory = GLAD_GL_NVX_gpu_memory_info; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index a5a6bbbba7..a25daba8eb 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -152,6 +152,18 @@ public: return has_draw_texture; } + bool HasShaderAtomicFloat() const { + return has_shader_atomic_float; + } + + bool HasShaderAtomicFp16Vector() const { + return has_shader_atomic_fp16_vector; + } + + bool HasShaderAtomicInt64() const { + return 
has_shader_atomic_int64; + } + bool IsWarpSizePotentiallyLargerThanGuest() const { return warp_size_potentially_larger_than_guest; } @@ -235,6 +247,9 @@ private: bool has_amd_shader_half_float{}; bool has_sparse_texture_2{}; bool has_draw_texture{}; + bool has_shader_atomic_float{}; + bool has_shader_atomic_fp16_vector{}; + bool has_shader_atomic_int64{}; bool warp_size_potentially_larger_than_guest{}; bool need_fastmath_off{}; bool has_cbuf_ftou_bug{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 45f729698e..881c906b79 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -215,6 +215,9 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), + .support_gl_shader_atomic_float = device.HasShaderAtomicFloat(), + .support_gl_shader_atomic_fp16_vector = device.HasShaderAtomicFp16Vector(), + .support_gl_shader_atomic_int64 = device.HasShaderAtomicInt64(), .support_gl_warp_intrinsics = false, .support_gl_variable_aoffi = device.HasVariableAoffi(), .support_gl_sparse_textures = device.HasSparseTexture2(), diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 5d59fdbc4d..44cd114bd3 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1304,6 +1304,43 @@ void Device::RemoveUnsuitableExtensions() { VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); } + // VK_KHR_shader_float16_int8 + const bool float16_int8_requested = extensions.shader_float16_int8; + const bool float16_int8_usable = + features.shader_float16_int8.shaderFloat16 || features.shader_float16_int8.shaderInt8; + if (float16_int8_requested && 
!float16_int8_usable) { + LOG_WARNING(Render_Vulkan, + "Disabling VK_KHR_shader_float16_int8 — no shaderFloat16/shaderInt8 features reported"); + } + extensions.shader_float16_int8 = float16_int8_requested && float16_int8_usable; + RemoveExtensionFeatureIfUnsuitable(float16_int8_usable, features.shader_float16_int8, + VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); + + // VK_EXT_shader_atomic_float + const bool atomic_float_requested = extensions.shader_atomic_float; + const auto& atomic_float_features = features.shader_atomic_float; + const bool supports_buffer_f32 = atomic_float_features.shaderBufferFloat32Atomics || + atomic_float_features.shaderBufferFloat32AtomicAdd; + const bool supports_shared_f32 = atomic_float_features.shaderSharedFloat32Atomics || + atomic_float_features.shaderSharedFloat32AtomicAdd; + const bool supports_image_f32 = atomic_float_features.shaderImageFloat32Atomics || + atomic_float_features.shaderImageFloat32AtomicAdd; + const bool supports_sparse_f32 = atomic_float_features.sparseImageFloat32Atomics || + atomic_float_features.sparseImageFloat32AtomicAdd; + const bool supports_buffer_f64 = atomic_float_features.shaderBufferFloat64Atomics || + atomic_float_features.shaderBufferFloat64AtomicAdd; + const bool supports_shared_f64 = atomic_float_features.shaderSharedFloat64Atomics || + atomic_float_features.shaderSharedFloat64AtomicAdd; + const bool atomic_float_usable = supports_buffer_f32 || supports_shared_f32 || supports_image_f32 || + supports_sparse_f32 || supports_buffer_f64 || supports_shared_f64; + if (atomic_float_requested && !atomic_float_usable) { + LOG_WARNING(Render_Vulkan, + "Disabling VK_EXT_shader_atomic_float — no usable atomic float feature bits reported"); + } + extensions.shader_atomic_float = atomic_float_requested && atomic_float_usable; + RemoveExtensionFeatureIfUnsuitable(atomic_float_usable, features.shader_atomic_float, + VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME); + // VK_KHR_shader_atomic_int64 
extensions.shader_atomic_int64 = features.shader_atomic_int64.shaderBufferInt64Atomics && features.shader_atomic_int64.shaderSharedInt64Atomics; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 71b431c9fc..a6f7d5fb07 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -49,6 +49,7 @@ VK_DEFINE_HANDLE(VmaAllocator) FEATURE(EXT, ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, extended_dynamic_state) \ FEATURE(EXT, ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, extended_dynamic_state2) \ FEATURE(EXT, ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, extended_dynamic_state3) \ + FEATURE(EXT, ShaderAtomicFloat, SHADER_ATOMIC_FLOAT, shader_atomic_float) \ FEATURE(EXT, 4444Formats, 4444_FORMATS, format_a4b4g4r4) \ FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \ FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \ @@ -611,6 +612,11 @@ public: return extensions.shader_atomic_int64; } + /// Returns true if the device supports VK_EXT_shader_atomic_float. 
+ bool IsExtShaderAtomicFloatSupported() const { + return extensions.shader_atomic_float; + } + bool IsExtConditionalRendering() const { return extensions.conditional_rendering; } From b9954de1ca80d4610ffe896c711cd404db480103 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 9 Nov 2025 19:58:57 -0400 Subject: [PATCH 12/34] Fixing missing headers --- src/shader_recompiler/profile.h | 3 +++ src/video_core/renderer_opengl/gl_device.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c6851959f0..48db64e162 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index a25daba8eb..96a84bb874 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later From 033531509bc3465f743527af3a8cd59f3b3b0c70 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 9 Nov 2025 23:14:51 -0400 Subject: [PATCH 13/34] [shader_recompiler, spir-v] Adding INT64 emulation path --- .../backend/spirv/emit_spirv_memory.cpp | 12 +- .../backend/spirv/spirv_emit_context.cpp | 195 ++++++++++++++++-- .../backend/spirv/spirv_emit_context.h | 9 + 3 files changed, 192 insertions(+), 24 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index bdcbccfde9..0ac7086995 100644 --- 
a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -92,7 +92,7 @@ void EmitLoadGlobalS16(EmitContext&) { } Id EmitLoadGlobal32(EmitContext& ctx, Id address) { - if (ctx.profile.support_int64) { + if (ctx.SupportsNativeInt64() || ctx.UsesInt64Emulation()) { return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address); } LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); @@ -100,7 +100,7 @@ Id EmitLoadGlobal32(EmitContext& ctx, Id address) { } Id EmitLoadGlobal64(EmitContext& ctx, Id address) { - if (ctx.profile.support_int64) { + if (ctx.SupportsNativeInt64() || ctx.UsesInt64Emulation()) { return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address); } LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); @@ -108,7 +108,7 @@ Id EmitLoadGlobal64(EmitContext& ctx, Id address) { } Id EmitLoadGlobal128(EmitContext& ctx, Id address) { - if (ctx.profile.support_int64) { + if (ctx.SupportsNativeInt64() || ctx.UsesInt64Emulation()) { return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address); } LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); @@ -132,7 +132,7 @@ void EmitWriteGlobalS16(EmitContext&) { } void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) { - if (ctx.profile.support_int64) { + if (ctx.SupportsNativeInt64() || ctx.UsesInt64Emulation()) { ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value); return; } @@ -140,7 +140,7 @@ void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) { } void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) { - if (ctx.profile.support_int64) { + if (ctx.SupportsNativeInt64() || ctx.UsesInt64Emulation()) { ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value); return; } @@ -148,7 +148,7 @@ void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) { } void 
EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) { - if (ctx.profile.support_int64) { + if (ctx.SupportsNativeInt64() || ctx.UsesInt64Emulation()) { ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value); return; } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 4c3e101433..c4b72b5888 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -460,9 +460,10 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, IR::Program& program, Bindings& bindings) - : Sirit::Module(profile_.supported_spirv), profile{profile_}, runtime_info{runtime_info_}, - stage{program.stage}, texture_rescaling_index{bindings.texture_scaling_index}, - image_rescaling_index{bindings.image_scaling_index} { + : Sirit::Module(profile_.supported_spirv), profile{profile_}, runtime_info{runtime_info_}, + stage{program.stage}, emulate_int64{program.info.uses_int64 && !profile.support_int64}, + texture_rescaling_index{bindings.texture_scaling_index}, + image_rescaling_index{bindings.image_scaling_index} { const bool is_unified{profile.unified_descriptor_binding}; u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer}; u32& storage_binding{is_unified ? 
bindings.unified : bindings.storage_buffer}; @@ -932,11 +933,163 @@ void EmitContext::DefineWriteStorageCasLoopFunction(const Info& info) { } void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { - if (!info.uses_global_memory || !profile.support_int64) { + if (!info.uses_global_memory) { return; } using DefPtr = Id StorageDefinitions::*; const Id zero{u32_zero_value}; + + if (SupportsNativeInt64()) { + const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift, + auto&& callback) { + AddLabel(); + const size_t num_buffers{info.storage_buffers_descriptors.size()}; + for (size_t index = 0; index < num_buffers; ++index) { + if (!info.nvn_buffer_used[index]) { + continue; + } + const auto& ssbo{info.storage_buffers_descriptors[index]}; + const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)}; + const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)}; + const Id ssbo_addr_pointer{OpAccessChain( + uniform_types.U32x2, cbufs[ssbo.cbuf_index].U32x2, zero, + ssbo_addr_cbuf_offset)}; + const Id ssbo_size_pointer{OpAccessChain( + uniform_types.U32, cbufs[ssbo.cbuf_index].U32, zero, ssbo_size_cbuf_offset)}; + + const u64 ssbo_align_mask{~(profile.min_ssbo_alignment - 1U)}; + const Id unaligned_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))}; + const Id ssbo_addr{OpBitwiseAnd(U64, unaligned_addr, Constant(U64, ssbo_align_mask))}; + const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))}; + const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)}; + const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr), + OpULessThan(U1, addr, ssbo_end))}; + const Id then_label{OpLabel()}; + const Id else_label{OpLabel()}; + OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone); + OpBranchConditional(cond, then_label, else_label); + AddLabel(then_label); + const Id ssbo_id{ssbos[index].*ssbo_member}; + const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))}; + const Id 
ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))}; + const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)}; + callback(ssbo_pointer); + AddLabel(else_label); + } + }}; + const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { + const Id function_type{TypeFunction(type, U64)}; + const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)}; + const Id addr{OpFunctionParameter(U64)}; + define_body(ssbo_member, addr, element_pointer, shift, + [&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); }); + OpReturnValue(ConstantNull(type)); + OpFunctionEnd(); + return func_id; + }}; + const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { + const Id function_type{TypeFunction(void_id, U64, type)}; + const Id func_id{ + OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)}; + const Id addr{OpFunctionParameter(U64)}; + const Id data{OpFunctionParameter(type)}; + define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) { + OpStore(ssbo_pointer, data); + OpReturn(); + }); + OpReturn(); + OpFunctionEnd(); + return func_id; + }}; + const auto define{ + [&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) { + const Id element_type{type_def.element}; + const u32 shift{static_cast(std::countr_zero(size))}; + const Id load_func{define_load(ssbo_member, element_type, type, shift)}; + const Id write_func{define_write(ssbo_member, element_type, type, shift)}; + return std::make_pair(load_func, write_func); + }}; + std::tie(load_global_func_u32, write_global_func_u32) = + define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32)); + std::tie(load_global_func_u32x2, write_global_func_u32x2) = + define(&StorageDefinitions::U32x2, storage_types.U32x2, U32[2], sizeof(u32[2])); + std::tie(load_global_func_u32x4, write_global_func_u32x4) = + 
define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4])); + return; + } + + if (!UsesInt64Emulation()) { + return; + } + + const auto make_pair = [&](Id lo, Id hi) { + return OpCompositeConstruct(U32[2], lo, hi); + }; + const auto split_pair = [&](Id value) { + return std::array{OpCompositeExtract(U32[1], value, 0U), + OpCompositeExtract(U32[1], value, 1U)}; + }; + const auto bool_to_u32 = [&](Id predicate) { + return OpSelect(U32[1], predicate, Const(1u), zero); + }; + const auto and_pair = [&](Id value, Id mask) { + const auto value_parts{split_pair(value)}; + const auto mask_parts{split_pair(mask)}; + return make_pair(OpBitwiseAnd(U32[1], value_parts[0], mask_parts[0]), + OpBitwiseAnd(U32[1], value_parts[1], mask_parts[1])); + }; + const auto add_pair = [&](Id lhs, Id rhs) { + const auto lhs_parts{split_pair(lhs)}; + const auto rhs_parts{split_pair(rhs)}; + const Id sum_lo{OpIAdd(U32[1], lhs_parts[0], rhs_parts[0])}; + const Id carry{OpULessThan(U1, sum_lo, lhs_parts[0])}; + Id sum_hi{OpIAdd(U32[1], lhs_parts[1], rhs_parts[1])}; + sum_hi = OpIAdd(U32[1], sum_hi, bool_to_u32(carry)); + return make_pair(sum_lo, sum_hi); + }; + const auto sub_pair = [&](Id lhs, Id rhs) { + const auto lhs_parts{split_pair(lhs)}; + const auto rhs_parts{split_pair(rhs)}; + const Id borrow{OpULessThan(U1, lhs_parts[0], rhs_parts[0])}; + const Id diff_lo{OpISub(U32[1], lhs_parts[0], rhs_parts[0])}; + Id diff_hi{OpISub(U32[1], lhs_parts[1], rhs_parts[1])}; + diff_hi = OpISub(U32[1], diff_hi, bool_to_u32(borrow)); + return make_pair(diff_lo, diff_hi); + }; + const auto shift_right_pair = [&](Id value, u32 shift) { + if (shift == 0) { + return value; + } + const auto parts{split_pair(value)}; + const Id shift_id{Const(shift)}; + const Id high_shifted{OpShiftRightLogical(U32[1], parts[1], shift_id)}; + Id low_shifted{OpShiftRightLogical(U32[1], parts[0], shift_id)}; + const Id carry_bits{OpShiftLeftLogical(U32[1], parts[1], Const(32u - shift))}; + low_shifted = 
OpBitwiseOr(U32[1], low_shifted, carry_bits); + return make_pair(low_shifted, high_shifted); + }; + const auto greater_equal_pair = [&](Id lhs, Id rhs) { + const auto lhs_parts{split_pair(lhs)}; + const auto rhs_parts{split_pair(rhs)}; + const Id hi_gt{OpUGreaterThan(U1, lhs_parts[1], rhs_parts[1])}; + const Id hi_eq{OpIEqual(U1, lhs_parts[1], rhs_parts[1])}; + const Id lo_ge{OpUGreaterThanEqual(U1, lhs_parts[0], rhs_parts[0])}; + return OpLogicalOr(U1, hi_gt, OpLogicalAnd(U1, hi_eq, lo_ge)); + }; + const auto less_than_pair = [&](Id lhs, Id rhs) { + const auto lhs_parts{split_pair(lhs)}; + const auto rhs_parts{split_pair(rhs)}; + const Id hi_lt{OpULessThan(U1, lhs_parts[1], rhs_parts[1])}; + const Id hi_eq{OpIEqual(U1, lhs_parts[1], rhs_parts[1])}; + const Id lo_lt{OpULessThan(U1, lhs_parts[0], rhs_parts[0])}; + return OpLogicalOr(U1, hi_lt, OpLogicalAnd(U1, hi_eq, lo_lt)); + }; + + const u64 ssbo_align_mask_value{~(profile.min_ssbo_alignment - 1U)}; + const Id ssbo_align_mask{ + Const(static_cast(ssbo_align_mask_value & 0xFFFFFFFFu), + static_cast(ssbo_align_mask_value >> 32))}; + const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift, auto&& callback) { AddLabel(); @@ -953,40 +1106,44 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32, zero, ssbo_size_cbuf_offset)}; - const u64 ssbo_align_mask{~(profile.min_ssbo_alignment - 1U)}; - const Id unaligned_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))}; - const Id ssbo_addr{OpBitwiseAnd(U64, unaligned_addr, Constant(U64, ssbo_align_mask))}; - const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))}; - const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)}; - const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr), - OpULessThan(U1, addr, ssbo_end))}; + const Id unaligned_addr_pair{OpLoad(U32[2], ssbo_addr_pointer)}; + const Id 
ssbo_addr_pair{and_pair(unaligned_addr_pair, ssbo_align_mask)}; + const Id ssbo_size_value{OpLoad(U32[1], ssbo_size_pointer)}; + const Id ssbo_size_pair{make_pair(ssbo_size_value, zero)}; + const Id ssbo_end_pair{add_pair(ssbo_addr_pair, ssbo_size_pair)}; + const Id cond{OpLogicalAnd(U1, greater_equal_pair(addr, ssbo_addr_pair), + less_than_pair(addr, ssbo_end_pair))}; const Id then_label{OpLabel()}; const Id else_label{OpLabel()}; OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone); OpBranchConditional(cond, then_label, else_label); AddLabel(then_label); const Id ssbo_id{ssbos[index].*ssbo_member}; - const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))}; - const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))}; + const Id ssbo_offset_pair{sub_pair(addr, ssbo_addr_pair)}; + const Id ssbo_index_pair{shift_right_pair(ssbo_offset_pair, shift)}; + const Id ssbo_index{OpCompositeExtract(U32[1], ssbo_index_pair, 0U)}; const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)}; callback(ssbo_pointer); AddLabel(else_label); } }}; + const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { - const Id function_type{TypeFunction(type, U64)}; + const Id function_type{TypeFunction(type, U32[2])}; const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)}; - const Id addr{OpFunctionParameter(U64)}; + const Id addr{OpFunctionParameter(U32[2])}; define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); }); OpReturnValue(ConstantNull(type)); OpFunctionEnd(); return func_id; }}; + const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { - const Id function_type{TypeFunction(void_id, U64, type)}; - const Id func_id{OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)}; - const Id addr{OpFunctionParameter(U64)}; + const Id 
function_type{TypeFunction(void_id, U32[2], type)}; + const Id func_id{ + OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)}; + const Id addr{OpFunctionParameter(U32[2])}; const Id data{OpFunctionParameter(type)}; define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) { OpStore(ssbo_pointer, data); @@ -996,6 +1153,7 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { OpFunctionEnd(); return func_id; }}; + const auto define{ [&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) { const Id element_type{type_def.element}; @@ -1004,6 +1162,7 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { const Id write_func{define_write(ssbo_member, element_type, type, shift)}; return std::make_pair(load_func, write_func); }}; + std::tie(load_global_func_u32, write_global_func_u32) = define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32)); std::tie(load_global_func_u32x2, write_global_func_u32x2) = diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 66cdb1d3db..2dbeeb0911 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -207,6 +207,15 @@ public: const Profile& profile; const RuntimeInfo& runtime_info; Stage stage{}; + const bool emulate_int64{}; + + bool SupportsNativeInt64() const { + return profile.support_int64; + } + + bool UsesInt64Emulation() const { + return emulate_int64; + } Id void_id{}; Id U1{}; From d3595fd2b1287633bc778c4b52dc894889001450 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Mon, 10 Nov 2025 00:22:08 -0400 Subject: [PATCH 14/34] [gl, vk, texture cache] Attempt to get correct MSAA image upload and download --- .../renderer_opengl/gl_texture_cache.h | 4 ++ .../renderer_vulkan/vk_texture_cache.h | 4 ++ src/video_core/texture_cache/image_base.cpp | 4 -- 
src/video_core/texture_cache/texture_cache.h | 50 ++++++++++++++++--- .../texture_cache/texture_cache_base.h | 2 + 5 files changed, 54 insertions(+), 10 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index d4165d8e4d..a673c4feef 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -97,6 +97,10 @@ public: return true; } + bool CanDownloadMSAA() const noexcept { + return true; + } + void CopyImage(Image& dst, Image& src, std::span copies); void CopyImageMSAA(Image& dst, Image& src, std::span copies); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index cd11cc8fc7..17db56bb43 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -85,6 +85,10 @@ public: return msaa_copy_pass.operator bool(); } + bool CanDownloadMSAA() const noexcept { + return msaa_copy_pass.operator bool(); + } + void AccelerateImageUpload(Image&, const StagingBufferRef&, std::span); diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 077df28fb3..93abad3c5c 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -131,10 +131,6 @@ bool ImageBase::IsSafeDownload() const noexcept { if (True(flags & ImageFlagBits::CpuModified)) { return false; } - if (info.num_samples > 1) { - LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); - return false; - } return true; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8ac025f1df..f1ac55555c 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -101,8 +101,12 @@ void TextureCache

::RunGarbageCollector() { if (!aggressive_mode && True(image.flags & ImageFlagBits::CostlyLoad)) { return false; } - const bool must_download = - image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); + const bool supports_msaa_download = HasMsaaDownloadSupport(image.info); + if (!supports_msaa_download && image.info.num_samples > 1) { + LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); + } + const bool must_download = supports_msaa_download && image.IsSafeDownload() && + False(image.flags & ImageFlagBits::BadOverlap); if (!high_priority_mode && must_download) { return false; } @@ -548,10 +552,14 @@ void TextureCache

::WriteMemory(DAddr cpu_addr, size_t size) { template void TextureCache

::DownloadMemory(DAddr cpu_addr, size_t size) { boost::container::small_vector images; - ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { + ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { if (!image.IsSafeDownload()) { return; } + if (!HasMsaaDownloadSupport(image.info)) { + LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); + return; + } image.flags &= ~ImageFlagBits::GpuModified; images.push_back(image_id); }); @@ -930,6 +938,17 @@ ImageId TextureCache

::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo return NULL_IMAGE_ID; } auto& image = slot_images[dst_id]; + if (image.info.num_samples > 1) { + if (is_upload) { + if (!HasMsaaUploadSupport(image.info)) { + return NULL_IMAGE_ID; + } + } else { + if (!HasMsaaDownloadSupport(image.info)) { + return NULL_IMAGE_ID; + } + } + } if (False(image.flags & ImageFlagBits::GpuModified)) { // No need to waste time on an image that's synced with guest return NULL_IMAGE_ID; @@ -1056,7 +1075,7 @@ void TextureCache

::RefreshContents(Image& image, ImageId image_id) { image.flags &= ~ImageFlagBits::CpuModified; TrackImage(image, image_id); - if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) { + if (!HasMsaaUploadSupport(image.info)) { LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); runtime.TransitionImageLayout(image); return; @@ -1274,6 +1293,16 @@ u64 TextureCache

::GetScaledImageSizeBytes(const ImageBase& image) { return fitted_size; } +template +bool TextureCache

::HasMsaaUploadSupport(const ImageInfo& info) const noexcept { + return info.num_samples <= 1 || runtime.CanUploadMSAA(); +} + +template +bool TextureCache

::HasMsaaDownloadSupport(const ImageInfo& info) const noexcept { + return info.num_samples <= 1 || runtime.CanDownloadMSAA(); +} + template void TextureCache

::QueueAsyncDecode(Image& image, ImageId image_id) { UNIMPLEMENTED_IF(False(image.flags & ImageFlagBits::Converted)); @@ -1575,6 +1604,10 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA for (const auto& copy_object : join_copies_to_do) { Image& overlap = slot_images[copy_object.id]; if (copy_object.is_alias) { + if (!HasMsaaDownloadSupport(overlap.info)) { + LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); + continue; + } if (!overlap.IsSafeDownload()) { continue; } @@ -2491,8 +2524,13 @@ void TextureCache

::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) if (new_id) { const ImageViewBase& old_view = slot_image_views[new_id]; if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { - const PendingDownload new_download{true, 0, old_view.image_id}; - uncommitted_downloads.emplace_back(new_download); + const ImageBase& image = slot_images[old_view.image_id]; + if (!HasMsaaDownloadSupport(image.info)) { + LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); + } else { + const PendingDownload new_download{true, 0, old_view.image_id}; + uncommitted_downloads.emplace_back(new_download); + } } } *old_id = new_id; diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 01a9a6a3f1..2435f6fa75 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -426,6 +426,8 @@ private: bool ScaleUp(Image& image); bool ScaleDown(Image& image); u64 GetScaledImageSizeBytes(const ImageBase& image); + [[nodiscard]] bool HasMsaaUploadSupport(const ImageInfo& info) const noexcept; + [[nodiscard]] bool HasMsaaDownloadSupport(const ImageInfo& info) const noexcept; void QueueAsyncDecode(Image& image, ImageId image_id); void TickAsyncDecode(); From 2e68f8795de548766833ba8b9ef6fdcc02892692 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Mon, 10 Nov 2025 00:27:38 -0400 Subject: [PATCH 15/34] Adding missing headers --- src/video_core/renderer_opengl/gl_texture_cache.h | 1 + src/video_core/renderer_vulkan/vk_texture_cache.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index a673c4feef..7833de54a5 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -1,3 +1,4 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-FileCopyrightText: Copyright 2019 
yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 17db56bb43..437d082c4c 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -1,3 +1,4 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later From ddd78c3b37a7047d7d37a90ae1d5012e00c36733 Mon Sep 17 00:00:00 2001 From: PavelBARABANOV Date: Mon, 10 Nov 2025 18:52:31 +0300 Subject: [PATCH 16/34] Revert "TEST: Enabling TimelineSemaphores for QCOM and Turnip" This reverts commit 3cd33fce44cf0412d4f76f3cf601b73b881662cb. --- src/video_core/vulkan_common/vulkan_device.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 44cd114bd3..ddaca2669c 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1017,6 +1017,13 @@ bool Device::ShouldBoostClocks() const { } bool Device::HasTimelineSemaphore() const { + if (GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || + GetDriverID() == VK_DRIVER_ID_MESA_TURNIP) { + // Timeline semaphores do not work properly on all Qualcomm drivers. + // They generally work properly with Turnip drivers, but are problematic on some devices + // (e.g. ZTE handsets with Snapdragon 870). 
+ return false; + } return features.timeline_semaphore.timelineSemaphore; } From 1ca19af7fb4eec93480bfcaaa785a4f2ac33108d Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 13 Nov 2025 18:42:49 -0400 Subject: [PATCH 17/34] [shader_recompiler, spir-v] verifying int64 emulation path activation --- .../backend/spirv/emit_spirv_memory.cpp | 3 +++ .../backend/spirv/spirv_emit_context.cpp | 6 ++++- .../backend/spirv/spirv_emit_context.h | 3 +++ .../global_memory_to_storage_buffer_pass.cpp | 27 +++++++++++++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 0ac7086995..88b3717498 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index c4b72b5888..ccaa8da9e0 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -461,7 +461,11 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, IR::Program& program, Bindings& bindings) : Sirit::Module(profile_.supported_spirv), profile{profile_}, runtime_info{runtime_info_}, - stage{program.stage}, emulate_int64{program.info.uses_int64 && !profile.support_int64}, + stage{program.stage}, + // Enable int64 emulation if host lacks int64 but we either use int64 ops + // or we need 64-bit addressing for global memory operations. 
+ emulate_int64{!profile.support_int64 && + (program.info.uses_int64 || program.info.uses_global_memory)}, texture_rescaling_index{bindings.texture_scaling_index}, image_rescaling_index{bindings.image_scaling_index} { const bool is_unified{profile.unified_descriptor_binding}; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 2dbeeb0911..c0c28e4e3f 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 2d4feca02c..14ada93ac2 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -293,6 +296,14 @@ std::optional TrackLowAddress(IR::Inst* inst) { } // This address is expected to either be a PackUint2x32, a IAdd64, or a CompositeConstructU32x2 IR::Inst* addr_inst{addr.InstRecursive()}; + // Unwrap Identity ops introduced by lowerings (e.g., PackUint2x32 -> Identity) + while (addr_inst->GetOpcode() == IR::Opcode::Identity) { + const IR::Value id_arg{addr_inst->Arg(0)}; + if (id_arg.IsImmediate()) { + return std::nullopt; + } + addr_inst = id_arg.InstRecursive(); + } s32 imm_offset{0}; if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) { // If it's an IAdd64, get the immediate offset 
it is applying and grab the address @@ -308,6 +319,14 @@ std::optional TrackLowAddress(IR::Inst* inst) { return std::nullopt; } addr_inst = iadd_addr.InstRecursive(); + // Unwrap Identity again if present after folding IAdd64 + while (addr_inst->GetOpcode() == IR::Opcode::Identity) { + const IR::Value id_arg{addr_inst->Arg(0)}; + if (id_arg.IsImmediate()) { + return std::nullopt; + } + addr_inst = id_arg.InstRecursive(); + } } // With IAdd64 handled, now PackUint2x32 is expected if (addr_inst->GetOpcode() == IR::Opcode::PackUint2x32) { @@ -317,6 +336,14 @@ std::optional TrackLowAddress(IR::Inst* inst) { return std::nullopt; } addr_inst = vector.InstRecursive(); + // Unwrap Identity that may replace PackUint2x32 + while (addr_inst->GetOpcode() == IR::Opcode::Identity) { + const IR::Value id_arg{addr_inst->Arg(0)}; + if (id_arg.IsImmediate()) { + return std::nullopt; + } + addr_inst = id_arg.InstRecursive(); + } } // The vector is expected to be a CompositeConstructU32x2 if (addr_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) { From 0fd603c094e1ff87cc56227ad7ea6f47442a644d Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 13 Nov 2025 20:37:47 -0400 Subject: [PATCH 18/34] [texture_cache, gl, vk] Initial implementation for HDR + MSAA detection on QCOM drivers. 
--- .../renderer_opengl/gl_texture_cache.h | 2 + .../renderer_vulkan/maxwell_to_vk.cpp | 2 +- .../renderer_vulkan/vk_texture_cache.cpp | 18 +++++ .../renderer_vulkan/vk_texture_cache.h | 2 + .../vulkan_common/vulkan_device.cpp | 70 +++++++++++++++++-- 5 files changed, 88 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 7833de54a5..695ca9833b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -1,4 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index a7a878f18c..24da3591ec 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -214,7 +214,7 @@ struct FormatTuple { {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5_UNORM {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB - {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT + {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, Attachable | Storage}, // E5B9G9R9_FLOAT // Depth formats {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 136a11f78d..1b0619afad 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1549,6 +1549,24 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu MakeStorageView(device, level, *original_image, VK_FORMAT_A8B8G8R8_UNORM_PACK32); } } + + // Proactive warning for problematic HDR format + MSAA combinations on 
Android + // These combinations commonly cause texture flickering/black screens across multiple game engines + // Note: MSAA is native Switch rendering technique, cannot be disabled by emulator + if (info.num_samples > 1) { + const auto vk_format = MaxwellToVK::SurfaceFormat(runtime->device, FormatType::Optimal, + false, info.format).format; + const bool is_hdr_format = vk_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || + vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32; + + if (is_hdr_format) { + LOG_WARNING(Render_Vulkan, + "Creating MSAA image ({}x samples) with HDR format {} (Maxwell: {}). " + "Driver support may be limited on Android (Qualcomm < 800, Mali pre-maintenance5). " + "Format fallback to RGBA16F should prevent issues.", + info.num_samples, vk_format, info.format); + } + } } Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {} diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 437d082c4c..be4b246b79 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -1,4 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index ddaca2669c..f73365fd97 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -90,6 +90,31 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{ VK_FORMAT_UNDEFINED, }; +// B10G11R11_UFLOAT (R11G11B10 float) is used by Unreal Engine 5 for HDR textures +// Some Android drivers (Qualcomm pre-800, Mali pre-maintenance5) have issues with this format +// when used with MSAA or certain tiling modes, causing texture flickering/black 
screens +constexpr std::array B10G11R11_UFLOAT_PACK32{ + VK_FORMAT_R16G16B16A16_SFLOAT, // Fallback: RGBA16F (more memory, but widely supported) + VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, // Alternative: E5B9G9R9 shared exponent format + VK_FORMAT_UNDEFINED, +}; + +// E5B9G9R9_UFLOAT (shared exponent RGB9E5) used by various engines (Unity, custom engines) +// Also problematic on some Android drivers, especially with MSAA and as render target +constexpr std::array E5B9G9R9_UFLOAT_PACK32{ + VK_FORMAT_R16G16B16A16_SFLOAT, // Fallback: RGBA16F (safest option) + VK_FORMAT_B10G11R11_UFLOAT_PACK32, // Alternative: might work if E5B9G9R9 fails + VK_FORMAT_UNDEFINED, +}; + +/// Helper function to detect HDR formats that commonly fail with MSAA on some Android drivers +[[nodiscard]] constexpr bool IsProblematicHDRFormat(VkFormat format) { + // These formats are known to cause texture flickering/black screens across multiple game engines + // when combined with MSAA on certain Android drivers (Qualcomm < 800, Mali pre-maintenance5) + return format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || // UE5, custom engines + format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32; // Unity, RE Engine, others +} + } // namespace Alternatives template @@ -122,6 +147,10 @@ constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { return Alternatives::VK_FORMAT_R32G32B32_SFLOAT.data(); case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT: return Alternatives::VK_FORMAT_A4B4G4R4_UNORM_PACK16.data(); + case VK_FORMAT_B10G11R11_UFLOAT_PACK32: + return Alternatives::B10G11R11_UFLOAT_PACK32.data(); + case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: + return Alternatives::E5B9G9R9_UFLOAT_PACK32.data(); default: return nullptr; } @@ -844,15 +873,33 @@ Device::~Device() { VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const { if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { - return wanted_format; + // CRITICAL FIX: Even if format is 
"supported", check for STORAGE + HDR + no MSAA support + // Driver may report STORAGE_IMAGE_BIT but shaderStorageImageMultisample=false means + // it will fail at runtime when used with MSAA (CopyImageMSAA silently fails) + const bool requests_storage = (wanted_usage & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT) != 0; + const bool is_hdr_format = wanted_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || + wanted_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32; + + // If driver doesn't support shader storage image with MSAA, and we're requesting storage + // for an HDR format (which will likely be used with MSAA), force fallback + if (requests_storage && is_hdr_format && !features.features.shaderStorageImageMultisample) { + LOG_WARNING(Render_Vulkan, + "Format {} reports STORAGE_IMAGE_BIT but driver doesn't support " + "shaderStorageImageMultisample. Forcing fallback for MSAA compatibility.", + wanted_format); + // Continue to alternatives search below + } else { + return wanted_format; + } } // The wanted format is not supported by hardware, search for alternatives const VkFormat* alternatives = GetFormatAlternatives(wanted_format); if (alternatives == nullptr) { LOG_ERROR(Render_Vulkan, - "Format={} with usage={} and type={} has no defined alternatives and host " - "hardware does not support it", - wanted_format, wanted_usage, format_type); + "Format={} (0x{:X}) with usage={} and type={} has no defined alternatives and host " + "hardware does not support it. 
Driver: {} Device: {}", + wanted_format, static_cast(wanted_format), wanted_usage, format_type, + GetDriverName(), properties.properties.deviceName); return wanted_format; } @@ -861,9 +908,22 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags if (!IsFormatSupported(alternative, wanted_usage, format_type)) { continue; } - LOG_DEBUG(Render_Vulkan, + // Special logging for HDR formats (common across multiple engines) on problematic drivers + if (wanted_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) { + LOG_WARNING(Render_Vulkan, + "Emulating B10G11R11_UFLOAT (HDR format: UE5, custom engines) with {} on {}. " + "Native format not supported by driver, using fallback.", + alternative, properties.properties.deviceName); + } else if (wanted_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) { + LOG_WARNING(Render_Vulkan, + "Emulating E5B9G9R9_UFLOAT (HDR format: Unity, RE Engine) with {} on {}. " + "Native format not supported by driver, using fallback.", + alternative, properties.properties.deviceName); + } else { + LOG_DEBUG(Render_Vulkan, "Emulating format={} with alternative format={} with usage={} and type={}", wanted_format, alternative, wanted_usage, format_type); + } return alternative; } From b1208f03ee79a5a0932fd78d19cbf0e0091385b9 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 13 Nov 2025 21:30:19 -0400 Subject: [PATCH 19/34] Fix building issues --- src/video_core/vulkan_common/vulkan_device.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f73365fd97..84dfef6c5b 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -107,14 +107,6 @@ constexpr std::array E5B9G9R9_UFLOAT_PACK32{ VK_FORMAT_UNDEFINED, }; -/// Helper function to detect HDR formats that commonly fail with MSAA on some Android drivers -[[nodiscard]] constexpr bool IsProblematicHDRFormat(VkFormat 
format) { - // These formats are known to cause texture flickering/black screens across multiple game engines - // when combined with MSAA on certain Android drivers (Qualcomm < 800, Mali pre-maintenance5) - return format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || // UE5, custom engines - format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32; // Unity, RE Engine, others -} - } // namespace Alternatives template From 47f0563c1bd5014ea74977b71f322e926432640b Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 8 Nov 2025 01:09:05 -0400 Subject: [PATCH 20/34] Giving maintance to driver features and unused extensions --- src/video_core/vulkan_common/vulkan_device.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index a6f7d5fb07..ace7ee611f 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -840,7 +840,6 @@ private: VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{}; VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{}; VkPhysicalDeviceTransformFeedbackPropertiesEXT transform_feedback{}; - VkPhysicalDeviceProperties properties{}; }; From 4860050358da0dd61fadc4af097568ca25a71750 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 8 Nov 2025 01:28:35 -0400 Subject: [PATCH 21/34] attempt to fix building issues --- src/video_core/renderer_vulkan/present/fsr.cpp | 3 +++ src/video_core/renderer_vulkan/present/fxaa.cpp | 3 +++ src/video_core/renderer_vulkan/present/layer.cpp | 1 + src/video_core/renderer_vulkan/present/util.cpp | 1 + 4 files changed, 8 insertions(+) diff --git a/src/video_core/renderer_vulkan/present/fsr.cpp b/src/video_core/renderer_vulkan/present/fsr.cpp index 3f708be704..8422a00204 100644 --- a/src/video_core/renderer_vulkan/present/fsr.cpp +++ b/src/video_core/renderer_vulkan/present/fsr.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// 
SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/video_core/renderer_vulkan/present/fxaa.cpp b/src/video_core/renderer_vulkan/present/fxaa.cpp index bdafd1f4d0..d53fd29b9e 100644 --- a/src/video_core/renderer_vulkan/present/fxaa.cpp +++ b/src/video_core/renderer_vulkan/present/fxaa.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/video_core/renderer_vulkan/present/layer.cpp b/src/video_core/renderer_vulkan/present/layer.cpp index 5676dfe62a..a3bb5739cf 100644 --- a/src/video_core/renderer_vulkan/present/layer.cpp +++ b/src/video_core/renderer_vulkan/present/layer.cpp @@ -17,6 +17,7 @@ #include "video_core/renderer_vulkan/present/util.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/textures/decoders.h" +#include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/present/util.cpp b/src/video_core/renderer_vulkan/present/util.cpp index 148e99b477..66a47fc634 100644 --- a/src/video_core/renderer_vulkan/present/util.cpp +++ b/src/video_core/renderer_vulkan/present/util.cpp @@ -6,6 +6,7 @@ #include "common/assert.h" #include "video_core/renderer_vulkan/present/util.h" +#include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { From 52b630dfdcb905e340d1a1b18bb8f35772d44fe7 Mon Sep 17 00:00:00 2001 From: crueter Date: Sat, 8 Nov 2025 12:41:15 -0500 Subject: [PATCH 22/34] build Signed-off-by: crueter --- src/video_core/renderer_vulkan/present/fsr.cpp | 3 --- src/video_core/renderer_vulkan/present/fxaa.cpp | 3 --- src/video_core/renderer_vulkan/present/layer.cpp | 1 - src/video_core/renderer_vulkan/present/util.cpp | 1 - 4 files changed, 8 deletions(-) 
diff --git a/src/video_core/renderer_vulkan/present/fsr.cpp b/src/video_core/renderer_vulkan/present/fsr.cpp index 8422a00204..3f708be704 100644 --- a/src/video_core/renderer_vulkan/present/fsr.cpp +++ b/src/video_core/renderer_vulkan/present/fsr.cpp @@ -1,6 +1,3 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/video_core/renderer_vulkan/present/fxaa.cpp b/src/video_core/renderer_vulkan/present/fxaa.cpp index d53fd29b9e..bdafd1f4d0 100644 --- a/src/video_core/renderer_vulkan/present/fxaa.cpp +++ b/src/video_core/renderer_vulkan/present/fxaa.cpp @@ -1,6 +1,3 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - // SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/video_core/renderer_vulkan/present/layer.cpp b/src/video_core/renderer_vulkan/present/layer.cpp index a3bb5739cf..5676dfe62a 100644 --- a/src/video_core/renderer_vulkan/present/layer.cpp +++ b/src/video_core/renderer_vulkan/present/layer.cpp @@ -17,7 +17,6 @@ #include "video_core/renderer_vulkan/present/util.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/textures/decoders.h" -#include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/present/util.cpp b/src/video_core/renderer_vulkan/present/util.cpp index 66a47fc634..148e99b477 100644 --- a/src/video_core/renderer_vulkan/present/util.cpp +++ b/src/video_core/renderer_vulkan/present/util.cpp @@ -6,7 +6,6 @@ #include "common/assert.h" #include "video_core/renderer_vulkan/present/util.h" -#include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { From 6a62fa7ee3368c32aceab80da1b069de6a29ec27 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: 
Sat, 8 Nov 2025 16:13:24 -0400 Subject: [PATCH 23/34] Implement handling for texture cache flickering --- src/video_core/texture_cache/texture_cache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f1ac55555c..2b9135eab0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1540,8 +1540,8 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA new_image.modification_tick = overlap.modification_tick; } else { // If we cannot determine a base mapping, fallback to preserving the - // overlap (avoid deleting GPU-modified data) and log the event so - // it can be investigated, we're trying to pinpoint the issue of texture flickering. + // overlap (avoid deleting GPU-modified data) and log the event so it can be + // investigated, we're trying to pinpoint the issue of texture flickering. LOG_WARNING(HW_GPU, "Could not map overlap gpu_addr {:#x} into new image; preserving overlap", u64(overlap.gpu_addr)); continue; } From e72a206aeea2195587de8a93272ce62da2b88e03 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 8 Nov 2025 22:48:36 +0000 Subject: [PATCH 24/34] fix --- src/video_core/texture_cache/texture_cache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2b9135eab0..f1ac55555c 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1540,8 +1540,8 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA new_image.modification_tick = overlap.modification_tick; } else { // If we cannot determine a base mapping, fallback to preserving the - // overlap (avoid deleting GPU-modified data) and log the event so it can be - // investigated, we're trying to pinpoint the issue of texture flickering. + // overlap (avoid deleting GPU-modified data) and log the event so + // it can be investigated, we're trying to pinpoint the issue of texture flickering. LOG_WARNING(HW_GPU, "Could not map overlap gpu_addr {:#x} into new image; preserving overlap", u64(overlap.gpu_addr)); continue; } From 8bd87204f5670539be83e5ab07519073f3bec6df Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 9 Nov 2025 02:23:53 -0400 Subject: [PATCH 25/34] TEST: Enabling TimelineSemaphores for QCOM and Turnip --- src/video_core/vulkan_common/vulkan_device.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 84dfef6c5b..9da5571314 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1069,13 +1069,6 @@ bool Device::ShouldBoostClocks() const { } bool Device::HasTimelineSemaphore() const { - if (GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || - GetDriverID() == VK_DRIVER_ID_MESA_TURNIP) { - // Timeline semaphores do not work properly on all Qualcomm drivers. - // They generally work properly with Turnip drivers, but are problematic on some devices - // (e.g. ZTE handsets with Snapdragon 870). 
- return false; - } return features.timeline_semaphore.timelineSemaphore; } From 31c168efe1bae110855f16bd45352751998b6ec3 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 9 Nov 2025 23:14:51 -0400 Subject: [PATCH 26/34] [shader_recompiler, spir-v] Adding INT64 emulation path --- src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index ccaa8da9e0..4699c82219 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -462,8 +462,8 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf IR::Program& program, Bindings& bindings) : Sirit::Module(profile_.supported_spirv), profile{profile_}, runtime_info{runtime_info_}, stage{program.stage}, - // Enable int64 emulation if host lacks int64 but we either use int64 ops - // or we need 64-bit addressing for global memory operations. + // Enable int64 emulation if host lacks int64 but we either use + // int64 ops or we need 64-bit addressing for global memory operations. 
emulate_int64{!profile.support_int64 && (program.info.uses_int64 || program.info.uses_global_memory)}, texture_rescaling_index{bindings.texture_scaling_index}, From c845b6086f8d82ed85be67077f7f159f8543d00c Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Mon, 10 Nov 2025 00:27:38 -0400 Subject: [PATCH 27/34] Adding missing headers --- src/video_core/vulkan_common/vulkan_device.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9da5571314..a6910a5496 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1,6 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later - // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later From 6134a573671c225cf4ca6e23dcef7542e3e1fd72 Mon Sep 17 00:00:00 2001 From: PavelBARABANOV Date: Mon, 10 Nov 2025 18:52:31 +0300 Subject: [PATCH 28/34] Revert "TEST: Enabling TimelineSemaphores for QCOM and Turnip" This reverts commit 3cd33fce44cf0412d4f76f3cf601b73b881662cb. --- src/video_core/vulkan_common/vulkan_device.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index a6910a5496..6dc958efe7 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1068,6 +1068,13 @@ bool Device::ShouldBoostClocks() const { } bool Device::HasTimelineSemaphore() const { + if (GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || + GetDriverID() == VK_DRIVER_ID_MESA_TURNIP) { + // Timeline semaphores do not work properly on all Qualcomm drivers. + // They generally work properly with Turnip drivers, but are problematic on some devices + // (e.g. ZTE handsets with Snapdragon 870). 
+ return false; + } return features.timeline_semaphore.timelineSemaphore; } From a51d875d9176775e880ca970fa544253eb1ca1f9 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 13 Nov 2025 18:42:49 -0400 Subject: [PATCH 29/34] [shader_recompiler, spir-v] verifying int64 emulation path activation --- src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 4699c82219..ccaa8da9e0 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -462,8 +462,8 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf IR::Program& program, Bindings& bindings) : Sirit::Module(profile_.supported_spirv), profile{profile_}, runtime_info{runtime_info_}, stage{program.stage}, - // Enable int64 emulation if host lacks int64 but we either use - // int64 ops or we need 64-bit addressing for global memory operations. + // Enable int64 emulation if host lacks int64 but we either use int64 ops + // or we need 64-bit addressing for global memory operations. emulate_int64{!profile.support_int64 && (program.info.uses_int64 || program.info.uses_global_memory)}, texture_rescaling_index{bindings.texture_scaling_index}, From bcc53909434efa6a5a377f00e6bbe01e643e1a2d Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 13 Nov 2025 20:37:47 -0400 Subject: [PATCH 30/34] [texture_cache, gl, vk] Initial implementation for HDR + MSAA detection on QCOM drivers. 
--- src/video_core/vulkan_common/vulkan_device.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 6dc958efe7..4bc398710a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -105,7 +105,6 @@ constexpr std::array E5B9G9R9_UFLOAT_PACK32{ VK_FORMAT_B10G11R11_UFLOAT_PACK32, // Alternative: might work if E5B9G9R9 fails VK_FORMAT_UNDEFINED, }; - } // namespace Alternatives template From 5e7fb6eeadc68b4bfb4ba723132472cbba1349ee Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 13 Nov 2025 21:30:19 -0400 Subject: [PATCH 31/34] Fix building issues --- src/video_core/vulkan_common/vulkan_device.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 4bc398710a..6dc958efe7 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -105,6 +105,7 @@ constexpr std::array E5B9G9R9_UFLOAT_PACK32{ VK_FORMAT_B10G11R11_UFLOAT_PACK32, // Alternative: might work if E5B9G9R9 fails VK_FORMAT_UNDEFINED, }; + } // namespace Alternatives template From 3db41fbce60e776481214981bbca97c89a300b82 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 14 Nov 2025 01:32:02 -0400 Subject: [PATCH 32/34] [memory, vk] TEST: Tiled GPU optimization try #1 --- src/common/settings.h | 54 +++++++++++++ src/common/settings_enums.h | 10 +++ .../backend/spirv/emit_spirv.cpp | 37 +++++++-- src/shader_recompiler/profile.h | 8 ++ .../renderer_vulkan/pipeline_helper.h | 17 ++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 14 ++++ .../renderer_vulkan/vk_render_pass_cache.cpp | 73 +++++++++++++++--- .../renderer_vulkan/vk_render_pass_cache.h | 7 ++ .../renderer_vulkan/vk_texture_cache.cpp | 75 +++++++++++++------ .../vulkan_common/vulkan_device.cpp | 65 ++++++++++++++++ 
.../vulkan_common/vulkan_memory_allocator.cpp | 22 +++++- 11 files changed, 343 insertions(+), 39 deletions(-) diff --git a/src/common/settings.h b/src/common/settings.h index 2e16e4bc59..31e54e3be1 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -560,6 +560,60 @@ struct Values { false, &sample_shading}; +#ifdef ANDROID + // Shader Float Controls (Android only) - Eden Veil / Extensions + // Force enable VK_KHR_shader_float_controls even if driver has known issues + // Allows fine-tuning float behavior to match Switch/Maxwell or optimize performance + SwitchableSetting shader_float_controls_force_enable{linkage, + false, + "shader_float_controls_force_enable", + Category::RendererExtensions, + Specialization::Paired}; + + // Individual float behavior controls (visible only when force_enable is true) + // Multiple can be active simultaneously EXCEPT FTZ and DenormPreserve (mutually exclusive) + // + // Recommended configurations: + // Switch-native: FTZ=ON, RTE=ON, SignedZero=ON (matches Maxwell behavior) + // Performance: FTZ=ON only (fastest) + // Accuracy: DenormPreserve=ON, RTE=ON, SignedZero=ON (slowest, highest precision) + SwitchableSetting shader_float_ftz{linkage, + false, + "shader_float_ftz", + Category::RendererExtensions, + Specialization::Default, + true, + false, + &shader_float_controls_force_enable}; + + SwitchableSetting shader_float_denorm_preserve{linkage, + false, + "shader_float_denorm_preserve", + Category::RendererExtensions, + Specialization::Default, + true, + false, + &shader_float_controls_force_enable}; + + SwitchableSetting shader_float_rte{linkage, + false, + "shader_float_rte", + Category::RendererExtensions, + Specialization::Default, + true, + false, + &shader_float_controls_force_enable}; + + SwitchableSetting shader_float_signed_zero_inf_nan{linkage, + false, + "shader_float_signed_zero_inf_nan", + Category::RendererExtensions, + Specialization::Default, + true, + false, + &shader_float_controls_force_enable}; 
+#endif + Setting renderer_debug{linkage, false, "debug", Category::RendererDebug}; Setting renderer_shader_feedback{linkage, false, "shader_feedback", Category::RendererDebug}; diff --git a/src/common/settings_enums.h b/src/common/settings_enums.h index 3ba2144efc..33daa55519 100644 --- a/src/common/settings_enums.h +++ b/src/common/settings_enums.h @@ -152,6 +152,16 @@ ENUM(SpirvOptimizeMode, Never, OnLoad, Always); ENUM(GpuOverclock, Low, Medium, High) ENUM(TemperatureUnits, Celsius, Fahrenheit) +// Shader Float Controls behavior modes +// These control how floating-point denormals and special values are handled in shaders +ENUM(ShaderFloatBehavior, + DriverDefault, // Let driver choose (safest, may not match Switch behavior) + SwitchNative, // Emulate Switch/Maxwell behavior (FTZ + RTE + SignedZero) + FlushToZero, // FTZ only - flush denorms to zero (fastest, some precision loss) + PreserveDenorms, // Preserve denorms (slowest, highest precision) + RoundToEven, // RTE rounding mode (IEEE 754 compliant) + SignedZeroInfNan); // Preserve signed zero, inf, nan (accuracy for edge cases) + template inline std::string_view CanonicalizeEnum(Type id) { const auto group = EnumMetadata::Canonicalizations(); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 313a1deb30..d3faf4341b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -341,19 +341,35 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, Id main_func) { const Info& info{program.info}; + + // User-forced behavior overrides (Android Eden Veil/Extensions) + // When force flags are active, they take precedence over shader-declared behavior + const bool force_flush = profile.force_fp32_denorm_flush; + const bool force_preserve = 
profile.force_fp32_denorm_preserve; + + if (force_flush && force_preserve) { + LOG_WARNING(Shader_SPIRV, "Both FTZ and Preserve forced simultaneously - FTZ takes precedence"); + } + if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader"); - } else if (info.uses_fp32_denorms_flush) { + } else if (force_flush || info.uses_fp32_denorms_flush) { if (profile.support_fp32_denorm_flush) { ctx.AddCapability(spv::Capability::DenormFlushToZero); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U); + if (force_flush) { + LOG_DEBUG(Shader_SPIRV, "Fp32 DenormFlushToZero FORCED by user setting"); + } } else { // Drivers will most likely flush denorms by default, no need to warn } - } else if (info.uses_fp32_denorms_preserve) { + } else if (force_preserve || info.uses_fp32_denorms_preserve) { if (profile.support_fp32_denorm_preserve) { ctx.AddCapability(spv::Capability::DenormPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); + if (force_preserve) { + LOG_DEBUG(Shader_SPIRV, "Fp32 DenormPreserve FORCED by user setting"); + } } else { LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support"); } @@ -386,13 +402,24 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) { return; } + + // User-forced behavior (Android Eden Veil/Extensions) + const bool force_signed_zero_inf_nan = profile.force_fp32_signed_zero_inf_nan; + if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); } - if (profile.support_fp32_signed_zero_nan_preserve) { - ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); - ctx.AddExecutionMode(main_func, 
spv::ExecutionMode::SignedZeroInfNanPreserve, 32U); + if (force_signed_zero_inf_nan || profile.support_fp32_signed_zero_nan_preserve) { + if (profile.support_fp32_signed_zero_nan_preserve) { + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U); + if (force_signed_zero_inf_nan) { + LOG_DEBUG(Shader_SPIRV, "Fp32 SignedZeroInfNanPreserve FORCED by user setting"); + } + } else if (force_signed_zero_inf_nan) { + LOG_WARNING(Shader_SPIRV, "SignedZeroInfNanPreserve forced but driver doesn't support it"); + } } if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) { ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 48db64e162..6014221e8f 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -28,6 +28,14 @@ struct Profile { bool support_fp16_signed_zero_nan_preserve{}; bool support_fp32_signed_zero_nan_preserve{}; bool support_fp64_signed_zero_nan_preserve{}; + + // User-forced float behavior overrides (Android Eden Veil/Extensions) + // When shader_float_controls_force_enable is true, these override shader-declared behavior + bool force_fp32_denorm_flush{}; // Force FTZ for all FP32 ops + bool force_fp32_denorm_preserve{}; // Force denorm preservation for all FP32 ops + bool force_fp32_rte_rounding{}; // Force Round-To-Even for all FP32 ops + bool force_fp32_signed_zero_inf_nan{}; // Force signed zero/inf/nan preservation + bool support_explicit_workgroup_layout{}; bool support_vote{}; bool support_viewport_index_layer_non_geometry{}; diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 910e07a606..f8152f5add 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -24,8 +24,21 @@ public: 
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} bool CanUsePushDescriptor() const noexcept { - return device->IsKhrPushDescriptorSupported() && - num_descriptors <= device->MaxPushDescriptors(); + if (!device->IsKhrPushDescriptorSupported()) { + return false; + } + if (num_descriptors > device->MaxPushDescriptors()) { + return false; + } + + // Qualcomm has slow push descriptor implementation - use conservative threshold + // Prefer descriptor pools for complex shaders (>8 descriptors) + const bool is_qualcomm = device->GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY; + if (is_qualcomm && num_descriptors > 8) { + return false; + } + + return true; } // TODO(crueter): utilize layout binding flags diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0532df05d8..15805f8480 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -341,6 +341,20 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, .support_fp64_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, + +#ifdef ANDROID + // User-forced float behavior overrides (Eden Veil/Extensions) + .force_fp32_denorm_flush = Settings::values.shader_float_ftz.GetValue(), + .force_fp32_denorm_preserve = Settings::values.shader_float_denorm_preserve.GetValue(), + .force_fp32_rte_rounding = Settings::values.shader_float_rte.GetValue(), + .force_fp32_signed_zero_inf_nan = Settings::values.shader_float_signed_zero_inf_nan.GetValue(), +#else + .force_fp32_denorm_flush = false, + .force_fp32_denorm_preserve = false, + .force_fp32_rte_rounding = false, + .force_fp32_signed_zero_inf_nan = false, +#endif + .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), .support_vote = 
device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT), .support_viewport_index_layer_non_geometry = diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index 80ff75e3b9..9499690a85 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -8,6 +8,7 @@ #include +#include "common/logging/log.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/surface.h" @@ -19,6 +20,23 @@ namespace { using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceType; + // Check if the driver uses tile-based deferred rendering (TBDR) architecture + // These GPUs benefit from optimized load/store operations to keep data on-chip + // + // TBDR GPUs supported in Eden: + // - Qualcomm Adreno (Snapdragon): Most Android flagship/midrange devices + // - ARM Mali: Android devices (Samsung Exynos, MediaTek, etc.) 
+ // - Imagination PowerVR: Older iOS devices, some Android tablets + // - Samsung Xclipse: Galaxy S22+ (AMD RDNA2-based, but uses TBDR mode) + // - Broadcom VideoCore: Raspberry Pi + [[nodiscard]] constexpr bool IsTBDRGPU(VkDriverId driver_id) { + return driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || + driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || + driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY || + driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY || + driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY; + } + constexpr SurfaceType GetSurfaceType(PixelFormat format) { switch (format) { // Depth formats @@ -44,23 +62,51 @@ using VideoCore::Surface::SurfaceType; } VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format, - VkSampleCountFlagBits samples) { + VkSampleCountFlagBits samples, + bool tbdr_will_clear, + bool tbdr_discard_after) { using MaxwellToVK::SurfaceFormat; const SurfaceType surface_type = GetSurfaceType(format); const bool has_stencil = surface_type == SurfaceType::DepthStencil || surface_type == SurfaceType::Stencil; + // TBDR optimization: Apply hints only on tile-based GPUs + // Desktop GPUs (NVIDIA/AMD/Intel) ignore these hints and use standard behavior + const bool is_tbdr = IsTBDRGPU(device.GetDriverID()); + + // On TBDR: Use DONT_CARE if clear is guaranteed (avoids loading from main memory) + // On Desktop: Always LOAD to preserve existing content (safer default) + VkAttachmentLoadOp load_op = VK_ATTACHMENT_LOAD_OP_LOAD; + if (is_tbdr && tbdr_will_clear) { + load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + } + + // On TBDR: Use DONT_CARE if content won't be read (avoids storing to main memory) + // On Desktop: Always STORE (safer default) + VkAttachmentStoreOp store_op = VK_ATTACHMENT_STORE_OP_STORE; + if (is_tbdr && tbdr_discard_after) { + store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE; + } + + // Stencil operations follow same logic + VkAttachmentLoadOp stencil_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + 
VkAttachmentStoreOp stencil_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE; + if (has_stencil) { + stencil_load_op = (is_tbdr && tbdr_will_clear) ? VK_ATTACHMENT_LOAD_OP_DONT_CARE + : VK_ATTACHMENT_LOAD_OP_LOAD; + stencil_store_op = (is_tbdr && tbdr_discard_after) ? VK_ATTACHMENT_STORE_OP_DONT_CARE + : VK_ATTACHMENT_STORE_OP_STORE; + } + return { .flags = {}, .format = SurfaceFormat(device, FormatType::Optimal, true, format).format, .samples = samples, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = has_stencil ? VK_ATTACHMENT_LOAD_OP_LOAD - : VK_ATTACHMENT_LOAD_OP_DONT_CARE, - .stencilStoreOp = has_stencil ? VK_ATTACHMENT_STORE_OP_STORE - : VK_ATTACHMENT_STORE_OP_DONT_CARE, + .loadOp = load_op, + .storeOp = store_op, + .stencilLoadOp = stencil_load_op, + .stencilStoreOp = stencil_store_op, .initialLayout = VK_IMAGE_LAYOUT_GENERAL, .finalLayout = VK_IMAGE_LAYOUT_GENERAL, }; @@ -75,6 +121,13 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { if (!is_new) { return *pair->second; } + + const bool is_tbdr = IsTBDRGPU(device->GetDriverID()); + if (is_tbdr && (key.tbdr_will_clear || key.tbdr_discard_after)) { + LOG_DEBUG(Render_Vulkan, "Creating TBDR-optimized render pass (driver={}, clear={}, discard={})", + static_cast(device->GetDriverID()), key.tbdr_will_clear, key.tbdr_discard_after); + } + boost::container::static_vector descriptions; std::array references{}; u32 num_attachments{}; @@ -87,7 +140,8 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { .layout = VK_IMAGE_LAYOUT_GENERAL, }; if (is_valid) { - descriptions.push_back(AttachmentDescription(*device, format, key.samples)); + descriptions.push_back(AttachmentDescription(*device, format, key.samples, + key.tbdr_will_clear, key.tbdr_discard_after)); num_attachments = static_cast(index + 1); ++num_colors; } @@ -99,7 +153,8 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { .attachment = num_colors, .layout = 
VK_IMAGE_LAYOUT_GENERAL, }; - descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples)); + descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples, + key.tbdr_will_clear, key.tbdr_discard_after)); } const VkSubpassDescription subpass{ .flags = 0, diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h index 91ad4bf577..76302b5117 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.h +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h @@ -17,6 +17,11 @@ struct RenderPassKey { std::array color_formats; VideoCore::Surface::PixelFormat depth_format; VkSampleCountFlagBits samples; + + // TBDR optimization hints - only affect tile-based GPUs (Qualcomm, ARM, Imagination) + // These flags indicate the expected usage pattern to optimize load/store operations + bool tbdr_will_clear{false}; // Attachment will be cleared with vkCmdClearAttachments + bool tbdr_discard_after{false}; // Attachment won't be read after render pass }; } // namespace Vulkan @@ -27,6 +32,8 @@ struct hash { [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { size_t value = static_cast(key.depth_format) << 48; value ^= static_cast(key.samples) << 52; + value ^= (static_cast(key.tbdr_will_clear) << 56); + value ^= (static_cast(key.tbdr_discard_after) << 57); for (size_t i = 0; i < key.color_formats.size(); ++i) { value ^= static_cast(key.color_formats[i]) << (i * 6); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 1b0619afad..51a4954df1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -160,6 +160,45 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { }; } +/// Emergency fallback for MSAA with HDR formats: degrade to non-MSAA if driver doesn't support +/// 
shaderStorageImageMultisample (required for msaa_copy_pass) +[[nodiscard]] ImageInfo AdjustMSAAForHDRFormats(const Device& device, ImageInfo info) { + // Only apply emergency fallback if MSAA is requested + if (info.num_samples <= 1) { + return info; + } + + // Check if this is an HDR format that commonly fails with MSAA + const auto vk_format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, + false, info.format).format; + const bool is_hdr_format = vk_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || + vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32; + + if (!is_hdr_format) { + return info; // Not an HDR format, no adjustment needed + } + + // If driver doesn't support shader storage image multisample, MSAACopyPass will fail + // Emergency fallback: degrade to non-MSAA (1 sample) to avoid texture corruption + if (!device.IsStorageImageMultisampleSupported()) { + LOG_ERROR(Render_Vulkan, + "EMERGENCY MSAA FALLBACK: Driver doesn't support shaderStorageImageMultisample. " + "Degrading HDR format {} from {}x MSAA to 1x (non-MSAA) to prevent texture corruption. " + "This will cause visual quality loss but prevents black textures.", + vk_format, info.num_samples); + + // Degrade to non-MSAA + // NOTE: We only change num_samples, NOT dimensions. The ImageInfo dimensions are already + // in "logical" space (full resolution), and MakeImageCreateInfo will handle the conversion + // to physical GPU dimensions based on num_samples automatically. 
+ info.num_samples = 1; + + return info; + } + + return info; // Driver supports MSAA storage images, no adjustment needed +} + [[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator, const ImageInfo& info, std::span view_formats) { if (info.type == ImageType::Buffer) { @@ -1510,10 +1549,20 @@ void TextureCacheRuntime::TickFrame() {} Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler}, - runtime{&runtime_}, original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info, - runtime->ViewFormats(info.format))), - aspect_mask(ImageAspectMask(info.format)) { - if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { + runtime{&runtime_} { + // CRITICAL: Adjust MSAA for HDR formats if driver doesn't support shaderStorageImageMultisample + // This prevents texture corruption by degrading to non-MSAA when msaa_copy_pass would fail + const ImageInfo adjusted_info = AdjustMSAAForHDRFormats(runtime_.device, info_); + + // Update our stored info with adjusted values (may have num_samples=1 now) + info = adjusted_info; + + // Create image with adjusted info + original_image = MakeImage(runtime_.device, runtime_.memory_allocator, adjusted_info, + runtime->ViewFormats(adjusted_info.format)); + aspect_mask = ImageAspectMask(adjusted_info.format); + + if (IsPixelFormatASTC(adjusted_info.format) && !runtime->device.IsOptimalAstcSupported()) { switch (Settings::values.accelerate_astc.GetValue()) { case Settings::AstcDecodeMode::Gpu: if (Settings::values.astc_recompression.GetValue() == @@ -1549,24 +1598,6 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu MakeStorageView(device, level, *original_image, VK_FORMAT_A8B8G8R8_UNORM_PACK32); } } - - // Proactive warning for problematic HDR format + MSAA combinations on Android - // These combinations 
commonly cause texture flickering/black screens across multiple game engines - // Note: MSAA is native Switch rendering technique, cannot be disabled by emulator - if (info.num_samples > 1) { - const auto vk_format = MaxwellToVK::SurfaceFormat(runtime->device, FormatType::Optimal, - false, info.format).format; - const bool is_hdr_format = vk_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || - vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32; - - if (is_hdr_format) { - LOG_WARNING(Render_Vulkan, - "Creating MSAA image ({}x samples) with HDR format {} (Maxwell: {}). " - "Driver support may be limited on Android (Qualcomm < 800, Mali pre-maintenance5). " - "Format fallback to RGBA16F should prevent issues.", - info.num_samples, vk_format, info.format); - } - } } Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {} diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 6dc958efe7..f4409ccd6a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -539,9 +539,74 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR "Qualcomm drivers have a slow VK_KHR_push_descriptor implementation"); //RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); +#ifdef ANDROID + // Shader Float Controls handling for Qualcomm Adreno + // Default: DISABLED due to historical issues with binning precision causing visual glitches + const bool force_enable = Settings::values.shader_float_controls_force_enable.GetValue(); + + if (force_enable) { + // User explicitly enabled float controls - log detected capabilities and user config + LOG_INFO(Render_Vulkan, "Shader Float Controls FORCE ENABLED by user (Eden Veil/Extensions)"); + + // Log driver capabilities + const auto& fc = float_control; + LOG_INFO(Render_Vulkan, "Driver Float Controls Capabilities:"); + LOG_INFO(Render_Vulkan, " - 
Denorm Flush FP32: {}", fc.shaderDenormFlushToZeroFloat32 ? "YES" : "NO"); + LOG_INFO(Render_Vulkan, " - Denorm Preserve FP32: {}", fc.shaderDenormPreserveFloat32 ? "YES" : "NO"); + LOG_INFO(Render_Vulkan, " - RTE Rounding FP32: {}", fc.shaderRoundingModeRTEFloat32 ? "YES" : "NO"); + LOG_INFO(Render_Vulkan, " - Signed Zero/Inf/Nan FP32: {}", fc.shaderSignedZeroInfNanPreserveFloat32 ? "YES" : "NO"); + LOG_INFO(Render_Vulkan, " - Independence: {}", + fc.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL ? "ALL" : "LIMITED"); + + // Log user selections + bool ftz = Settings::values.shader_float_ftz.GetValue(); + bool preserve = Settings::values.shader_float_denorm_preserve.GetValue(); + const bool rte = Settings::values.shader_float_rte.GetValue(); + const bool signed_zero = Settings::values.shader_float_signed_zero_inf_nan.GetValue(); + + // Validate mutually exclusive options + if (ftz && preserve) { + LOG_WARNING(Render_Vulkan, + "CONFLICT: FTZ and DenormPreserve are mutually exclusive!"); + LOG_WARNING(Render_Vulkan, + " -> DenormPreserve will take precedence (accuracy over speed)"); + ftz = false; // Preserve takes priority for correctness + } + + LOG_INFO(Render_Vulkan, "User Float Behavior Selection:"); + LOG_INFO(Render_Vulkan, " - Flush To Zero (FTZ): {}", ftz ? "ENABLED" : "disabled"); + LOG_INFO(Render_Vulkan, " - Denorm Preserve: {}", preserve ? "ENABLED" : "disabled"); + LOG_INFO(Render_Vulkan, " - Round To Even (RTE): {}", rte ? "ENABLED" : "disabled"); + LOG_INFO(Render_Vulkan, " - Signed Zero/Inf/Nan: {}", signed_zero ? 
"ENABLED" : "disabled"); + + // Analyze configuration vs Switch native behavior + const bool matches_switch = ftz && !preserve && rte && signed_zero; + if (matches_switch) { + LOG_INFO(Render_Vulkan, "Configuration MATCHES Switch/Maxwell native behavior (FTZ+RTE+SignedZero)"); + } else if (!ftz && !preserve && !rte && !signed_zero) { + LOG_WARNING(Render_Vulkan, "No float behaviors selected - using driver default (may cause glitches)"); + } else { + LOG_INFO(Render_Vulkan, "Configuration is CUSTOM - testing mode active"); + } + + // Extension stays enabled + LOG_INFO(Render_Vulkan, "VK_KHR_shader_float_controls: ENABLED"); + } else { + // Default behavior - disable float controls + LOG_WARNING(Render_Vulkan, + "Disabling shader float controls on Qualcomm (historical binning precision issues)"); + LOG_INFO(Render_Vulkan, + "To enable: Eden Veil -> Extensions -> Shader Float Controls (Force Enable)"); + RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME); + } +#else + // Non-Android: keep original behavior LOG_WARNING(Render_Vulkan, "Disabling shader float controls and 64-bit integer features on Qualcomm proprietary drivers"); RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME); +#endif + + // Int64 atomics - genuinely broken, always disable RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); features.shader_atomic_int64.shaderBufferInt64Atomics = false; diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index 4cd3442d97..ef41132d41 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -226,11 +226,24 @@ namespace Vulkan { vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const { + // Qualcomm uses unified 
memory architecture - prefer DEVICE_LOCAL + HOST_VISIBLE + // for zero-copy access without staging buffers + const bool is_qualcomm = device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY; + const bool prefer_unified = is_qualcomm && (usage == MemoryUsage::Upload || + usage == MemoryUsage::Download || + usage == MemoryUsage::Stream); + + VkMemoryPropertyFlags preferred_flags = MemoryUsagePreferredVmaFlags(usage); + if (prefer_unified) { + // Request DEVICE_LOCAL + HOST_VISIBLE for zero-copy on unified memory architectures + preferred_flags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + } + const VmaAllocationCreateInfo alloc_ci = { .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage), .usage = MemoryUsageVma(usage), .requiredFlags = 0, - .preferredFlags = MemoryUsagePreferredVmaFlags(usage), + .preferredFlags = preferred_flags, .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types, .pool = VK_NULL_HANDLE, .pUserData = nullptr, @@ -245,6 +258,13 @@ namespace Vulkan { vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info)); vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags); + if (is_qualcomm && prefer_unified) { + const bool got_unified = (property_flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && + (property_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + LOG_DEBUG(Render_Vulkan, "Qualcomm buffer allocation: usage={}, unified={}, flags=0x{:X}", + static_cast(usage), got_unified, property_flags); + } + u8 *data = reinterpret_cast<u8 *>(alloc_info.pMappedData); const std::span<u8> mapped_data = data ? 
std::span<u8>{data, ci.size} : std::span<u8>{}; const bool is_coherent = (property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; From 8a83cf0271362e88348beeaa753a61cd36d38a50 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 14 Nov 2025 20:31:14 -0400 Subject: [PATCH 33/34] [service, hle] Add defensive check in WriteBuffer --- src/core/hle/service/hle_ipc.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/core/hle/service/hle_ipc.cpp b/src/core/hle/service/hle_ipc.cpp index e0367e774c..7b8f318db4 100644 --- a/src/core/hle/service/hle_ipc.cpp +++ b/src/core/hle/service/hle_ipc.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -393,6 +396,24 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size, const bool is_buffer_b{BufferDescriptorB().size() > buffer_index && BufferDescriptorB()[buffer_index].Size()}; const std::size_t buffer_size{GetWriteBufferSize(buffer_index)}; + + // Defensive check: if client didn't provide output buffer, log detailed error but don't crash + if (buffer_size == 0) { + LOG_ERROR(Core, + "WriteBuffer called but client provided NO output buffer! 
" + "Requested size: 0x{:X}, buffer_index: {}, is_buffer_b: {}, " + "BufferB count: {}, BufferC count: {}", + size, buffer_index, is_buffer_b, BufferDescriptorB().size(), + BufferDescriptorC().size()); + + // Log command context for debugging + LOG_ERROR(Core, "IPC Command: 0x{:X}, Type: {}", GetCommand(), + static_cast(GetCommandType())); + + // Return 0 instead of crashing - let service handle error + return 0; + } + if (size > buffer_size) { LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size, buffer_size); From c168755c657631e0f6867c6d20583045fb9e4db9 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 15 Nov 2025 03:38:33 +0000 Subject: [PATCH 34/34] fix license --- src/video_core/renderer_vulkan/pipeline_helper.h | 3 +++ src/video_core/renderer_vulkan/vk_render_pass_cache.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index f8152f5add..c6469e0925 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h index 76302b5117..4375327ca4 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.h +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later