From 56de1618b6ca643e61353c1dd24327afa9245839 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 15 Nov 2025 23:06:47 -0400 Subject: [PATCH] [vk, texture_cache, pipeline_cache] Refining handling of R32 --- .../backend/spirv/spirv_emit_context.cpp | 27 ++++++++++++++++--- src/shader_recompiler/profile.h | 2 ++ src/shader_recompiler/shader_info.h | 2 +- .../renderer_opengl/maxwell_to_gl.h | 1 - .../renderer_vulkan/maxwell_to_vk.cpp | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 6 +++++ .../renderer_vulkan/vk_texture_cache.cpp | 9 ++++--- src/video_core/shader_environment.cpp | 14 +++++++++- src/video_core/surface.h | 7 +++-- .../texture_cache/format_lookup_table.cpp | 5 +++- src/video_core/texture_cache/formatter.h | 6 +++-- .../vulkan_common/vulkan_device.cpp | 15 ++--------- 12 files changed, 64 insertions(+), 32 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index ccaa8da9e0..9b267fb883 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -33,11 +33,17 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { const Id type{ctx.F32[1]}; const bool depth{desc.is_depth}; const bool ms{desc.is_multisample}; + + // Mobile GPUs lack Sampled1D SPIR-V capability - emulate 1D as 2D with array layer + const bool emulate_1d = ctx.profile.needs_1d_texture_emulation; + switch (desc.type) { case TextureType::Color1D: - return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format); + return emulate_1d ? ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format) + : ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format); case TextureType::ColorArray1D: - return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format); + return emulate_1d ? ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format) + : ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format); case TextureType::Color2D: case TextureType::Color2DRect: return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, ms, 1, format); @@ -79,11 +85,15 @@ spv::ImageFormat GetImageFormat(ImageFormat format) { Id ImageType(EmitContext& ctx, const ImageDescriptor& desc, Id sampled_type) { const spv::ImageFormat format{GetImageFormat(desc.format)}; + const bool emulate_1d = ctx.profile.needs_1d_texture_emulation; + switch (desc.type) { case TextureType::Color1D: - return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, 2, format); + return emulate_1d ? ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, 2, format) + : ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, 2, format); case TextureType::ColorArray1D: - return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, true, false, 2, format); + return emulate_1d ? ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, true, false, 2, format) + : ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, true, false, 2, format); case TextureType::Color2D: return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, 2, format); case TextureType::ColorArray2D: @@ -1605,6 +1615,15 @@ void EmitContext::DefineInputs(const IR::Program& program) { subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR); subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR); subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR); + + // Vulkan spec: Fragment shader Input variables with integer/float type must have Flat decoration + if (stage == Stage::Fragment) { + Decorate(subgroup_mask_eq, spv::Decoration::Flat); + Decorate(subgroup_mask_lt, spv::Decoration::Flat); + Decorate(subgroup_mask_le, spv::Decoration::Flat); + Decorate(subgroup_mask_gt, spv::Decoration::Flat); + Decorate(subgroup_mask_ge, spv::Decoration::Flat); + } } if (info.uses_fswzadd || info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles || (profile.warp_size_potentially_larger_than_guest && diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 6014221e8f..bfdfece595 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -95,6 +95,8 @@ struct Profile { bool ignore_nan_fp_comparisons{}; /// Some drivers have broken support for OpVectorExtractDynamic on subgroup mask inputs bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{}; + /// Mobile GPUs lack Sampled1D capability - need to emulate 1D textures as 2D with height=1 + bool needs_1d_texture_emulation{}; u32 gl_max_compute_smem_size{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index ed13e68209..97845b0520 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -130,7 +130,7 @@ enum class TexturePixelFormat { ASTC_2D_8X6_SRGB, ASTC_2D_6X5_UNORM, ASTC_2D_6X5_SRGB, - E5B9G9R9_FLOAT, + D32_FLOAT, D16_UNORM, X8_D24_UNORM, diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 5ea9e23780..27c5b93df1 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -113,7 +113,6 @@ constexpr std::array FORMAT_TAB {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT_24_8}, // X8_D24_UNORM diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 45bbc93d5d..3f4c648e99 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -214,7 +214,7 @@ struct FormatTuple { {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5_UNORM {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB - {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT (SAMPLED_IMAGE only, no COLOR_ATTACHMENT) + {VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // B10G11R11_FLOAT // Depth formats {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f6bc3d88f2..2e35febd5d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -389,6 +389,12 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, .ignore_nan_fp_comparisons = false, .has_broken_spirv_subgroup_mask_vector_extract_dynamic = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, + .needs_1d_texture_emulation = + driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || + driver_id == VK_DRIVER_ID_MESA_TURNIP || + driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || + driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY || + driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY, .has_broken_robust = device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal, .min_ssbo_alignment = device.GetStorageBufferAlignment(), diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 0745787945..bffbcfd5c9 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -2121,8 +2121,9 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI } const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); - // This causes validation errors and undefined behavior (flickering, missing geometry) on certain games - // Reinterpret R32_UINT as R32_SFLOAT for sampled images to match shader expectations + // Workaround: Nintendo Switch games incorrectly use R32_UINT textures with float samplers + // This causes validation errors and undefined behavior (flickering, missing geometry) + // Reinterpret R32_UINT as R16_SFLOAT for sampled images (R32_SFLOAT lacks LINEAR filter support on Adreno) VkFormat view_format = format_info.format; if (view_format == VK_FORMAT_R32_UINT && !info.IsRenderTarget() && @@ -2130,8 +2131,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI // Only reinterpret if NOT used as storage image (storage requires matching types) const bool is_storage = (ImageUsageFlags(format_info, format) & VK_IMAGE_USAGE_STORAGE_BIT) != 0; if (!is_storage) { - view_format = VK_FORMAT_R32_SFLOAT; - LOG_DEBUG(Render_Vulkan, "Reinterpreting R32_UINT as R32_SFLOAT for sampled image compatibility"); + view_format = VK_FORMAT_R16_SFLOAT; + LOG_DEBUG(Render_Vulkan, "Reinterpreting R32_UINT as R16_SFLOAT for sampled image compatibility (LINEAR filter support)"); } } diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index de12d795c8..ca6dfc1721 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -277,7 +277,19 @@ std::optional GenericEnvironment::TryFindSize() { Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, u32 raw) { const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; - ASSERT(handle.first <= tic_limit); + + // Some games (especially on updates) use invalid texture handles beyond tic_limit + // Clamp to limit instead of asserting to prevent crashes + if (handle.first > tic_limit) { + LOG_WARNING(HW_GPU, "Texture handle {} exceeds TIC limit {}, clamping to limit", + handle.first, tic_limit); + const u32 clamped_handle = std::min(handle.first, tic_limit); + const GPUVAddr descriptor_addr{tic_addr + clamped_handle * sizeof(Tegra::Texture::TICEntry)}; + Tegra::Texture::TICEntry entry; + gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); + return entry; + } + const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)}; Tegra::Texture::TICEntry entry; gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 4ccb24f27d..111d8134d7 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2014 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -108,7 +111,6 @@ enum class PixelFormat { ASTC_2D_8X6_SRGB, ASTC_2D_6X5_UNORM, ASTC_2D_6X5_SRGB, - E5B9G9R9_FLOAT, MaxColorFormat, @@ -249,7 +251,6 @@ constexpr std::array BLOCK_WIDTH_TABLE = {{ 8, // ASTC_2D_8X6_SRGB 6, // ASTC_2D_6X5_UNORM 6, // ASTC_2D_6X5_SRGB - 1, // E5B9G9R9_FLOAT 1, // D32_FLOAT 1, // D16_UNORM 1, // X8_D24_UNORM @@ -359,7 +360,6 @@ constexpr std::array BLOCK_HEIGHT_TABLE = {{ 6, // ASTC_2D_8X6_SRGB 5, // ASTC_2D_6X5_UNORM 5, // ASTC_2D_6X5_SRGB - 1, // E5B9G9R9_FLOAT 1, // D32_FLOAT 1, // D16_UNORM 1, // X8_D24_UNORM @@ -469,7 +469,6 @@ constexpr std::array BITS_PER_BLOCK_TABLE = {{ 128, // ASTC_2D_8X6_SRGB 128, // ASTC_2D_6X5_UNORM 128, // ASTC_2D_6X5_SRGB - 32, // E5B9G9R9_FLOAT 32, // D32_FLOAT 16, // D16_UNORM 32, // X8_D24_UNORM diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 8c774f512c..47d2f39e47 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -135,7 +138,7 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, case Hash(TextureFormat::R32, SINT): return PixelFormat::R32_SINT; case Hash(TextureFormat::E5B9G9R9, FLOAT): - return PixelFormat::E5B9G9R9_FLOAT; + return PixelFormat::B10G11R11_FLOAT; case Hash(TextureFormat::Z32, FLOAT): return PixelFormat::D32_FLOAT; case Hash(TextureFormat::Z32, FLOAT, UINT, UINT, UINT, LINEAR): diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index 33c32645a2..b4068f4369 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -205,8 +208,7 @@ struct fmt::formatter : fmt::formatter @@ -150,8 +140,7 @@ constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { return Alternatives::VK_FORMAT_A4B4G4R4_UNORM_PACK16.data(); case VK_FORMAT_B10G11R11_UFLOAT_PACK32: return Alternatives::B10G11R11_UFLOAT_PACK32.data(); - case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: - return Alternatives::E5B9G9R9_UFLOAT_PACK32.data(); + default: return nullptr; }