Browse Source

[vk, texture_cache, pipeline_cache] Refining handling of R32

pull/3062/head
CamilleLaVey 1 month ago
committed by lizzie
parent
commit
56de1618b6
  1. 27
      src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
  2. 2
      src/shader_recompiler/profile.h
  3. 2
      src/shader_recompiler/shader_info.h
  4. 1
      src/video_core/renderer_opengl/maxwell_to_gl.h
  5. 2
      src/video_core/renderer_vulkan/maxwell_to_vk.cpp
  6. 6
      src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
  7. 9
      src/video_core/renderer_vulkan/vk_texture_cache.cpp
  8. 14
      src/video_core/shader_environment.cpp
  9. 7
      src/video_core/surface.h
  10. 5
      src/video_core/texture_cache/format_lookup_table.cpp
  11. 6
      src/video_core/texture_cache/formatter.h
  12. 15
      src/video_core/vulkan_common/vulkan_device.cpp

27
src/shader_recompiler/backend/spirv/spirv_emit_context.cpp

@ -33,11 +33,17 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
const Id type{ctx.F32[1]};
const bool depth{desc.is_depth};
const bool ms{desc.is_multisample};
// Mobile GPUs lack Sampled1D SPIR-V capability - emulate 1D as 2D with array layer
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
switch (desc.type) {
case TextureType::Color1D:
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format);
return emulate_1d ? ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format)
: ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format);
case TextureType::ColorArray1D:
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format);
return emulate_1d ? ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format)
: ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format);
case TextureType::Color2D:
case TextureType::Color2DRect:
return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, ms, 1, format);
@ -79,11 +85,15 @@ spv::ImageFormat GetImageFormat(ImageFormat format) {
Id ImageType(EmitContext& ctx, const ImageDescriptor& desc, Id sampled_type) {
const spv::ImageFormat format{GetImageFormat(desc.format)};
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
switch (desc.type) {
case TextureType::Color1D:
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, 2, format);
return emulate_1d ? ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, 2, format)
: ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, 2, format);
case TextureType::ColorArray1D:
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, true, false, 2, format);
return emulate_1d ? ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, true, false, 2, format)
: ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, true, false, 2, format);
case TextureType::Color2D:
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, 2, format);
case TextureType::ColorArray2D:
@ -1605,6 +1615,15 @@ void EmitContext::DefineInputs(const IR::Program& program) {
subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR);
subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR);
subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR);
// Vulkan spec: Fragment shader Input variables with integer/float type must have Flat decoration
if (stage == Stage::Fragment) {
Decorate(subgroup_mask_eq, spv::Decoration::Flat);
Decorate(subgroup_mask_lt, spv::Decoration::Flat);
Decorate(subgroup_mask_le, spv::Decoration::Flat);
Decorate(subgroup_mask_gt, spv::Decoration::Flat);
Decorate(subgroup_mask_ge, spv::Decoration::Flat);
}
}
if (info.uses_fswzadd || info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles ||
(profile.warp_size_potentially_larger_than_guest &&

2
src/shader_recompiler/profile.h

@ -95,6 +95,8 @@ struct Profile {
bool ignore_nan_fp_comparisons{};
/// Some drivers have broken support for OpVectorExtractDynamic on subgroup mask inputs
bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{};
/// Mobile GPUs lack Sampled1D capability - need to emulate 1D textures as 2D with height=1
bool needs_1d_texture_emulation{};
u32 gl_max_compute_smem_size{};

2
src/shader_recompiler/shader_info.h

@ -130,7 +130,7 @@ enum class TexturePixelFormat {
ASTC_2D_8X6_SRGB,
ASTC_2D_6X5_UNORM,
ASTC_2D_6X5_SRGB,
E5B9G9R9_FLOAT,
D32_FLOAT,
D16_UNORM,
X8_D24_UNORM,

1
src/video_core/renderer_opengl/maxwell_to_gl.h

@ -113,7 +113,6 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
{GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
{GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
{GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT_24_8}, // X8_D24_UNORM

2
src/video_core/renderer_vulkan/maxwell_to_vk.cpp

@ -214,7 +214,7 @@ struct FormatTuple {
{VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB
{VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5_UNORM
{VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB
{VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT (SAMPLED_IMAGE only, no COLOR_ATTACHMENT)
{VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // B10G11R11_FLOAT
// Depth formats
{VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT

6
src/video_core/renderer_vulkan/vk_pipeline_cache.cpp

@ -389,6 +389,12 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.ignore_nan_fp_comparisons = false,
.has_broken_spirv_subgroup_mask_vector_extract_dynamic =
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
.needs_1d_texture_emulation =
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY,
.has_broken_robust =
device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal,
.min_ssbo_alignment = device.GetStorageBufferAlignment(),

9
src/video_core/renderer_vulkan/vk_texture_cache.cpp

@ -2121,8 +2121,9 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
}
const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
// This causes validation errors and undefined behavior (flickering, missing geometry) on certain games
// Reinterpret R32_UINT as R32_SFLOAT for sampled images to match shader expectations
// Workaround: Nintendo Switch games incorrectly use R32_UINT textures with float samplers
// This causes validation errors and undefined behavior (flickering, missing geometry)
// Reinterpret R32_UINT as R16_SFLOAT for sampled images (R32_SFLOAT lacks LINEAR filter support on Adreno)
VkFormat view_format = format_info.format;
if (view_format == VK_FORMAT_R32_UINT &&
!info.IsRenderTarget() &&
@ -2130,8 +2131,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
// Only reinterpret if NOT used as storage image (storage requires matching types)
const bool is_storage = (ImageUsageFlags(format_info, format) & VK_IMAGE_USAGE_STORAGE_BIT) != 0;
if (!is_storage) {
view_format = VK_FORMAT_R32_SFLOAT;
LOG_DEBUG(Render_Vulkan, "Reinterpreting R32_UINT as R32_SFLOAT for sampled image compatibility");
view_format = VK_FORMAT_R16_SFLOAT;
LOG_DEBUG(Render_Vulkan, "Reinterpreting R32_UINT as R16_SFLOAT for sampled image compatibility (LINEAR filter support)");
}
}

14
src/video_core/shader_environment.cpp

@ -277,7 +277,19 @@ std::optional<u64> GenericEnvironment::TryFindSize() {
Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit,
bool via_header_index, u32 raw) {
const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
ASSERT(handle.first <= tic_limit);
// Some games (especially on updates) use invalid texture handles beyond tic_limit
// Clamp to limit instead of asserting to prevent crashes
if (handle.first > tic_limit) {
LOG_WARNING(HW_GPU, "Texture handle {} exceeds TIC limit {}, clamping to limit",
handle.first, tic_limit);
const u32 clamped_handle = std::min(handle.first, tic_limit);
const GPUVAddr descriptor_addr{tic_addr + clamped_handle * sizeof(Tegra::Texture::TICEntry)};
Tegra::Texture::TICEntry entry;
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
return entry;
}
const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
Tegra::Texture::TICEntry entry;
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));

7
src/video_core/surface.h

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -108,7 +111,6 @@ enum class PixelFormat {
ASTC_2D_8X6_SRGB,
ASTC_2D_6X5_UNORM,
ASTC_2D_6X5_SRGB,
E5B9G9R9_FLOAT,
MaxColorFormat,
@ -249,7 +251,6 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
8, // ASTC_2D_8X6_SRGB
6, // ASTC_2D_6X5_UNORM
6, // ASTC_2D_6X5_SRGB
1, // E5B9G9R9_FLOAT
1, // D32_FLOAT
1, // D16_UNORM
1, // X8_D24_UNORM
@ -359,7 +360,6 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
6, // ASTC_2D_8X6_SRGB
5, // ASTC_2D_6X5_UNORM
5, // ASTC_2D_6X5_SRGB
1, // E5B9G9R9_FLOAT
1, // D32_FLOAT
1, // D16_UNORM
1, // X8_D24_UNORM
@ -469,7 +469,6 @@ constexpr std::array<u8, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
128, // ASTC_2D_8X6_SRGB
128, // ASTC_2D_6X5_UNORM
128, // ASTC_2D_6X5_SRGB
32, // E5B9G9R9_FLOAT
32, // D32_FLOAT
16, // D16_UNORM
32, // X8_D24_UNORM

5
src/video_core/texture_cache/format_lookup_table.cpp

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -135,7 +138,7 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
case Hash(TextureFormat::R32, SINT):
return PixelFormat::R32_SINT;
case Hash(TextureFormat::E5B9G9R9, FLOAT):
return PixelFormat::E5B9G9R9_FLOAT;
return PixelFormat::B10G11R11_FLOAT;
case Hash(TextureFormat::Z32, FLOAT):
return PixelFormat::D32_FLOAT;
case Hash(TextureFormat::Z32, FLOAT, UINT, UINT, UINT, LINEAR):

6
src/video_core/texture_cache/formatter.h

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -205,8 +208,7 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
return "ASTC_2D_6X5_UNORM";
case PixelFormat::ASTC_2D_6X5_SRGB:
return "ASTC_2D_6X5_SRGB";
case PixelFormat::E5B9G9R9_FLOAT:
return "E5B9G9R9_FLOAT";
case PixelFormat::D32_FLOAT:
return "D32_FLOAT";
case PixelFormat::D16_UNORM:

15
src/video_core/vulkan_common/vulkan_device.cpp

@ -1,5 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -105,17 +106,6 @@ constexpr std::array B10G11R11_UFLOAT_PACK32{
VK_FORMAT_UNDEFINED,
};
// E5B9G9R9_UFLOAT (RGB9E5) - INVALID for COLOR_ATTACHMENT on Nintendo Switch
// Nintendo Switch hardware validation: NO COLOR_ATTACHMENT_BIT (only SAMPLED_IMAGE)
// Reference: vp_gpuinfo_nintendo_switch_v2_495_0_0_0 - Missing required attachment bits
// This format should NEVER be used as render target, only for texture sampling
constexpr std::array E5B9G9R9_UFLOAT_PACK32{
VK_FORMAT_B10G11R11_UFLOAT_PACK32, // Upgrade to proper HDR format with attachment support
VK_FORMAT_A8B8G8R8_UNORM_PACK32, // Fallback: RGBA8 LDR
VK_FORMAT_R16G16B16A16_SFLOAT, // Last resort: RGBA16F
VK_FORMAT_UNDEFINED,
};
} // namespace Alternatives
template <typename T>
@ -150,8 +140,7 @@ constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
return Alternatives::VK_FORMAT_A4B4G4R4_UNORM_PACK16.data();
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
return Alternatives::B10G11R11_UFLOAT_PACK32.data();
case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
return Alternatives::E5B9G9R9_UFLOAT_PACK32.data();
default:
return nullptr;
}

Loading…
Cancel
Save