Browse Source

[vk, spir-v] Refining the path for Array 1D emulation and R32 Uint handling consistency

pull/3062/head
CamilleLaVey 1 month ago
committed by lizzie
parent
commit
e12c7f44cc
  1. 64
      src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
  2. 14
      src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
  3. 2
      src/video_core/renderer_vulkan/renderer_vulkan.cpp
  4. 3
      src/video_core/renderer_vulkan/vk_blit_screen.cpp
  5. 19
      src/video_core/renderer_vulkan/vk_texture_cache.cpp

64
src/shader_recompiler/backend/spirv/emit_spirv_image.cpp

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -318,13 +321,23 @@ void AddOffsetToCoordinates(EmitContext& ctx, const IR::TextureInstInfo& info, I
return;
}
// Mobile GPUs: 1D textures emulated as 2D with height=1
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
Id result_type{};
switch (info.type) {
case TextureType::Buffer:
case TextureType::Color1D: {
result_type = ctx.U32[1];
break;
}
case TextureType::Color1D:
if (emulate_1d) {
// Treat as 2D: offset needs Y component
offset = ctx.OpCompositeConstruct(ctx.U32[2], offset, ctx.u32_zero_value);
result_type = ctx.U32[2];
} else {
result_type = ctx.U32[1];
}
break;
case TextureType::ColorArray1D:
offset = ctx.OpCompositeConstruct(ctx.U32[2], offset, ctx.u32_zero_value);
[[fallthrough]];
@ -348,6 +361,29 @@ void AddOffsetToCoordinates(EmitContext& ctx, const IR::TextureInstInfo& info, I
}
coords = ctx.OpIAdd(result_type, coords, offset);
}
/// Widens texture coordinates so they address the 2D image that stands in for a 1D texture.
///
/// Returns the coordinates unchanged when ctx.profile.needs_1d_texture_emulation is not set,
/// or when the texture type is neither Color1D nor ColorArray1D.
///
/// @param ctx    Emit context used to build the SPIR-V instructions
/// @param info   Texture instruction flags; only info.type is consulted
/// @param coords Coordinate operand of the image instruction
/// @return Adjusted coordinate id: (x, 0.0) for Color1D, (x, 0.0, layer) for ColorArray1D,
///         otherwise the input id
///
/// NOTE(review): the inserted Y component and the constructed vectors are always F32-typed.
/// Callers that pass integer texel coordinates (fetch/read/write style instructions)
/// presumably need an integer-typed variant of this conversion - confirm.
[[nodiscard]] Id AdjustCoordinatesForEmulation(EmitContext& ctx, const IR::TextureInstInfo& info,
                                               Id coords) {
    if (!ctx.profile.needs_1d_texture_emulation) {
        return coords;
    }
    switch (info.type) {
    case TextureType::Color1D:
        // Convert scalar x -> vec2(x, 0.0)
        return ctx.OpCompositeConstruct(ctx.F32[2], coords, ctx.f32_zero_value);
    case TextureType::ColorArray1D: {
        // Convert vec2(x, layer) -> vec3(x, 0.0, layer)
        // ColorArray1D coords are always vec2 in IR (per the original author's note)
        // The braces scope these locals to this case: without them the jump to `default:`
        // would bypass their initialization, which is ill-formed C++.
        const Id x = ctx.OpCompositeExtract(ctx.F32[1], coords, 0);
        const Id layer = ctx.OpCompositeExtract(ctx.F32[1], coords, 1);
        return ctx.OpCompositeConstruct(ctx.F32[3], x, ctx.f32_zero_value, layer);
    }
    default:
        return coords;
    }
}
} // Anonymous namespace
Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
@ -449,6 +485,7 @@ Id EmitBoundImageWrite(EmitContext&) {
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id bias_lc, const IR::Value& offset) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
if (ctx.stage == Stage::Fragment) {
const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
bias_lc, offset);
@ -470,6 +507,7 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value&
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id lod, const IR::Value& offset) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
const ImageOperands operands(ctx, false, true, false, lod, offset);
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
@ -479,6 +517,7 @@ Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value&
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
Id coords, Id dref, Id bias_lc, const IR::Value& offset) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
if (ctx.stage == Stage::Fragment) {
const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
bias_lc, offset);
@ -500,6 +539,7 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
Id coords, Id dref, Id lod, const IR::Value& offset) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
const ImageOperands operands(ctx, false, true, false, lod, offset);
return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod,
&EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1],
@ -509,6 +549,7 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, const IR::Value& offset2) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
const ImageOperands operands(ctx, offset, offset2);
if (ctx.profile.need_gather_subpixel_offset) {
coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
@ -521,6 +562,7 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, const IR::Value& offset2, Id dref) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
const ImageOperands operands(ctx, offset, offset2);
if (ctx.profile.need_gather_subpixel_offset) {
coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
@ -533,6 +575,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
Id lod, Id ms) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
AddOffsetToCoordinates(ctx, info, coords, offset);
if (info.type == TextureType::Buffer) {
lod = Id{};
@ -559,9 +602,20 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i
return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod)
: ctx.OpImageQuerySize(type, image);
}};
// Mobile GPUs: 1D textures emulated as 2D, query returns vec2 instead of scalar
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
switch (info.type) {
case TextureType::Color1D:
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips());
if (emulate_1d) {
// Query as 2D, extract only X component for 1D size
const Id size_2d = query(ctx.U32[2]);
const Id width = ctx.OpCompositeExtract(ctx.U32[1], size_2d, 0);
return ctx.OpCompositeConstruct(ctx.U32[4], width, zero, zero, mips());
} else {
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips());
}
case TextureType::ColorArray1D:
case TextureType::Color2D:
case TextureType::ColorCube:
@ -579,6 +633,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
const Id zero{ctx.f32_zero_value};
const Id sampler{Texture(ctx, info, index)};
return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords),
@ -588,6 +643,7 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id derivatives, const IR::Value& offset, Id lod_clamp) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
const auto operands = info.num_derivatives == 3
? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives,
ctx.Def(offset), {}, lod_clamp)
@ -600,6 +656,7 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
if (info.image_format == ImageFormat::Typeless && !ctx.profile.support_typeless_image_loads) {
LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host");
return ctx.ConstantNull(ctx.U32[4]);
@ -616,6 +673,7 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id co
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
const auto [image, is_integer] = Image(ctx, index, info);
if (!is_integer) {
color = ctx.OpBitcast(ctx.F32[4], color);

14
src/shader_recompiler/backend/spirv/spirv_emit_context.cpp

@ -37,6 +37,13 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
// Mobile GPUs lack Sampled1D SPIR-V capability - emulate 1D as 2D with array layer
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
// Debug log for 1D emulation
if (desc.type == TextureType::Color1D || desc.type == TextureType::ColorArray1D) {
LOG_WARNING(Shader_SPIRV, "ImageType(texture): Creating {} texture, emulate_1d={}",
desc.type == TextureType::Color1D ? "Color1D" : "ColorArray1D",
emulate_1d);
}
switch (desc.type) {
case TextureType::Color1D:
return emulate_1d ? ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format)
@ -87,6 +94,13 @@ Id ImageType(EmitContext& ctx, const ImageDescriptor& desc, Id sampled_type) {
const spv::ImageFormat format{GetImageFormat(desc.format)};
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
// Debug log for 1D emulation
if (desc.type == TextureType::Color1D || desc.type == TextureType::ColorArray1D) {
LOG_WARNING(Shader_SPIRV, "ImageType: Creating {} image, emulate_1d={}",
desc.type == TextureType::Color1D ? "Color1D" : "ColorArray1D",
emulate_1d);
}
switch (desc.type) {
case TextureType::Color1D:
return emulate_1d ? ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, 2, format)

2
src/video_core/renderer_vulkan/renderer_vulkan.cpp

@ -177,8 +177,6 @@ try
// Destructor: installs an empty on-submit callback on the scheduler, then waits for the
// logical device to become idle while holding the scheduler's submit mutex.
RendererVulkan::~RendererVulkan() {
// Register a no-op lambda; presumably this replaces any previously registered callback
// so nothing runs on later submissions - confirm against the scheduler implementation.
scheduler.RegisterOnSubmit([] {});
// Acquire submit_mutex before WaitIdle to prevent simultaneous queue access
std::scoped_lock lock{scheduler.submit_mutex};
// The void(...) cast explicitly discards whatever WaitIdle() returns.
void(device.GetLogical().WaitIdle());
}

3
src/video_core/renderer_vulkan/vk_blit_screen.cpp

@ -30,8 +30,7 @@ BlitScreen::~BlitScreen() = default;
// Waits on the present manager, finishes the scheduler, and then waits for the logical
// device to become idle, in that order.
void BlitScreen::WaitIdle() {
present_manager.WaitPresent();
scheduler.Finish();
// Note: scheduler.Finish() already waits for GPU and synchronizes submit_mutex
// Calling device.WaitIdle() here causes threading errors (simultaneous queue access)
// NOTE(review): the two comment lines above warn against the very call that follows.
// This view appears to interleave removed and added diff lines, so presumably only one
// of the two (the warning or the call) belongs to the final file - confirm against the
// repository before relying on either.
device.GetLogical().WaitIdle();
}
void BlitScreen::SetWindowAdaptPass() {

19
src/video_core/renderer_vulkan/vk_texture_cache.cpp

@ -2121,18 +2121,23 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
}
const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
// Workaround: Nintendo Switch games incorrectly use R32_UINT textures with float samplers
// This causes validation errors and undefined behavior (flickering, missing geometry)
// Reinterpret R32_UINT as R16_SFLOAT for sampled images (R32_SFLOAT lacks LINEAR filter support on Adreno)
// Workaround: Some Switch games incorrectly use R32_UINT textures with float samplers
// causing flickering/missing geometry. However, glyph atlases and lookup tables
// CORRECTLY use R32_UINT for integer data - reinterpreting breaks text rendering.
// Conservative heuristic: Only reinterpret large textures (likely geometry/effects)
VkFormat view_format = format_info.format;
if (view_format == VK_FORMAT_R32_UINT &&
!info.IsRenderTarget() &&
(ImageUsageFlags(format_info, format) & VK_IMAGE_USAGE_SAMPLED_BIT)) {
// Only reinterpret if NOT used as storage image (storage requires matching types)
// Skip small textures (likely atlases, lookup tables, or integer data)
const bool is_likely_atlas = info.size.width <= 1024 || info.size.height <= 1024;
const bool is_storage = (ImageUsageFlags(format_info, format) & VK_IMAGE_USAGE_STORAGE_BIT) != 0;
if (!is_storage) {
view_format = VK_FORMAT_R16_SFLOAT;
LOG_DEBUG(Render_Vulkan, "Reinterpreting R32_UINT as R16_SFLOAT for sampled image compatibility (LINEAR filter support)");
// Only reinterpret large textures that are NOT storage and NOT likely atlases
if (!is_storage && !is_likely_atlas) {
view_format = VK_FORMAT_R32_SFLOAT;
LOG_DEBUG(Render_Vulkan, "Reinterpreting R32_UINT→R32_SFLOAT for {}x{} texture",
info.size.width, info.size.height);
}
}

Loading…
Cancel
Save