From 08dbacdf53790ea6892f7876fa4fe1e31fbe63ab Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 29 Nov 2025 14:06:18 -0400 Subject: [PATCH] [vk, gl, spv] Opcode Promotion path emulation --- .../backend/spirv/emit_spirv.cpp | 2 +- .../backend/spirv/emit_spirv_image.cpp | 392 +++++++++++++++--- .../backend/spirv/spirv_emit_context.cpp | 7 +- .../backend/spirv/texture_helpers.h | 29 ++ src/shader_recompiler/host_translate_info.h | 1 + src/shader_recompiler/profile.h | 1 + .../renderer_opengl/gl_shader_cache.cpp | 2 + .../renderer_vulkan/vk_pipeline_cache.cpp | 2 + .../renderer_vulkan/vk_texture_cache.cpp | 22 +- .../vulkan_common/vulkan_device.cpp | 3 + src/video_core/vulkan_common/vulkan_device.h | 5 + 11 files changed, 391 insertions(+), 75 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/texture_helpers.h diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 4aa211089f..48083b6b1d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -410,7 +410,7 @@ void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) { } void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) { - if (info.uses_sampled_1d) { + if (info.uses_sampled_1d && profile.support_sampled_1d) { ctx.AddCapability(spv::Capability::Sampled1D); } if (info.uses_sparse_residency) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index ddb4b7e32f..2cee0e8a1a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -4,15 +4,162 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" +#include "shader_recompiler/backend/spirv/texture_helpers.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { namespace { + +TextureType GetEffectiveType(const EmitContext& ctx, TextureType type) { + return EffectiveTextureType(ctx.profile, type); +} + +bool HasLayerComponent(TextureType type) { + switch (type) { + case TextureType::ColorArray1D: + case TextureType::ColorArray2D: + case TextureType::ColorArrayCube: + return true; + default: + return false; + } +} + +u32 BaseDimension(TextureType type) { + switch (type) { + case TextureType::Color1D: + case TextureType::ColorArray1D: + return 1; + case TextureType::Color2D: + case TextureType::Color2DRect: + case TextureType::ColorArray2D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + return 2; + case TextureType::Color3D: + return 3; + default: + return 0; + } +} + +bool IsFloatCoordType(IR::Type type) { + switch (type) { + case IR::Type::F32: + case IR::Type::F32x2: + case IR::Type::F32x3: + case IR::Type::F32x4: + return true; + case IR::Type::U32: + case IR::Type::U32x2: + case IR::Type::U32x3: + case IR::Type::U32x4: + return false; + default: + throw InvalidArgument("Unsupported coordinate type {}", type); + } +} + +u32 NumComponents(IR::Type type) { + switch (type) { + case IR::Type::F32: + case IR::Type::U32: + return 1; + case IR::Type::F32x2: + case IR::Type::U32x2: + return 2; + case IR::Type::F32x3: + case 
IR::Type::U32x3: + return 3; + case IR::Type::F32x4: + case IR::Type::U32x4: + return 4; + default: + throw InvalidArgument("Unsupported type for coordinate promotion {}", type); + } +} + +Id ExtractComponent(EmitContext& ctx, Id value, IR::Type type, u32 index, bool is_float) { + if (NumComponents(type) == 1) { + return value; + } + const Id scalar_type{is_float ? ctx.F32[1] : ctx.U32[1]}; + return ctx.OpCompositeExtract(scalar_type, value, index); +} + +Id PromoteCoordinate(EmitContext& ctx, IR::Inst* inst, const IR::TextureInstInfo& info, + Id coords) { + if (!Needs1DPromotion(ctx.profile, info.type)) { + return coords; + } + const TextureType effective_type{GetEffectiveType(ctx, info.type)}; + const bool has_layer{HasLayerComponent(effective_type)}; + const u32 target_base_dim{BaseDimension(effective_type)}; + if (target_base_dim == 0) { + return coords; + } + const IR::Type coord_type{inst->Arg(1).Type()}; + const bool is_float{IsFloatCoordType(coord_type)}; + const Id zero{is_float ? ctx.f32_zero_value : ctx.u32_zero_value}; + const u32 total_components{NumComponents(coord_type)}; + const u32 original_base_dim{total_components - (has_layer ? 1u : 0u)}; + + boost::container::small_vector components; + components.reserve(target_base_dim + (has_layer ? 1u : 0u)); + for (u32 i = 0; i < original_base_dim; ++i) { + components.push_back(ExtractComponent(ctx, coords, coord_type, i, is_float)); + } + if (components.empty()) { + components.push_back(coords); + } + while (components.size() < target_base_dim) { + components.push_back(zero); + } + if (has_layer) { + const u32 layer_index{total_components == 1 ? 0u : total_components - 1u}; + components.push_back(ExtractComponent(ctx, coords, coord_type, layer_index, is_float)); + } + const Id vector_type{is_float ? 
ctx.F32[components.size()] : ctx.U32[components.size()]}; + return ctx.OpCompositeConstruct(vector_type, std::span{components.data(), components.size()}); +} + +Id PromoteOffset(EmitContext& ctx, const IR::Value& offset, Id offset_id, + TextureType effective_type) { + const u32 required_components{BaseDimension(effective_type)}; + if (required_components <= 1 || offset.IsEmpty()) { + return offset_id; + } + const IR::Type offset_type{offset.Type()}; + const u32 existing_components{NumComponents(offset_type)}; + if (existing_components >= required_components) { + return offset_id; + } + boost::container::small_vector components; + components.reserve(required_components); + if (existing_components == 1) { + components.push_back(offset_id); + } else { + for (u32 i = 0; i < existing_components; ++i) { + components.push_back(ctx.OpCompositeExtract(ctx.S32[1], offset_id, i)); + } + } + while (components.size() < required_components) { + components.push_back(ctx.SConst(0)); + } + return ctx.OpCompositeConstruct(ctx.S32[required_components], + std::span{components.data(), components.size()}); +} + +u32 ExpectedDerivativeComponents(TextureType type) { + return BaseDimension(type); +} + class ImageOperands { public: [[maybe_unused]] static constexpr bool ImageSampleOffsetAllowed = false; @@ -20,8 +167,10 @@ public: [[maybe_unused]] static constexpr bool ImageFetchOffsetAllowed = false; [[maybe_unused]] static constexpr bool ImageGradientOffsetAllowed = false; - explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp, - Id lod, const IR::Value& offset) { + explicit ImageOperands(EmitContext& ctx, TextureType type, bool promote, + bool has_bias, bool has_lod, bool has_lod_clamp, Id lod, + const IR::Value& offset) + : texture_type{type}, needs_promotion{promote} { if (has_bias) { const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; Add(spv::ImageOperandsMask::Bias, bias); @@ -37,12 +186,11 @@ public: } } - explicit ImageOperands(EmitContext& ctx, const IR::Value& offset, const IR::Value& offset2) { + explicit ImageOperands(EmitContext& ctx, TextureType type, bool promote, + const IR::Value& offset, const IR::Value& offset2) + : texture_type{type}, needs_promotion{promote} { if (offset2.IsEmpty()) { - if (offset.IsEmpty()) { - return; - } - Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); + AddOffset(ctx, offset, ImageGatherOffsetAllowed); return; } const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; @@ -72,8 +220,10 @@ public: } } - explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives, - u32 num_derivatives, const IR::Value& offset, Id lod_clamp) { + explicit ImageOperands(EmitContext& ctx, TextureType type, bool promote, bool has_lod_clamp, + Id derivatives, u32 num_derivatives, const IR::Value& offset, + Id lod_clamp) + : texture_type{type}, needs_promotion{promote} { if (!Sirit::ValidId(derivatives)) { throw LogicError("Derivatives must be present"); } @@ -83,10 +233,16 @@ public: deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivatives, i * 2)); deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivatives, i * 2 + 1)); } + const u32 expected_components{needs_promotion ? 
ExpectedDerivativeComponents(texture_type) + : num_derivatives}; + while (needs_promotion && deriv_x_accum.size() < expected_components) { + deriv_x_accum.push_back(ctx.f32_zero_value); + deriv_y_accum.push_back(ctx.f32_zero_value); + } const Id derivatives_X{ctx.OpCompositeConstruct( - ctx.F32[num_derivatives], std::span{deriv_x_accum.data(), deriv_x_accum.size()})}; + ctx.F32[expected_components], std::span{deriv_x_accum.data(), deriv_x_accum.size()})}; const Id derivatives_Y{ctx.OpCompositeConstruct( - ctx.F32[num_derivatives], std::span{deriv_y_accum.data(), deriv_y_accum.size()})}; + ctx.F32[expected_components], std::span{deriv_y_accum.data(), deriv_y_accum.size()})}; Add(spv::ImageOperandsMask::Grad, derivatives_X, derivatives_Y); AddOffset(ctx, offset, ImageGradientOffsetAllowed); if (has_lod_clamp) { @@ -94,8 +250,10 @@ public: } } - explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives_1, Id derivatives_2, - const IR::Value& offset, Id lod_clamp) { + explicit ImageOperands(EmitContext& ctx, TextureType type, bool promote, bool has_lod_clamp, + Id derivatives_1, Id derivatives_2, const IR::Value& offset, + Id lod_clamp) + : texture_type{type}, needs_promotion{promote} { if (!Sirit::ValidId(derivatives_1) || !Sirit::ValidId(derivatives_2)) { throw LogicError("Derivatives must be present"); } @@ -137,38 +295,62 @@ private: if (offset.IsEmpty()) { return; } + const u32 required_components{BaseDimension(texture_type)}; + const bool promote_offset{needs_promotion && required_components > 1}; + + auto build_const_offset{[&](const boost::container::small_vector& values) -> Id { + switch (values.size()) { + case 1: + return ctx.SConst(values[0]); + case 2: + return ctx.SConst(values[0], values[1]); + case 3: + return ctx.SConst(values[0], values[1], values[2]); + default: + throw LogicError("Unsupported constant offset component count {}", + values.size()); + } + }}; + + auto pad_components{[&](boost::container::small_vector& components) { + while (promote_offset && components.size() < required_components) { + components.push_back(0); + } + }}; + if (offset.IsImmediate()) { - Add(spv::ImageOperandsMask::ConstOffset, ctx.SConst(static_cast(offset.U32()))); + boost::container::small_vector components{ + static_cast(offset.U32())}; + pad_components(components); + Add(spv::ImageOperandsMask::ConstOffset, build_const_offset(components)); return; } IR::Inst* const inst{offset.InstRecursive()}; if (inst->AreAllArgsImmediates()) { switch (inst->GetOpcode()) { case IR::Opcode::CompositeConstructU32x2: - Add(spv::ImageOperandsMask::ConstOffset, - ctx.SConst(static_cast(inst->Arg(0).U32()), - static_cast(inst->Arg(1).U32()))); - return; case IR::Opcode::CompositeConstructU32x3: - Add(spv::ImageOperandsMask::ConstOffset, - ctx.SConst(static_cast(inst->Arg(0).U32()), - static_cast(inst->Arg(1).U32()), - static_cast(inst->Arg(2).U32()))); - return; - case IR::Opcode::CompositeConstructU32x4: - Add(spv::ImageOperandsMask::ConstOffset, - ctx.SConst(static_cast(inst->Arg(0).U32()), - static_cast(inst->Arg(1).U32()), - static_cast(inst->Arg(2).U32()), - static_cast(inst->Arg(3).U32()))); + case IR::Opcode::CompositeConstructU32x4: { + boost::container::small_vector components; + for (u32 i = 0; i < inst->NumArgs(); ++i) { + components.push_back(static_cast(inst->Arg(i).U32())); + } + pad_components(components); + Add(spv::ImageOperandsMask::ConstOffset, build_const_offset(components)); return; + } default: break; } } - if (runtime_offset_allowed) { - 
Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); + if (!runtime_offset_allowed) { + return; + } + Id offset_id{ctx.Def(offset)}; + if (promote_offset) { + offset_id = PromoteOffset(ctx, offset, offset_id, texture_type); } + Add(spv::ImageOperandsMask::Offset, offset_id); } void Add(spv::ImageOperandsMask new_mask, Id value) { @@ -184,6 +366,8 @@ private: operands.push_back(value_2); } + TextureType texture_type{TextureType::Color2D}; + bool needs_promotion{}; boost::container::static_vector operands; spv::ImageOperandsMask mask{}; }; @@ -326,8 +510,7 @@ Id BitTest(EmitContext& ctx, Id mask, Id bit) { return ctx.OpINotEqual(ctx.U1, bit_value, ctx.u32_zero_value); } -Id ImageGatherSubpixelOffset(EmitContext& ctx, const IR::TextureInstInfo& info, Id texture, - Id coords) { +Id ImageGatherSubpixelOffset(EmitContext& ctx, TextureType type, Id texture, Id coords) { // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on // AMD hardware as on Maxwell or other Nvidia architectures. const auto calculate_coords{[&](size_t dim) { @@ -338,7 +521,7 @@ Id ImageGatherSubpixelOffset(EmitContext& ctx, const IR::TextureInstInfo& info, offset = ctx.OpFDiv(ctx.F32[dim], offset, ctx.OpConvertUToF(ctx.F32[dim], image_size)); return ctx.OpFAdd(ctx.F32[dim], coords, offset); }}; - switch (info.type) { + switch (type) { case TextureType::Color2D: case TextureType::Color2DRect: return calculate_coords(2); @@ -350,14 +533,24 @@ Id ImageGatherSubpixelOffset(EmitContext& ctx, const IR::TextureInstInfo& info, } } -void AddOffsetToCoordinates(EmitContext& ctx, const IR::TextureInstInfo& info, Id& coords, +void AddOffsetToCoordinates(EmitContext& ctx, TextureType type, bool promoted_from_1d, Id& coords, Id offset) { if (!Sirit::ValidId(offset)) { return; } + auto PadScalarOffset = [&](u32 components) { + boost::container::static_vector elems; + elems.push_back(offset); + while (elems.size() < components) { + elems.push_back(ctx.u32_zero_value); + } + offset = ctx.OpCompositeConstruct(ctx.U32[components], + std::span{elems.data(), elems.size()}); + }; + Id result_type{}; - switch (info.type) { + switch (type) { case TextureType::Buffer: case TextureType::Color1D: { result_type = ctx.U32[1]; @@ -368,13 +561,21 @@ void AddOffsetToCoordinates(EmitContext& ctx, const IR::TextureInstInfo& info, I [[fallthrough]]; case TextureType::Color2D: case TextureType::Color2DRect: { + if (promoted_from_1d) { + PadScalarOffset(2); + } result_type = ctx.U32[2]; break; } case TextureType::ColorArray2D: - offset = ctx.OpCompositeConstruct(ctx.U32[3], ctx.OpCompositeExtract(ctx.U32[1], coords, 0), - ctx.OpCompositeExtract(ctx.U32[1], coords, 1), - ctx.u32_zero_value); + if (promoted_from_1d) { + PadScalarOffset(3); + } else { + offset = ctx.OpCompositeConstruct(ctx.U32[3], + ctx.OpCompositeExtract(ctx.U32[1], coords, 0), + ctx.OpCompositeExtract(ctx.U32[1], coords, 1), + ctx.u32_zero_value); + } [[fallthrough]]; case TextureType::Color3D: { result_type = ctx.U32[3]; @@ -488,12 +689,15 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& Id bias_lc, const IR::Value& offset) { const auto info{inst->Flags()}; const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + const TextureType effective_type{GetEffectiveType(ctx, info.type)}; + const bool needs_promotion{Needs1DPromotion(ctx.profile, info.type)}; const Id color_type{TextureColorResultType(ctx, def)}; const Id texture{Texture(ctx, info, index)}; + coords = PromoteCoordinate(ctx, inst, info, coords); Id 
color{}; if (ctx.stage == Stage::Fragment) { - const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, - bias_lc, offset); + const ImageOperands operands(ctx, effective_type, needs_promotion, info.has_bias != 0, + false, info.has_lod_clamp != 0, bias_lc, offset); color = Emit(&EmitContext::OpImageSparseSampleImplicitLod, &EmitContext::OpImageSampleImplicitLod, ctx, inst, color_type, texture, coords, operands.MaskOptional(), operands.Span()); @@ -502,7 +706,8 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& // if the lod was explicitly zero. This may change on Turing with implicit compute // derivatives const Id lod{ctx.Const(0.0f)}; - const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset); + const ImageOperands operands(ctx, effective_type, needs_promotion, false, true, + info.has_lod_clamp != 0, lod, offset); color = Emit(&EmitContext::OpImageSparseSampleExplicitLod, &EmitContext::OpImageSampleExplicitLod, ctx, inst, color_type, texture, coords, operands.Mask(), operands.Span()); @@ -514,8 +719,12 @@ Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& Id lod, const IR::Value& offset) { const auto info{inst->Flags()}; const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + const TextureType effective_type{GetEffectiveType(ctx, info.type)}; + const bool needs_promotion{Needs1DPromotion(ctx.profile, info.type)}; const Id color_type{TextureColorResultType(ctx, def)}; - const ImageOperands operands(ctx, false, true, false, lod, offset); + coords = PromoteCoordinate(ctx, inst, info, coords); + const ImageOperands operands(ctx, effective_type, needs_promotion, false, true, false, lod, + offset); const Id color{Emit(&EmitContext::OpImageSparseSampleExplicitLod, &EmitContext::OpImageSampleExplicitLod, ctx, inst, color_type, Texture(ctx, info, index), coords, operands.Mask(), operands.Span())}; @@ -525,9 +734,12 @@ Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id dref, Id bias_lc, const IR::Value& offset) { const auto info{inst->Flags()}; + const TextureType effective_type{GetEffectiveType(ctx, info.type)}; + const bool needs_promotion{Needs1DPromotion(ctx.profile, info.type)}; + coords = PromoteCoordinate(ctx, inst, info, coords); if (ctx.stage == Stage::Fragment) { - const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, - bias_lc, offset); + const ImageOperands operands(ctx, effective_type, needs_promotion, info.has_bias != 0, + false, info.has_lod_clamp != 0, bias_lc, offset); return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod, &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1], Texture(ctx, info, index), coords, dref, operands.MaskOptional(), @@ -536,7 +748,8 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va // Implicit lods in compute behave on hardware as if sampling from LOD 0. // This check is to ensure all drivers behave this way. 
const Id lod{ctx.Const(0.0f)}; - const ImageOperands operands(ctx, false, true, false, lod, offset); + const ImageOperands operands(ctx, effective_type, needs_promotion, false, true, false, + lod, offset); return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1], Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span()); @@ -546,7 +759,11 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id dref, Id lod, const IR::Value& offset) { const auto info{inst->Flags()}; - const ImageOperands operands(ctx, false, true, false, lod, offset); + const TextureType effective_type{GetEffectiveType(ctx, info.type)}; + const bool needs_promotion{Needs1DPromotion(ctx.profile, info.type)}; + coords = PromoteCoordinate(ctx, inst, info, coords); + const ImageOperands operands(ctx, effective_type, needs_promotion, false, true, false, lod, + offset); return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1], Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span()); @@ -556,11 +773,15 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id const IR::Value& offset, const IR::Value& offset2) { const auto info{inst->Flags()}; const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + const TextureType effective_type{GetEffectiveType(ctx, info.type)}; + const bool needs_promotion{Needs1DPromotion(ctx.profile, info.type)}; const Id color_type{TextureColorResultType(ctx, def)}; - const ImageOperands operands(ctx, offset, offset2); + coords = PromoteCoordinate(ctx, inst, info, coords); + const ImageOperands operands(ctx, effective_type, needs_promotion, offset, offset2); const Id texture{Texture(ctx, info, index)}; if (ctx.profile.need_gather_subpixel_offset) { - coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords); + coords = ImageGatherSubpixelOffset(ctx, effective_type, TextureImage(ctx, info, index), + coords); } const Id color{ Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, color_type, @@ -572,9 +793,13 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, const IR::Value& offset, const IR::Value& offset2, Id dref) { const auto info{inst->Flags()}; - const ImageOperands operands(ctx, offset, offset2); + const TextureType effective_type{GetEffectiveType(ctx, info.type)}; + const bool needs_promotion{Needs1DPromotion(ctx.profile, info.type)}; + coords = PromoteCoordinate(ctx, inst, info, coords); + const ImageOperands operands(ctx, effective_type, needs_promotion, offset, offset2); if (ctx.profile.need_gather_subpixel_offset) { - coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords); + coords = ImageGatherSubpixelOffset(ctx, effective_type, TextureImage(ctx, info, index), + coords); } return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(), @@ -586,8 +811,11 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c const auto info{inst->Flags()}; const TextureDefinition* def = info.type == TextureType::Buffer ? 
nullptr : &ctx.textures.at(info.descriptor_index); + const TextureType effective_type{GetEffectiveType(ctx, info.type)}; + const bool needs_promotion{Needs1DPromotion(ctx.profile, info.type)}; const Id result_type{def ? TextureColorResultType(ctx, *def) : ctx.F32[4]}; - AddOffsetToCoordinates(ctx, info, coords, offset); + coords = PromoteCoordinate(ctx, inst, info, coords); + AddOffsetToCoordinates(ctx, effective_type, needs_promotion, coords, offset); if (info.type == TextureType::Buffer) { lod = Id{}; } @@ -608,30 +836,54 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod, const IR::Value& skip_mips_val) { const auto info{inst->Flags()}; + const TextureType effective_type{GetEffectiveType(ctx, info.type)}; + const bool needs_promotion{Needs1DPromotion(ctx.profile, info.type)}; const Id image{TextureImage(ctx, info, index)}; const Id zero{ctx.u32_zero_value}; const bool skip_mips{skip_mips_val.U1()}; const auto mips{[&] { return skip_mips ? zero : ctx.OpImageQueryLevels(ctx.U32[1], image); }}; const bool is_msaa{IsTextureMsaa(ctx, info)}; const bool uses_lod{!is_msaa && info.type != TextureType::Buffer}; - const auto query{[&](Id type) { - return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod) - : ctx.OpImageQuerySize(type, image); - }}; + + const u32 query_components = effective_type == TextureType::Buffer + ? 1u + : BaseDimension(effective_type) + + (HasLayerComponent(effective_type) ? 1u : 0u); + const Id query_type{ctx.U32[std::max(1u, query_components)]}; + const Id size = uses_lod ? ctx.OpImageQuerySizeLod(query_type, image, lod) + : ctx.OpImageQuerySize(query_type, image); + const auto extract = [&](u32 index) -> Id { + if (query_components == 1) { + return size; + } + return ctx.OpCompositeExtract(ctx.U32[1], size, index); + }; + switch (info.type) { case TextureType::Color1D: - return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips()); - case TextureType::ColorArray1D: + return ctx.OpCompositeConstruct(ctx.U32[4], extract(0), zero, zero, mips()); + case TextureType::ColorArray1D: { + const Id width{extract(0)}; + const Id layers{needs_promotion ? 
extract(2) : extract(1)}; + return ctx.OpCompositeConstruct(ctx.U32[4], width, layers, zero, mips()); + } case TextureType::Color2D: case TextureType::ColorCube: - case TextureType::Color2DRect: - return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[2]), zero, mips()); + case TextureType::Color2DRect: { + const Id width{extract(0)}; + const Id height{extract(1)}; + return ctx.OpCompositeConstruct(ctx.U32[4], width, height, zero, mips()); + } case TextureType::ColorArray2D: case TextureType::Color3D: - case TextureType::ColorArrayCube: - return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[3]), mips()); + case TextureType::ColorArrayCube: { + const Id width{extract(0)}; + const Id height{extract(1)}; + const Id depth{extract(2)}; + return ctx.OpCompositeConstruct(ctx.U32[4], width, height, depth, mips()); + } case TextureType::Buffer: - return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips()); + return ctx.OpCompositeConstruct(ctx.U32[4], extract(0), zero, zero, mips()); } throw LogicError("Unspecified image type {}", info.type.Value()); } @@ -640,6 +892,7 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I const auto info{inst->Flags()}; const Id zero{ctx.f32_zero_value}; const Id sampler{Texture(ctx, info, index)}; + coords = PromoteCoordinate(ctx, inst, info, coords); return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords), zero, zero); } @@ -648,11 +901,16 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I Id derivatives, const IR::Value& offset, Id lod_clamp) { const auto info{inst->Flags()}; const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + const TextureType effective_type{GetEffectiveType(ctx, info.type)}; + const bool needs_promotion{Needs1DPromotion(ctx.profile, info.type)}; const Id color_type{TextureColorResultType(ctx, def)}; + coords = PromoteCoordinate(ctx, inst, info, coords); const auto operands = info.num_derivatives == 3 - ? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, - ctx.Def(offset), {}, lod_clamp) - : ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, + ? ImageOperands(ctx, effective_type, needs_promotion, + info.has_lod_clamp != 0, derivatives, + ctx.Def(offset), IR::Value{}, lod_clamp) + : ImageOperands(ctx, effective_type, needs_promotion, + info.has_lod_clamp != 0, derivatives, info.num_derivatives, offset, lod_clamp); const Id color{Emit(&EmitContext::OpImageSparseSampleExplicitLod, &EmitContext::OpImageSampleExplicitLod, ctx, inst, color_type, @@ -666,6 +924,7 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id co LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host"); return ctx.ConstantNull(ctx.U32[4]); } + coords = PromoteCoordinate(ctx, inst, info, coords); const auto [image, is_integer] = Image(ctx, index, info); const Id result_type{is_integer ? 
ctx.U32[4] : ctx.F32[4]}; Id color{Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, @@ -678,6 +937,7 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id co void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) { const auto info{inst->Flags()}; + coords = PromoteCoordinate(ctx, inst, info, coords); const auto [image, is_integer] = Image(ctx, index, info); if (!is_integer) { color = ctx.OpBitcast(ctx.F32[4], color); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 8d4aff315a..1bce0a77b2 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -17,6 +17,7 @@ #include "common/div_ceil.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" +#include "shader_recompiler/backend/spirv/texture_helpers.h" namespace Shader::Backend::SPIRV { namespace { @@ -47,7 +48,8 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc, Id sampled_type) { const spv::ImageFormat format{spv::ImageFormat::Unknown}; const bool depth{desc.is_depth}; const bool ms{desc.is_multisample}; - switch (desc.type) { + const TextureType type{EffectiveTextureType(ctx.profile, desc.type)}; + switch (type) { case TextureType::Color1D: return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, depth, false, false, 1, format); case TextureType::ColorArray1D: @@ -93,7 +95,8 @@ spv::ImageFormat GetImageFormat(ImageFormat format) { Id ImageType(EmitContext& ctx, const ImageDescriptor& desc, Id sampled_type) { const spv::ImageFormat format{GetImageFormat(desc.format)}; - switch (desc.type) { + const TextureType type{EffectiveTextureType(ctx.profile, desc.type)}; + switch (type) { case TextureType::Color1D: return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, 2, format); case TextureType::ColorArray1D: diff --git a/src/shader_recompiler/backend/spirv/texture_helpers.h b/src/shader_recompiler/backend/spirv/texture_helpers.h new file mode 100644 index 0000000000..a00f74c81c --- /dev/null +++ b/src/shader_recompiler/backend/spirv/texture_helpers.h @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include "shader_recompiler/profile.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Backend::SPIRV { + +inline bool Is1DTexture(TextureType type) noexcept { + return type == TextureType::Color1D || type == TextureType::ColorArray1D; +} + +inline bool Needs1DPromotion(const Profile& profile, TextureType type) noexcept { + return !profile.support_sampled_1d && Is1DTexture(type); +} + +inline TextureType EffectiveTextureType(const Profile& profile, TextureType type) noexcept { + if (!Needs1DPromotion(profile, type)) { + return type; + } + if (type == TextureType::Color1D) { + return TextureType::Color2D; + } + return TextureType::ColorArray2D; +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index 1b53404fcc..ff8e28e033 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h @@ -16,6 +16,7 @@ struct HostTranslateInfo { bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered bool 
support_snorm_render_buffer{};      ///< True when the device supports SNORM render buffers
     bool support_viewport_index_layer{};     ///< True when the device supports gl_Layer in VS
+    bool support_sampled_1d{true};           ///< True when the device supports sampled 1D textures
     u32 min_ssbo_alignment{};                ///< Minimum alignment supported by the device for SSBOs
     bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
                                                 ///< passthrough shaders
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index ea7f5cc76f..1aab367dd9 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -36,6 +36,7 @@ struct Profile {
     bool support_viewport_index_layer_non_geometry{};
     bool support_viewport_mask{};
     bool support_typeless_image_loads{};
+    bool support_sampled_1d{true};
     bool support_demote_to_helper_invocation{};
     bool support_int64_atomics{};
     bool support_derivative_control{};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index f3c17e2e91..0a630c56fa 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -207,6 +207,7 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
                                  device.HasNvViewportArray2() || device.HasVertexViewportLayer(),
           .support_viewport_mask = device.HasNvViewportArray2(),
           .support_typeless_image_loads = device.HasImageLoadFormatted(),
+          .support_sampled_1d = true,
           .support_demote_to_helper_invocation = false,
           .support_int64_atomics = false,
           .support_derivative_control = device.HasDerivativeControl(),
@@ -249,6 +250,7 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
           .needs_demote_reorder = device.IsAmd(),
           .support_snorm_render_buffer = false,
           .support_viewport_index_layer = device.HasVertexViewportLayer(),
+          .support_sampled_1d = true,
           .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
           .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
           .support_conditional_barrier = device.SupportsConditionalBarriers(),
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index e96c88d3aa..b2f0be4ca7 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -392,6 +392,7 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
               device.IsExtShaderViewportIndexLayerSupported(),
           .support_viewport_mask = device.IsNvViewportArray2Supported(),
           .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(),
+          .support_sampled_1d = device.SupportsSampled1D(),
           .support_demote_to_helper_invocation =
               device.IsExtShaderDemoteToHelperInvocationSupported(),
           .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(),
@@ -435,6 +436,7 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
               driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY,
           .support_snorm_render_buffer = true,
           .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
+          .support_sampled_1d = device.SupportsSampled1D(),
           .min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
           .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
           .support_conditional_barrier = device.SupportsConditionalBarriers(),
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp 
b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 6bd8973aad..78090a3f3e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -126,9 +126,18 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { return usage; } +[[nodiscard]] bool Needs1DPromotion(const Device& device, ImageType type) { + return type == ImageType::e1D && !device.SupportsSampled1D(); +} + +[[nodiscard]] ImageType HostImageType(const Device& device, ImageType type) { + return Needs1DPromotion(device, type) ? ImageType::e2D : type; +} + [[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const Device& device, const ImageInfo& info) { const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, info.format); + const ImageType host_type = HostImageType(device, info.type); VkImageCreateFlags flags{}; if (info.type == ImageType::e2D && info.resources.layers >= 6 && info.size.width == info.size.height && !device.HasBrokenCubeImageCompatibility()) { @@ -142,7 +151,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, .flags = flags, - .imageType = ConvertImageType(info.type), + .imageType = ConvertImageType(host_type), .format = format_info.format, .extent{ .width = info.size.width >> samples_x, @@ -273,10 +282,11 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { return VK_COMPONENT_SWIZZLE_ZERO; } -[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) { +[[nodiscard]] VkImageViewType ImageViewType(const Device& device, Shader::TextureType type) { + const bool promote_1d = !device.SupportsSampled1D(); switch (type) { case Shader::TextureType::Color1D: - return VK_IMAGE_VIEW_TYPE_1D; + return promote_1d ? VK_IMAGE_VIEW_TYPE_2D : VK_IMAGE_VIEW_TYPE_1D; case Shader::TextureType::Color2D: case Shader::TextureType::Color2DRect: return VK_IMAGE_VIEW_TYPE_2D; @@ -285,7 +295,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { case Shader::TextureType::Color3D: return VK_IMAGE_VIEW_TYPE_3D; case Shader::TextureType::ColorArray1D: - return VK_IMAGE_VIEW_TYPE_1D_ARRAY; + return promote_1d ? 
VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_1D_ARRAY; case Shader::TextureType::ColorArray2D: return VK_IMAGE_VIEW_TYPE_2D_ARRAY; case Shader::TextureType::ColorArrayCube: @@ -2083,7 +2093,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI }; const auto create = [&](TextureType tex_type) { VkImageViewCreateInfo ci{create_info}; - ci.viewType = ImageViewType(tex_type); + ci.viewType = ImageViewType(*device, tex_type); if (const auto override_layers = LayerCountOverride(tex_type)) { ci.subresourceRange.layerCount = *override_layers; } @@ -2304,7 +2314,7 @@ vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_ .pNext = nullptr, .flags = 0, .image = image_handle, - .viewType = ImageViewType(texture_type), + .viewType = ImageViewType(*device, texture_type), .format = vk_format, .components{ .r = VK_COMPONENT_SWIZZLE_IDENTITY, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 65bd76cddf..e619e66350 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -435,6 +435,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR const bool is_turnip = driver_id == VK_DRIVER_ID_MESA_TURNIP; const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY; + // Qualcomm hardware (both proprietary and Turnip drivers) rejects Sampled1D capability. + supports_sampled_1d = !(is_qualcomm || is_turnip); + if ((is_mvk || is_qualcomm || is_turnip || is_arm) && !is_suitable) { LOG_WARNING(Render_Vulkan, "Unsuitable driver, continuing anyway"); } else if (!is_suitable) { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 00284057ef..9c229fafdc 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -881,6 +881,10 @@ public: return features2.features.multiViewport; } + bool SupportsSampled1D() const { + return supports_sampled_1d; + } + /// Returns true if the device supports VK_KHR_maintenance1. bool IsKhrMaintenance1Supported() const { return extensions.maintenance1; @@ -1108,6 +1112,7 @@ private: bool dynamic_state3_alpha_to_coverage{}; bool dynamic_state3_alpha_to_one{}; bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow. + bool supports_sampled_1d{true}; ///< Supports declaring Sampled1D in shaders. size_t sampler_heap_budget{}; ///< Sampler budget for buggy drivers (0 = unlimited). VkDeviceSize uniform_buffer_alignment_minimum{}; ///< Minimum enforced UBO alignment. VkDeviceSize storage_buffer_alignment_minimum{}; ///< Minimum enforced SSBO alignment.
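
Note (illustration, not part of the patch): the shader-side promotion above can be summarized outside the emitter. The sketch below uses hypothetical names (EffectiveType, PromoteCoords1DTo2D) and plain std::vector in place of the emitter's SPIR-V Ids; it only demonstrates the same mapping that EffectiveTextureType and PromoteCoordinate perform when support_sampled_1d is false: Color1D becomes Color2D with coordinates (x) -> (x, 0), and ColorArray1D becomes ColorArray2D with (x, layer) -> (x, 0, layer).

// Standalone illustration of the 1D -> 2D promotion (hypothetical names, not emitter code).
#include <iostream>
#include <vector>

enum class TextureType { Color1D, ColorArray1D, Color2D, ColorArray2D };

// Mirrors EffectiveTextureType: 1D types are treated as their 2D equivalents
// whenever the host cannot sample 1D images.
TextureType EffectiveType(bool support_sampled_1d, TextureType type) {
    if (support_sampled_1d) {
        return type;
    }
    if (type == TextureType::Color1D) {
        return TextureType::Color2D;
    }
    if (type == TextureType::ColorArray1D) {
        return TextureType::ColorArray2D;
    }
    return type;
}

// Mirrors PromoteCoordinate: pad the missing Y coordinate with zero and keep any
// layer index as the last component.
//   Color1D      (x)        -> (x, 0)
//   ColorArray1D (x, layer) -> (x, 0, layer)
std::vector<float> PromoteCoords1DTo2D(TextureType type, const std::vector<float>& coords) {
    switch (type) {
    case TextureType::Color1D:
        return {coords.at(0), 0.0f};
    case TextureType::ColorArray1D:
        return {coords.at(0), 0.0f, coords.at(1)};
    default:
        return coords; // Non-1D types pass through untouched.
    }
}

int main() {
    const auto plain = PromoteCoords1DTo2D(TextureType::Color1D, {0.25f});
    const auto layered = PromoteCoords1DTo2D(TextureType::ColorArray1D, {0.25f, 3.0f});
    std::cout << plain[0] << ' ' << plain[1] << '\n';                          // 0.25 0
    std::cout << layered[0] << ' ' << layered[1] << ' ' << layered[2] << '\n'; // 0.25 0 3
}

Because a promoted ColorArray1D is queried as a 2D array, the patch's EmitImageQueryDimensions reads the layer count from the third component of the size query instead of the second, which is what the needs_promotion branch selects.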
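
Note (illustration, not part of the patch): the host side must make the same assumption hold for the images themselves. When a device cannot sample 1D images (the patch disables this for Qualcomm proprietary and Turnip drivers), 1D guest textures are created and viewed as 2D so they match the promoted Dim2D declarations. The sketch below assumes the Vulkan SDK headers and uses hypothetical helper names (HostImageType, HostImageViewType) rather than the exact functions in vk_texture_cache.cpp.

// Host-side half of the promotion (hypothetical helper names; assumes Vulkan SDK headers).
#include <iostream>
#include <vulkan/vulkan_core.h>

// 1D guest images are backed by 2D host images (height 1) when Sampled1D is unavailable,
// so the image type and every view created from it must be remapped consistently.
VkImageType HostImageType(bool supports_sampled_1d, VkImageType guest_type) {
    if (!supports_sampled_1d && guest_type == VK_IMAGE_TYPE_1D) {
        return VK_IMAGE_TYPE_2D;
    }
    return guest_type;
}

VkImageViewType HostImageViewType(bool supports_sampled_1d, VkImageViewType guest_view) {
    if (supports_sampled_1d) {
        return guest_view;
    }
    switch (guest_view) {
    case VK_IMAGE_VIEW_TYPE_1D:
        return VK_IMAGE_VIEW_TYPE_2D;
    case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
        return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
    default:
        return guest_view;
    }
}

int main() {
    // With Sampled1D unsupported, a 1D array view is promoted to a 2D array view (enum value 5).
    std::cout << HostImageViewType(false, VK_IMAGE_VIEW_TYPE_1D_ARRAY) << '\n';
    std::cout << HostImageType(false, VK_IMAGE_TYPE_1D) << '\n'; // VK_IMAGE_TYPE_2D == 1
}

The mapping is the same one MakeImageCreateInfo and ImageViewType apply in the patch; layer-count handling (LayerCountOverride) is left unchanged because a promoted 1D array keeps its layer dimension.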