diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 1de5709394..ad4600fd0f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -41,19 +41,203 @@ struct OutAttr { Id pointer{}; Id type{}; + const GenericElementInfo* generic_info{}; + u32 generic_element{}; + bool is_position{}; + u32 position_component{}; }; +bool NeedsVectorAccessWorkaround(const EmitContext& ctx) { + return ctx.profile.has_broken_spirv_vector_access_chain; +} + +bool StageHasPerVertexInputs(const EmitContext& ctx) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + case Stage::Geometry: + return true; + default: + return false; + } +} + +bool StageHasPerInvocationOutputs(const EmitContext& ctx) { + return ctx.stage == Stage::TessellationControl; +} + +Id ApplyGenericConversion(EmitContext& ctx, const InputGenericInfo& generic, Id value) { + switch (generic.load_op) { + case InputGenericLoadOp::Bitcast: + return ctx.OpBitcast(ctx.F32[1], value); + case InputGenericLoadOp::SToF: + return ctx.OpConvertSToF(ctx.F32[1], value); + case InputGenericLoadOp::UToF: + return ctx.OpConvertUToF(ctx.F32[1], value); + default: + return value; + } +} + +Id InputGenericVectorPointer(EmitContext& ctx, const InputGenericInfo& generic, Id vertex, + bool use_vertex) { + if (use_vertex) { + return ctx.OpAccessChain(generic.composite_pointer_type, generic.id, vertex); + } + return generic.id; +} + +Id LoadGenericComponentConst(EmitContext& ctx, const InputGenericInfo& generic, Id vertex, + u32 component, bool use_vertex) { + if (!NeedsVectorAccessWorkaround(ctx)) { + const Id pointer{ + AttrPointer(ctx, generic.pointer_type, vertex, generic.id, ctx.Const(component))}; + const Id value{ctx.OpLoad(generic.component_type, pointer)}; + return ApplyGenericConversion(ctx, generic, 
value); + } + const Id vector_pointer{InputGenericVectorPointer(ctx, generic, vertex, use_vertex)}; + const Id vector_value{ctx.OpLoad(generic.composite_type, vector_pointer)}; + const Id component_value{ + ctx.OpCompositeExtract(generic.component_type, vector_value, component)}; + return ApplyGenericConversion(ctx, generic, component_value); +} + +Id OutputGenericVectorPointer(EmitContext& ctx, const GenericElementInfo& info, + bool use_invocation) { + if (use_invocation) { + const Id invocation_id{ctx.OpLoad(ctx.U32[1], ctx.invocation_id)}; + return ctx.OpAccessChain(info.composite_pointer_type, info.id, invocation_id); + } + return info.id; +} + +void StoreGenericComponentConst(EmitContext& ctx, const GenericElementInfo& info, + u32 attribute_element, Id value, bool use_invocation) { + const u32 local_component{attribute_element - info.first_element}; + if (!NeedsVectorAccessWorkaround(ctx) || info.num_components <= 1) { + const Id pointer{ + OutputAccessChain(ctx, ctx.output_f32, info.id, ctx.Const(local_component))}; + ctx.OpStore(pointer, value); + return; + } + const Id vector_pointer{OutputGenericVectorPointer(ctx, info, use_invocation)}; + const Id vector_value{ctx.OpLoad(info.composite_type, vector_pointer)}; + const Id updated{ + ctx.OpCompositeInsert(info.composite_type, value, vector_value, local_component)}; + ctx.OpStore(vector_pointer, updated); +} + +Id InputPositionPointer(EmitContext& ctx, Id vertex, bool use_vertex) { + const Id vector_pointer_type{ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4])}; + if (ctx.need_input_position_indirect) { + if (use_vertex) { + return ctx.OpAccessChain(vector_pointer_type, ctx.input_position, vertex, + ctx.u32_zero_value); + } + return ctx.OpAccessChain(vector_pointer_type, ctx.input_position, ctx.u32_zero_value); + } + if (use_vertex) { + return ctx.OpAccessChain(vector_pointer_type, ctx.input_position, vertex); + } + return ctx.input_position; +} + +Id LoadPositionComponent(EmitContext& ctx, Id vertex, 
u32 component, bool use_vertex) { + if (!NeedsVectorAccessWorkaround(ctx)) { + return ctx.OpLoad( + ctx.F32[1], + ctx.need_input_position_indirect + ? AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.u32_zero_value, + ctx.Const(component)) + : AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, + ctx.Const(component))); + } + const Id pointer{InputPositionPointer(ctx, vertex, use_vertex)}; + const Id vector_value{ctx.OpLoad(ctx.F32[4], pointer)}; + return ctx.OpCompositeExtract(ctx.F32[1], vector_value, component); +} + +void StorePositionComponent(EmitContext& ctx, u32 component, Id value) { + if (!NeedsVectorAccessWorkaround(ctx)) { + const Id pointer{OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, + ctx.Const(component))}; + ctx.OpStore(pointer, value); + return; + } + const bool use_invocation{StageHasPerInvocationOutputs(ctx)}; + const Id pointer_type{ctx.TypePointer(spv::StorageClass::Output, ctx.F32[4])}; + const Id target_pointer{use_invocation + ? ctx.OpAccessChain(pointer_type, ctx.output_position, + ctx.OpLoad(ctx.U32[1], ctx.invocation_id)) + : ctx.output_position}; + const Id vector_value{ctx.OpLoad(ctx.F32[4], target_pointer)}; + const Id updated{ctx.OpCompositeInsert(ctx.F32[4], value, vector_value, component)}; + ctx.OpStore(target_pointer, updated); +} + +Id LoadBuiltinVectorComponent(EmitContext& ctx, Id pointer, Id pointer_type, Id vector_type, + u32 component) { + if (!NeedsVectorAccessWorkaround(ctx)) { + const Id comp_id{ctx.Const(component)}; + const Id elem_ptr{ctx.OpAccessChain(pointer_type, pointer, comp_id)}; + return ctx.OpLoad(ctx.F32[1], elem_ptr); + } + const Id vector_value{ctx.OpLoad(vector_type, pointer)}; + return ctx.OpCompositeExtract(ctx.F32[1], vector_value, component); +} + +Id LoadPatchComponent(EmitContext& ctx, Id patch, u32 component) { + if (!NeedsVectorAccessWorkaround(ctx)) { + const Id pointer{ctx.OpAccessChain(ctx.stage == Stage::TessellationControl ? 
ctx.output_f32 + : ctx.input_f32, + patch, ctx.Const(component))}; + return ctx.OpLoad(ctx.F32[1], pointer); + } + const Id vector_value{ctx.OpLoad(ctx.F32[4], patch)}; + return ctx.OpCompositeExtract(ctx.F32[1], vector_value, component); +} + +void StorePatchComponent(EmitContext& ctx, Id patch, u32 component, Id value) { + if (!NeedsVectorAccessWorkaround(ctx)) { + const Id pointer{ctx.OpAccessChain(ctx.output_f32, patch, ctx.Const(component))}; + ctx.OpStore(pointer, value); + return; + } + const Id vector_value{ctx.OpLoad(ctx.F32[4], patch)}; + const Id updated{ctx.OpCompositeInsert(ctx.F32[4], value, vector_value, component)}; + ctx.OpStore(patch, updated); +} + +void StoreFragColorComponent(EmitContext& ctx, u32 index, u32 component, Id value) { + if (!NeedsVectorAccessWorkaround(ctx)) { + const Id component_id{ctx.Const(component)}; + const Id pointer{ + ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)}; + ctx.OpStore(pointer, value); + return; + } + const Id vector_value{ctx.OpLoad(ctx.F32[4], ctx.frag_color.at(index))}; + const Id updated{ctx.OpCompositeInsert(ctx.F32[4], value, vector_value, component)}; + ctx.OpStore(ctx.frag_color.at(index), updated); +} + std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const u32 element{IR::GenericAttributeElement(attr)}; const GenericElementInfo& info{ctx.output_generics.at(index).at(element)}; if (info.num_components == 1) { - return info.id; + OutAttr out{info.id}; + out.generic_info = &info; + out.generic_element = element; + return out; } else { const u32 index_element{element - info.first_element}; const Id index_id{ctx.Const(index_element)}; - return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); + OutAttr out{OutputAccessChain(ctx, ctx.output_f32, info.id, index_id)}; + out.generic_info = &info; + out.generic_element = element; + return out; } } @@ -66,7 +250,10 @@ 
std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { case IR::Attribute::PositionW: { const u32 element{static_cast(attr) % 4}; const Id element_id{ctx.Const(element)}; - return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); + OutAttr out{OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id)}; + out.is_position = true; + out.position_component = element; + return out; } case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: @@ -316,21 +503,8 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { // Attribute is disabled or varying component is not written return ctx.Const(element == 3 ? 1.0f : 0.0f); } - const Id pointer{ - AttrPointer(ctx, generic.pointer_type, vertex, generic.id, ctx.Const(element))}; - const Id value{ctx.OpLoad(generic.component_type, pointer)}; - return [&ctx, generic, value]() { - switch (generic.load_op) { - case InputGenericLoadOp::Bitcast: - return ctx.OpBitcast(ctx.F32[1], value); - case InputGenericLoadOp::SToF: - return ctx.OpConvertSToF(ctx.F32[1], value); - case InputGenericLoadOp::UToF: - return ctx.OpConvertUToF(ctx.F32[1], value); - default: - return value; - }; - }(); + const bool use_vertex{StageHasPerVertexInputs(ctx)}; + return LoadGenericComponentConst(ctx, generic, vertex, element, use_vertex); } switch (attr) { case IR::Attribute::PrimitiveId: @@ -341,12 +515,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { case IR::Attribute::PositionY: case IR::Attribute::PositionZ: case IR::Attribute::PositionW: - return ctx.OpLoad( - ctx.F32[1], - ctx.need_input_position_indirect - ? 
AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.u32_zero_value, - ctx.Const(element)) - : AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element))); + return LoadPositionComponent(ctx, vertex, element, StageHasPerVertexInputs(ctx)); case IR::Attribute::InstanceId: if (ctx.profile.support_vertex_instance_id) { return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); @@ -372,17 +541,13 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { ctx.OpBitcast(ctx.F32[1], ctx.Const((std::numeric_limits::max)())), ctx.f32_zero_value); case IR::Attribute::PointSpriteS: - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); + return LoadBuiltinVectorComponent(ctx, ctx.point_coord, ctx.input_f32, ctx.F32[2], 0); case IR::Attribute::PointSpriteT: - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.Const(1U))); + return LoadBuiltinVectorComponent(ctx, ctx.point_coord, ctx.input_f32, ctx.F32[2], 1); case IR::Attribute::TessellationEvaluationPointU: - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); + return LoadBuiltinVectorComponent(ctx, ctx.tess_coord, ctx.input_f32, ctx.F32[3], 0); case IR::Attribute::TessellationEvaluationPointV: - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.Const(1U))); + return LoadBuiltinVectorComponent(ctx, ctx.tess_coord, ctx.input_f32, ctx.F32[3], 1); default: throw NotImplementedException("Read attribute {}", attr); } @@ -433,6 +598,20 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_un const u32 idx = (u32) attr - (u32) cd0; clip_distance_written.set(idx); } + + if (NeedsVectorAccessWorkaround(ctx)) { + const bool use_invocation{StageHasPerInvocationOutputs(ctx)}; + if (output->generic_info) { + StoreGenericComponentConst(ctx, *output->generic_info, 
output->generic_element, value, + use_invocation); + return; + } + if (output->is_position) { + StorePositionComponent(ctx, output->position_component, value); + return; + } + } + ctx.OpStore(output->pointer, value); } @@ -456,18 +635,20 @@ Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { throw NotImplementedException("Non-generic patch load"); } const u32 index{IR::GenericPatchIndex(patch)}; - const Id element{ctx.Const(IR::GenericPatchElement(patch))}; - const Id type{ctx.stage == Stage::TessellationControl ? ctx.output_f32 : ctx.input_f32}; - const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)}; - return ctx.OpLoad(ctx.F32[1], pointer); + const u32 element{IR::GenericPatchElement(patch)}; + return LoadPatchComponent(ctx, ctx.patches.at(index), element); } void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { const Id pointer{[&] { if (IR::IsGeneric(patch)) { const u32 index{IR::GenericPatchIndex(patch)}; - const Id element{ctx.Const(IR::GenericPatchElement(patch))}; - return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element); + const u32 element{IR::GenericPatchElement(patch)}; + if (NeedsVectorAccessWorkaround(ctx)) { + StorePatchComponent(ctx, ctx.patches.at(index), element, value); + return Id{}; + } + return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), ctx.Const(element)); } switch (patch) { case IR::Patch::TessellationLodLeft: @@ -487,11 +668,18 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { throw NotImplementedException("Patch {}", patch); } }()}; + if (!Sirit::ValidId(pointer)) { + return; + } ctx.OpStore(pointer, value); } void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { const Id component_id{ctx.Const(component)}; + if (NeedsVectorAccessWorkaround(ctx)) { + StoreFragColorComponent(ctx, index, component, value); + return; + } const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)}; ctx.OpStore(pointer, 
value);
 }

diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index d4e5441469..88f4defc26 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -185,10 +192,14 @@ void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invo
         } else {
             ctx.Name(id, fmt::format("out_attr{}", index));
         }
+        const Id vector_type{ctx.F32[num_components]};
         const GenericElementInfo info{
             .id = id,
             .first_element = element,
             .num_components = num_components,
+            .composite_type = vector_type,
+            .composite_pointer_type =
+                ctx.TypePointer(spv::StorageClass::Output, vector_type),
         };
         std::fill_n(ctx.output_generics[index].begin() + element, num_components, info);
         element += num_components;
@@ -217,21 +228,58 @@ Id GetAttributeType(EmitContext& ctx, AttributeType type) {
 InputGenericInfo GetAttributeInfo(EmitContext& ctx, AttributeType type, Id id) {
     switch (type) {
     case AttributeType::Float:
-        return InputGenericInfo{id, ctx.input_f32, ctx.F32[1],
InputGenericLoadOp::None}; + return InputGenericInfo{id, + ctx.input_f32, + ctx.F32[1], + ctx.F32[4], + ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]), + InputGenericLoadOp::None}; case AttributeType::UnsignedInt: - return InputGenericInfo{id, ctx.input_u32, ctx.U32[1], InputGenericLoadOp::Bitcast}; + return InputGenericInfo{id, + ctx.input_u32, + ctx.U32[1], + ctx.U32[4], + ctx.TypePointer(spv::StorageClass::Input, ctx.U32[4]), + InputGenericLoadOp::Bitcast}; case AttributeType::SignedInt: - return InputGenericInfo{id, ctx.input_s32, ctx.TypeInt(32, true), + return InputGenericInfo{id, + ctx.input_s32, + ctx.TypeInt(32, true), + ctx.TypeVector(ctx.TypeInt(32, true), 4), + ctx.TypePointer(spv::StorageClass::Input, + ctx.TypeVector(ctx.TypeInt(32, true), 4)), InputGenericLoadOp::Bitcast}; case AttributeType::SignedScaled: - return ctx.profile.support_scaled_attributes - ? InputGenericInfo{id, ctx.input_f32, ctx.F32[1], InputGenericLoadOp::None} - : InputGenericInfo{id, ctx.input_s32, ctx.TypeInt(32, true), - InputGenericLoadOp::SToF}; + if (ctx.profile.support_scaled_attributes) { + return InputGenericInfo{id, + ctx.input_f32, + ctx.F32[1], + ctx.F32[4], + ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]), + InputGenericLoadOp::None}; + } + return InputGenericInfo{id, + ctx.input_s32, + ctx.TypeInt(32, true), + ctx.TypeVector(ctx.TypeInt(32, true), 4), + ctx.TypePointer(spv::StorageClass::Input, + ctx.TypeVector(ctx.TypeInt(32, true), 4)), + InputGenericLoadOp::SToF}; case AttributeType::UnsignedScaled: - return ctx.profile.support_scaled_attributes - ? 
InputGenericInfo{id, ctx.input_f32, ctx.F32[1], InputGenericLoadOp::None}
-                   : InputGenericInfo{id, ctx.input_u32, ctx.U32[1], InputGenericLoadOp::UToF};
+        if (ctx.profile.support_scaled_attributes) {
+            return InputGenericInfo{id,
+                                    ctx.input_f32,
+                                    ctx.F32[1],
+                                    ctx.F32[4],
+                                    ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]),
+                                    InputGenericLoadOp::None};
+        }
+        return InputGenericInfo{id,
+                                ctx.input_u32,
+                                ctx.U32[1],
+                                ctx.U32[4],
+                                ctx.TypePointer(spv::StorageClass::Input, ctx.U32[4]),
+                                InputGenericLoadOp::UToF};
     case AttributeType::Disabled:
         return InputGenericInfo{};
     }
@@ -702,7 +750,8 @@ void EmitContext::DefineSharedMemoryFunctions(const IR::Program& program) {
 
 void EmitContext::DefineAttributeMemAccess(const Info& info) {
     const auto make_load{[&] {
-        const bool is_array{stage == Stage::Geometry};
+        const bool is_array{stage == Stage::Geometry || stage == Stage::TessellationControl ||
+                            stage == Stage::TessellationEval};
 
         const Id end_block{OpLabel()};
         const Id default_label{OpLabel()};
@@ -737,6 +786,30 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
         size_t label_index{0};
         if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
             AddLabel(labels[label_index]);
+            if (profile.has_broken_spirv_vector_access_chain) {
+                const Id pointer_type{TypePointer(spv::StorageClass::Input, F32[4])};
+                const Id vector_pointer{[&]() {
+                    if (need_input_position_indirect) {
+                        if (is_array) {
+                            return OpAccessChain(pointer_type, input_position, vertex,
+                                                 u32_zero_value);
+                        }
+                        return OpAccessChain(pointer_type, input_position, u32_zero_value);
+                    } else {
+                        if (is_array) {
+                            return OpAccessChain(pointer_type, input_position, vertex);
+                        }
+                        return input_position;
+                    }
+                }()};
+                const Id vector_value{OpLoad(F32[4], vector_pointer)};
+                const Id result{OpVectorExtractDynamic(F32[1], vector_value, masked_index)};
+                OpReturnValue(result);
+                ++label_index;
+                // FIXME(review): the original patch had 'continue;' here, but there is no
+                // enclosing loop, so it does not compile. The fallback emission below must be
+                // moved into an else branch so nothing is appended after OpReturnValue.
+            }
             const Id pointer{[&]() {
                 if (need_input_position_indirect) {
                     if (is_array)
@@ -768,22 +839,36 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
             ++label_index;
             continue;
         }
-        const Id pointer{
-            is_array ? OpAccessChain(generic.pointer_type, generic_id, vertex, masked_index)
-                     : OpAccessChain(generic.pointer_type, generic_id, masked_index)};
-        const Id value{OpLoad(generic.component_type, pointer)};
-        const Id result{[this, generic, value]() {
+        const auto convert_value{[&](Id component) {
             switch (generic.load_op) {
             case InputGenericLoadOp::Bitcast:
-                return OpBitcast(F32[1], value);
+                return OpBitcast(F32[1], component);
             case InputGenericLoadOp::SToF:
-                return OpConvertSToF(F32[1], value);
+                return OpConvertSToF(F32[1], component);
             case InputGenericLoadOp::UToF:
-                return OpConvertUToF(F32[1], value);
+                return OpConvertUToF(F32[1], component);
             default:
-                return value;
-            };
-        }()};
+                return component;
+            }
+        }};
+        Id result{};
+        if (profile.has_broken_spirv_vector_access_chain) {
+            const Id vector_pointer{is_array
+                                        ? OpAccessChain(generic.composite_pointer_type,
+                                                        generic_id, vertex)
+                                        : generic_id};
+            const Id vector_value{OpLoad(generic.composite_type, vector_pointer)};
+            const Id component_value{
+                OpVectorExtractDynamic(generic.component_type, vector_value, masked_index)};
+            result = convert_value(component_value);
+        } else {
+            const Id pointer{
+                is_array ?
OpAccessChain(generic.pointer_type, generic_id, vertex,
+                                       masked_index)
+                         : OpAccessChain(generic.pointer_type, generic_id, masked_index)};
+            const Id value{OpLoad(generic.component_type, pointer)};
+            result = convert_value(value);
+        }
         OpReturnValue(result);
         ++label_index;
     }
@@ -835,6 +920,20 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
         size_t label_index{0};
         if (info.stores.AnyComponent(IR::Attribute::PositionX)) {
             AddLabel(labels[label_index]);
+            if (profile.has_broken_spirv_vector_access_chain) {
+                const bool use_invocation{stage == Stage::TessellationControl};
+                const Id pointer_type{TypePointer(spv::StorageClass::Output, F32[4])};
+                const Id vector_pointer{use_invocation
+                                            ? OpAccessChain(pointer_type, output_position,
+                                                            OpLoad(U32[1], invocation_id))
+                                            : output_position};
+                const Id vector_value{OpLoad(F32[4], vector_pointer)};
+                const Id updated{
+                    OpVectorInsertDynamic(F32[4], vector_value, store_value, masked_index)};
+                OpStore(vector_pointer, updated);
+                OpReturn();
+            } else {
+                const Id pointer{OpAccessChain(output_f32, output_position, masked_index)};
+                OpStore(pointer, store_value);
+                OpReturn();
+            }
-            const Id pointer{OpAccessChain(output_f32, output_position, masked_index)};
-            OpStore(pointer, store_value);
-            OpReturn();
@@ -848,7 +946,26 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
             throw NotImplementedException("Physical stores and transform feedbacks");
         }
         AddLabel(labels[label_index]);
-        const Id generic_id{output_generics[index][0].id};
+        const GenericElementInfo& element_info{output_generics[index][0]};
+        if (profile.has_broken_spirv_vector_access_chain) {
+            Id local_index{masked_index};
+            if (element_info.first_element != 0) {
+                local_index = OpISub(U32[1], masked_index, Const(element_info.first_element));
+            }
+            const bool use_invocation{stage == Stage::TessellationControl};
+            const Id pointer_type{
+                TypePointer(spv::StorageClass::Output, element_info.composite_type)};
+            const Id invocation{use_invocation ? OpLoad(U32[1], invocation_id) : Id{}};
+            const Id vector_pointer{use_invocation
+                                        ? OpAccessChain(pointer_type, element_info.id,
+                                                        invocation)
+                                        : element_info.id};
+            const Id vector_value{OpLoad(element_info.composite_type, vector_pointer)};
+            const Id updated{OpVectorInsertDynamic(element_info.composite_type, vector_value,
+                                                   store_value, local_index)};
+            OpStore(vector_pointer, updated);
+            OpReturn();
+        } else {
+            const Id pointer{OpAccessChain(output_f32, element_info.id, masked_index)};
+            OpStore(pointer, store_value);
+            OpReturn();
+        }
-        const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)};
-        OpStore(pointer, store_value);
-        OpReturn();
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 66cdb1d3db..6c26d429d9 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -146,6 +146,8 @@ struct InputGenericInfo {
     Id id;
     Id pointer_type;
     Id component_type;
+    Id composite_type;
+    Id composite_pointer_type;
     InputGenericLoadOp load_op;
 };
 
@@ -153,6 +155,8 @@ struct GenericElementInfo {
     Id id{};
     u32 first_element{};
     u32 num_components{};
+    Id composite_type{};
+    Id composite_pointer_type{};
 };
 
 class EmitContext final : public Sirit::Module {
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 90e46bb1ba..247512bbc7 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
@@ -61,6 +64,10 @@ struct Profile {
     /// OpFClamp is broken and OpFMax + OpFMin should be used instead
     bool has_broken_spirv_clamp{};
+    /// Driver mishandles vector OpAccessChain operations
+    bool has_broken_spirv_vector_access_chain{};
+    /// Driver crashes when spirv-opt folds certain OpAccessChain chains
+    bool has_broken_spirv_access_chain_opt{};
     /// The Position builtin needs to be wrapped in a struct when used as
an input
     bool has_broken_spirv_position_input{};
     /// Offset image operands with an unsigned type do not work
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 45f729698e..374ce87c33 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -228,7 +228,9 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
         .need_fastmath_off = device.NeedsFastmathOff(),
         .need_gather_subpixel_offset = device.IsAmd() || device.IsIntel(),
-        .has_broken_spirv_clamp = true,
+        .has_broken_spirv_clamp = true,
+        .has_broken_spirv_vector_access_chain = false,
+        .has_broken_spirv_access_chain_opt = false,
         .has_broken_unsigned_image_offsets = true,
         .has_broken_signed_operations = true,
         .has_broken_fp16_float_controls = false,
@@ -541,7 +543,11 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
     case Settings::ShaderBackend::SpirV:
         ConvertLegacyToGeneric(program, runtime_info);
-        sources_spirv[stage_index] =
-            EmitSPIRV(profile, runtime_info, program, binding, this->optimize_spirv_output);
+        {
+            const bool optimize_shader{this->optimize_spirv_output &&
+                                       !profile.has_broken_spirv_access_chain_opt};
+            sources_spirv[stage_index] =
+                EmitSPIRV(profile, runtime_info, program, binding, optimize_shader);
+        }
         break;
     }
     previous_program = &program;
@@ -600,7 +604,10 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
         code = EmitGLASM(profile, info, program);
         break;
-    case Settings::ShaderBackend::SpirV:
-        code_spirv = EmitSPIRV(profile, program, this->optimize_spirv_output);
+    case Settings::ShaderBackend::SpirV: {
+        const bool optimize_shader{this->optimize_spirv_output &&
+                                   !profile.has_broken_spirv_access_chain_opt};
+        code_spirv = EmitSPIRV(profile, program, optimize_shader);
+    }
         break;
     }
 
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 57ea9dcc61..8be001ecf3 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ 
b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -368,6 +368,8 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
             driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA,
         .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS,
+        .has_broken_spirv_vector_access_chain = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
+        .has_broken_spirv_access_chain_opt = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
         .has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
         .has_broken_unsigned_image_offsets = false,
         .has_broken_signed_operations = false,
@@ -694,7 +696,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
         const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
         ConvertLegacyToGeneric(program, runtime_info);
-        const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding, this->optimize_spirv_output)};
+        const bool optimize_shader{this->optimize_spirv_output &&
+                                   !profile.has_broken_spirv_access_chain_opt};
+        const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding,
+                                              optimize_shader)};
         device.SaveShader(code);
         modules[stage_index] = BuildShader(device, code);
         if (device.HasDebuggingToolAttached()) {
@@ -801,7 +806,9 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
                     max_shared_memory / 1024);
         program.shared_memory_size = max_shared_memory;
     }
-    const std::vector<u32> code{EmitSPIRV(profile, program, this->optimize_spirv_output)};
+    const bool optimize_shader{this->optimize_spirv_output &&
+                               !profile.has_broken_spirv_access_chain_opt};
+    const std::vector<u32> code{EmitSPIRV(profile, program, optimize_shader)};
     device.SaveShader(code);
     vk::ShaderModule spv_module{BuildShader(device, code)};
     if (device.HasDebuggingToolAttached()) {