diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 79a4bf4fd2..6bbc268234 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -223,6 +223,7 @@ add_library(shader_recompiler STATIC ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp ir_opt/dual_vertex_pass.cpp + ir_opt/amd_fp64_varying_lowering.cpp ir_opt/global_memory_to_storage_buffer_pass.cpp ir_opt/identity_removal_pass.cpp ir_opt/layer_pass.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 945cdb42bc..4bef553f67 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -185,6 +185,87 @@ private: spv::ImageOperandsMask mask{}; }; +Id ManualDepthCompare(EmitContext& ctx, const TextureMeta& meta, Id texel, Id reference) { + if (!meta.compare_func.has_value()) { + return ctx.Const(1.0f); + } + + const Id type{ctx.F32[1]}; + const Id zero{ctx.Const(0.0f)}; + const Id one{ctx.Const(1.0f)}; + const auto clamp01 = [&](Id value) { + return ctx.OpFClamp(type, value, zero, one); + }; + + const bool needs_quantization = NeedsD24Quantization(meta); + if (needs_quantization) { + const Id scale{ctx.Const(16777215.0f)}; + const Id inv_scale{ctx.Const(1.0f / 16777215.0f)}; + const Id half{ctx.Const(0.5f)}; + const auto quantize = [&](Id value) { + const Id clamped{clamp01(value)}; + const Id scaled{ctx.OpFMul(type, clamped, scale)}; + const Id shifted{ctx.OpFAdd(type, scaled, half)}; + const Id floored{ctx.OpFloor(type, shifted)}; + return ctx.OpFMul(type, floored, inv_scale); + }; + texel = quantize(texel); + reference = quantize(reference); + } + + const CompareFunction func{*meta.compare_func}; + switch (func) { + case CompareFunction::Never: + return ctx.Const(0.0f); + case CompareFunction::Always: + return ctx.Const(1.0f); + default: + break; + } + + Id tex_compare{texel}; + Id ref_compare{reference}; + if (needs_quantization) { + const Id epsilon{ctx.Const(0.5f / 16777215.0f)}; + switch (func) { + case CompareFunction::LessThanEqual: + tex_compare = ctx.OpFAdd(type, tex_compare, epsilon); + break; + case CompareFunction::GreaterThanEqual: + tex_compare = ctx.OpFSub(type, tex_compare, epsilon); + break; + default: + break; + } + } + + Id compare_result{}; + switch (func) { + case CompareFunction::Less: + compare_result = ctx.OpFOrdLessThan(ctx.U1, ref_compare, tex_compare); + break; + case CompareFunction::Equal: + compare_result = ctx.OpFOrdEqual(ctx.U1, ref_compare, tex_compare); + break; + case CompareFunction::LessThanEqual: + compare_result = ctx.OpFOrdLessThanEqual(ctx.U1, ref_compare, tex_compare); + break; + case CompareFunction::Greater: + compare_result = ctx.OpFOrdGreaterThan(ctx.U1, ref_compare, tex_compare); + break; + case CompareFunction::NotEqual: + compare_result = ctx.OpFOrdNotEqual(ctx.U1, ref_compare, tex_compare); + break; + case CompareFunction::GreaterThanEqual: + compare_result = ctx.OpFOrdGreaterThanEqual(ctx.U1, ref_compare, tex_compare); + break; + default: + compare_result = ctx.false_value; + break; + } + + return ctx.OpSelect(type, compare_result, one, zero); +} Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) { const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; if (def.count > 1) { @@ -479,6 +560,20 @@ Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id dref, Id bias_lc, const IR::Value& offset) { const auto info{inst->Flags()}; + const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + const TextureMeta* meta = def.meta; + const bool manual_compare = + ctx.options.amd_depth_compare_workaround && meta && meta->manual_compare; + if (manual_compare) { + IR::TextureInstInfo sample_info{info}; + sample_info.is_depth.Assign(0); + inst->SetFlags(sample_info); + const Id sampled = + EmitImageSampleImplicitLod(ctx, inst, index, coords, bias_lc, offset); + inst->SetFlags(info); + const Id texel{ctx.OpCompositeExtract(ctx.F32[1], sampled, 0u)}; + return ManualDepthCompare(ctx, *meta, texel, dref); + } if (ctx.stage == Stage::Fragment) { const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, offset); @@ -500,6 +595,19 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id dref, Id lod, const IR::Value& offset) { const auto info{inst->Flags()}; + const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + const TextureMeta* meta = def.meta; + const bool manual_compare = + ctx.options.amd_depth_compare_workaround && meta && meta->manual_compare; + if (manual_compare) { + IR::TextureInstInfo sample_info{info}; + sample_info.is_depth.Assign(0); + inst->SetFlags(sample_info); + const Id sampled = EmitImageSampleExplicitLod(ctx, inst, index, coords, lod, offset); + inst->SetFlags(info); + const Id texel{ctx.OpCompositeExtract(ctx.F32[1], sampled, 0u)}; + return ManualDepthCompare(ctx, *meta, texel, dref); + } const ImageOperands operands(ctx, false, true, false, lod, offset); return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1], diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 4c3e101433..4414020827 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -460,8 +460,9 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, IR::Program& program, Bindings& bindings) - : Sirit::Module(profile_.supported_spirv), profile{profile_}, runtime_info{runtime_info_}, - stage{program.stage}, texture_rescaling_index{bindings.texture_scaling_index}, + : Sirit::Module(profile_.supported_spirv), profile{profile_}, options{program.options}, + runtime_info{runtime_info_}, stage{program.stage}, + texture_rescaling_index{bindings.texture_scaling_index}, image_rescaling_index{bindings.image_scaling_index} { const bool is_unified{profile.unified_descriptor_binding}; u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer}; @@ -1358,7 +1359,10 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_index) { textures.reserve(info.texture_descriptors.size()); - for (const TextureDescriptor& desc : info.texture_descriptors) { + for (size_t tex_index = 0; tex_index < info.texture_descriptors.size(); ++tex_index) { + const TextureDescriptor& desc = info.texture_descriptors[tex_index]; + const TextureMeta* meta = + tex_index < info.texture_metas.size() ? &info.texture_metas[tex_index] : nullptr; const Id image_type{ImageType(*this, desc)}; const Id sampled_type{TypeSampledImage(image_type)}; const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)}; @@ -1372,6 +1376,7 @@ void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_in .sampled_type = sampled_type, .pointer_type = pointer_type, .image_type = image_type, + .meta = meta, .count = desc.count, .is_multisample = desc.is_multisample, }); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 66cdb1d3db..3b9985c76d 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -36,6 +36,7 @@ struct TextureDefinition { Id sampled_type; Id pointer_type; Id image_type; + const TextureMeta* meta; u32 count; bool is_multisample; }; @@ -205,6 +206,7 @@ public: } const Profile& profile; + const RecompilerOptions& options; const RuntimeInfo& runtime_info; Stage stage{}; diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 5dbbc7e61e..9a8cb5af36 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include "common/common_types.h" #include "shader_recompiler/program_header.h" @@ -26,6 +27,9 @@ public: [[nodiscard]] virtual bool IsTexturePixelFormatInteger(u32 raw_handle) = 0; + [[nodiscard]] virtual std::optional ReadTextureCompareFunction( + u32 raw_handle) = 0; + [[nodiscard]] virtual u32 ReadViewportTransformState() = 0; [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 6b4a05c598..cda00e0adb 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -19,6 +19,7 @@ struct Program { BlockList blocks; BlockList post_order_blocks; Info info; + RecompilerOptions options; Stage stage{}; std::array workgroup_size{}; OutputTopology output_topology{}; diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 97b9b0cf07..e05e14becc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -241,8 +241,10 @@ void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInf } // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { + Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info, + const RecompilerOptions& options) { IR::Program program; + program.options = options; program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info); program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = PostOrder(program.syntax_list.front()); @@ -285,6 +287,8 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool& inst_pool, IR::Program& source_program, Shader::OutputTopology output_topology) { IR::Program program; + program.options = source_program.options; program.stage = Stage::Geometry; program.output_topology = output_topology; program.output_vertices = GetOutputTopologyVertices(output_topology); diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h index 497afe7cb9..923c48febd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.h +++ b/src/shader_recompiler/frontend/maxwell/translate_program.h @@ -12,13 +12,23 @@ namespace Shader { struct HostTranslateInfo; +struct RecompilerOptions; } namespace Shader::Maxwell { [[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, - Flow::CFG& cfg, const HostTranslateInfo& host_info); + Flow::CFG& cfg, const HostTranslateInfo& host_info, + const RecompilerOptions& options); + +[[nodiscard]] inline IR::Program TranslateProgram(ObjectPool& inst_pool, + ObjectPool& block_pool, + Environment& env, Flow::CFG& cfg, + const HostTranslateInfo& host_info) { + Shader::RecompilerOptions options{}; + return TranslateProgram(inst_pool, block_pool, env, cfg, host_info, options); +} [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, Environment& env_vertex_b); diff --git a/src/shader_recompiler/ir_opt/amd_fp64_varying_lowering.cpp b/src/shader_recompiler/ir_opt/amd_fp64_varying_lowering.cpp new file mode 100644 index 0000000000..c40773d268 --- /dev/null +++ b/src/shader_recompiler/ir_opt/amd_fp64_varying_lowering.cpp @@ -0,0 +1,83 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#include + +#include "shader_recompiler/frontend/ir/attribute.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/runtime_info.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { + +bool ConvertArgToF32(IR::Block& block, IR::Inst& inst, size_t arg_index) { + const IR::Value value{inst.Arg(arg_index)}; + if ((value.Type() & IR::Type::F64) == IR::Type::Void) { + return false; + } + + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const IR::F16F32F64 converted{ir.FPConvert(32, IR::F16F32F64{value})}; + inst.SetArg(arg_index, converted); + return true; +} + +} // namespace + +void AmdFp64VaryingLoweringPass(IR::Program& program) { + if (program.stage == Stage::Fragment || program.stage == Stage::Compute) { + return; + } + + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + switch (inst.GetOpcode()) { + case IR::Opcode::SetAttribute: { + if (!ConvertArgToF32(*block, inst, 1)) { + break; + } + const IR::Attribute attr{inst.Arg(0).Attribute()}; + if (IR::IsGeneric(attr)) { + const size_t index{IR::GenericAttributeIndex(attr)}; + program.info.amd_converted_fp64_varyings[index] = true; + } + break; + } + case IR::Opcode::SetAttributeIndexed: { + if (ConvertArgToF32(*block, inst, 1)) { + program.info.amd_converted_fp64_varyings_indexed = true; + } + break; + } + default: + break; + } + } + } +} + +void AmdFp64VaryingPostProcess(IR::Program& program, const RuntimeInfo& runtime_info) { + if (program.stage != Stage::Fragment) { + return; + } + + const bool indexed = runtime_info.amd_converted_fp64_varyings_indexed; + const auto& converted = runtime_info.amd_converted_fp64_varyings; + + if (!indexed && + std::none_of(converted.begin(), converted.end(), [](bool value) { return value; })) { + return; + } + + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (indexed || converted[index]) { + program.info.interpolation[index] = Interpolation::Smooth; + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 1e637cb23c..b5a6863616 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -8,6 +8,7 @@ namespace Shader { struct HostTranslateInfo; +struct RuntimeInfo; } namespace Shader::Optimization { @@ -21,6 +22,8 @@ void IdentityRemovalPass(IR::Program& program); void LowerFp64ToFp32(IR::Program& program); void LowerFp16ToFp32(IR::Program& program); void LowerInt64ToInt32(IR::Program& program); +void AmdFp64VaryingLoweringPass(IR::Program& program); +void AmdFp64VaryingPostProcess(IR::Program& program, const RuntimeInfo& runtime_info); void RescalingPass(IR::Program& program); void SsaRewritePass(IR::Program& program); void PositionPass(Environment& env, IR::Program& program); diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 7ff1961172..9bb60f3216 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -490,10 +490,12 @@ public: explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, ImageBufferDescriptors& image_buffer_descriptors_, TextureDescriptors& texture_descriptors_, + TextureMetas& texture_metas_, ImageDescriptors& image_descriptors_) : texture_buffer_descriptors{texture_buffer_descriptors_}, image_buffer_descriptors{image_buffer_descriptors_}, - texture_descriptors{texture_descriptors_}, image_descriptors{image_descriptors_} {} + texture_descriptors{texture_descriptors_}, texture_metas{texture_metas_}, + image_descriptors{image_descriptors_} {} u32 Add(const TextureBufferDescriptor& desc) { return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { @@ -520,7 +522,8 @@ public: return index; } - u32 Add(const TextureDescriptor& desc) { + u32 Add(const TextureDescriptor& desc, const TextureMeta& meta) { + const u32 previous_size = static_cast(texture_descriptors.size()); const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) { return desc.type == existing.type && desc.is_depth == existing.is_depth && desc.has_secondary == existing.has_secondary && @@ -534,6 +537,27 @@ public: })}; // TODO: Read this from TIC texture_descriptors[index].is_multisample |= desc.is_multisample; + if (index == previous_size) { + texture_metas.push_back(meta); + } else { + if (texture_metas.size() <= index) { + texture_metas.resize(texture_descriptors.size()); + } + auto& existing_meta = texture_metas[index]; + existing_meta.declared_depth |= meta.declared_depth; + const bool existing_depth_like = IsDepthLike(existing_meta); + const bool incoming_depth_like = IsDepthLike(meta); + if (!existing_depth_like || incoming_depth_like) { + existing_meta.guest_format = meta.guest_format; + } + existing_meta.manual_compare |= meta.manual_compare; + if (meta.manual_compare) { + existing_meta.guest_format = meta.guest_format; + } + if (meta.compare_func) { + existing_meta.compare_func = meta.compare_func; + } + } return index; } @@ -565,6 +589,7 @@ private: TextureBufferDescriptors& texture_buffer_descriptors; ImageBufferDescriptors& image_buffer_descriptors; TextureDescriptors& texture_descriptors; + TextureMetas& texture_metas; ImageDescriptors& image_descriptors; }; @@ -650,12 +675,11 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo if (a.cbuf.index != b.cbuf.index) return a.cbuf.index < b.cbuf.index; return a.cbuf.offset < b.cbuf.offset; }); - Descriptors descriptors{ - program.info.texture_buffer_descriptors, - program.info.image_buffer_descriptors, - program.info.texture_descriptors, - program.info.image_descriptors, - }; + Descriptors descriptors{program.info.texture_buffer_descriptors, + program.info.image_buffer_descriptors, + program.info.texture_descriptors, + program.info.texture_metas, + program.info.image_descriptors}; for (TextureInst& texture_inst : to_replace) { // TODO: Handle arrays IR::Inst* const inst{texture_inst.inst}; @@ -759,7 +783,11 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo .size_shift = DESCRIPTOR_SIZE_SHIFT, }); } else { - index = descriptors.Add(TextureDescriptor{ + const u32 handle = GetTextureHandleCached(env, cbuf); + const TexturePixelFormat pixel_format = env.ReadTexturePixelFormat(handle); + const std::optional compare_func = + env.ReadTextureCompareFunction(handle); + TextureDescriptor desc{ .type = flags.type, .is_depth = flags.is_depth != 0, .is_multisample = is_multisample, @@ -772,7 +800,22 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo .secondary_shift_left = cbuf.secondary_shift_left, .count = cbuf.count, .size_shift = DESCRIPTOR_SIZE_SHIFT, - }); + }; + TextureMeta meta{ + .guest_format = pixel_format, + .declared_depth = flags.is_depth != 0, + .manual_compare = false, + .compare_func = compare_func, + }; + const bool supports_native = SupportsNativeDepthCompare(meta); + const bool manual_compare = + program.options.amd_depth_compare_workaround && compare_func.has_value() && + (!supports_native || flags.is_depth == 0); + if (manual_compare) { + meta.manual_compare = true; + desc.is_depth = false; + } + index = descriptors.Add(desc, meta); } break; } @@ -797,23 +840,24 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo } } } + + program.info.texture_metas.resize(program.info.texture_descriptors.size()); } void JoinTextureInfo(Info& base, Info& source) { - Descriptors descriptors{ - base.texture_buffer_descriptors, - base.image_buffer_descriptors, - base.texture_descriptors, - base.image_descriptors, - }; + Descriptors descriptors{base.texture_buffer_descriptors, + base.image_buffer_descriptors, + base.texture_descriptors, + base.texture_metas, + base.image_descriptors}; for (auto& desc : source.texture_buffer_descriptors) { descriptors.Add(desc); } for (auto& desc : source.image_buffer_descriptors) { descriptors.Add(desc); } - for (auto& desc : source.texture_descriptors) { - descriptors.Add(desc); + for (size_t index = 0; index < source.texture_descriptors.size(); ++index) { + descriptors.Add(source.texture_descriptors[index], source.texture_metas[index]); } for (auto& desc : source.image_descriptors) { descriptors.Add(desc); diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index dc54d932a6..a9a755f3f5 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -86,6 +86,9 @@ struct RuntimeInfo { bool convert_depth_mode{}; bool force_early_z{}; + std::array amd_converted_fp64_varyings{}; + bool amd_converted_fp64_varyings_indexed{}; + TessPrimitive tess_primitive{}; TessSpacing tess_spacing{}; bool tess_clockwise{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index ed13e68209..ba57905bd5 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "common/common_types.h" #include "shader_recompiler/frontend/ir/type.h" @@ -18,8 +19,15 @@ namespace Shader { enum class ReplaceConstant : u32 { BaseInstance, - BaseVertex, - DrawID, + BaseVertex, + DrawID, +}; + +enum class CompareFunction; + +struct RecompilerOptions { + bool amd_depth_compare_workaround = false; + bool amd_fp64_varying_lowering = false; }; enum class TextureType : u32 { @@ -220,6 +228,56 @@ struct TextureDescriptor { }; using TextureDescriptors = boost::container::small_vector; +struct TextureMeta { + TexturePixelFormat guest_format{TexturePixelFormat::A8B8G8R8_UNORM}; + bool declared_depth{}; + bool manual_compare{}; + std::optional compare_func{}; +}; +using TextureMetas = boost::container::small_vector; + +[[nodiscard]] inline bool IsDepthLike(const TextureMeta& meta) { + if (meta.declared_depth) { + return true; + } + switch (meta.guest_format) { + case TexturePixelFormat::D16_UNORM: + case TexturePixelFormat::X8_D24_UNORM: + case TexturePixelFormat::D24_UNORM_S8_UINT: + case TexturePixelFormat::S8_UINT_D24_UNORM: + case TexturePixelFormat::D32_FLOAT: + case TexturePixelFormat::D32_FLOAT_S8_UINT: + return true; + default: + return false; + } +} + +[[nodiscard]] inline bool NeedsD24Quantization(const TextureMeta& meta) { + switch (meta.guest_format) { + case TexturePixelFormat::X8_D24_UNORM: + case TexturePixelFormat::D24_UNORM_S8_UINT: + case TexturePixelFormat::S8_UINT_D24_UNORM: + return true; + default: + return false; + } +} + +[[nodiscard]] inline bool SupportsNativeDepthCompare(const TextureMeta& meta) { + switch (meta.guest_format) { + case TexturePixelFormat::D16_UNORM: + case TexturePixelFormat::X8_D24_UNORM: + case TexturePixelFormat::D24_UNORM_S8_UINT: + case TexturePixelFormat::S8_UINT_D24_UNORM: + case TexturePixelFormat::D32_FLOAT: + case TexturePixelFormat::D32_FLOAT_S8_UINT: + return true; + default: + return false; + } +} + struct ImageDescriptor { TextureType type; ImageFormat format; @@ -255,6 +313,9 @@ struct Info { VaryingState stores; VaryingState passthrough; + std::array amd_converted_fp64_varyings{}; + bool amd_converted_fp64_varyings_indexed{}; + std::map legacy_stores_mapping; bool loads_indexed_attributes{}; @@ -332,6 +393,7 @@ struct Info { TextureBufferDescriptors texture_buffer_descriptors; ImageBufferDescriptors image_buffer_descriptors; TextureDescriptors texture_descriptors; + TextureMetas texture_metas; ImageDescriptors image_descriptors; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 45f729698e..ad1a8025aa 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -21,6 +21,7 @@ #include "shader_recompiler/backend/glasm/emit_glasm.h" #include "shader_recompiler/backend/glsl/emit_glsl.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/frontend/maxwell/translate_program.h" @@ -533,6 +534,9 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( switch (device.GetShaderBackend()) { case Settings::ShaderBackend::Glsl: ConvertLegacyToGeneric(program, runtime_info); + if (program.options.amd_fp64_varying_lowering) { + Shader::Optimization::AmdFp64VaryingPostProcess(program, runtime_info); + } sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); break; case Settings::ShaderBackend::Glasm: @@ -540,6 +544,9 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( break; case Settings::ShaderBackend::SpirV: ConvertLegacyToGeneric(program, runtime_info); + if (program.options.amd_fp64_varying_lowering) { + Shader::Optimization::AmdFp64VaryingPostProcess(program, runtime_info); + } sources_spirv[stage_index] = EmitSPIRV(profile, runtime_info, program, binding, this->optimize_spirv_output); break; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 2d9c5d4148..adadd89f6d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -158,12 +158,18 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, for (const auto& desc : info.image_buffer_descriptors) { add_image(desc, false); } - for (const auto& desc : info.texture_descriptors) { + for (size_t desc_index = 0; desc_index < info.texture_descriptors.size(); ++desc_index) { + const auto& desc = info.texture_descriptors[desc_index]; + const auto& meta = info.texture_metas[desc_index]; for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; views.push_back({handle.first}); - VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second); + const bool manual_compare = meta.manual_compare; + VideoCommon::SamplerId sampler = manual_compare + ? texture_cache.GetComputeManualSamplerId( + handle.second) + : texture_cache.GetComputeSamplerId(handle.second); samplers.push_back(sampler); } } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 745389213e..34f46e1dc8 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -371,12 +371,19 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { add_image(desc, false); } } - for (const auto& desc : info.texture_descriptors) { + for (size_t desc_index = 0; desc_index < info.texture_descriptors.size(); ++desc_index) { + const auto& desc = info.texture_descriptors[desc_index]; + const auto& meta = info.texture_metas[desc_index]; for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; views[view_index++] = {handle.first}; - VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)}; + const bool manual_compare = meta.manual_compare; + VideoCommon::SamplerId sampler = manual_compare + ? texture_cache.GetGraphicsManualSamplerId( + handle.second) + : texture_cache.GetGraphicsSamplerId( + handle.second); samplers[sampler_index++] = sampler; } } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 10ee14773f..1052514347 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -19,6 +19,7 @@ #include "common/thread_worker.h" #include "core/core.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/frontend/maxwell/translate_program.h" @@ -151,6 +152,9 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span program if (previous_program) { info.previous_stage_stores = previous_program->info.stores; info.previous_stage_legacy_stores_mapping = previous_program->info.legacy_stores_mapping; + info.amd_converted_fp64_varyings = previous_program->info.amd_converted_fp64_varyings; + info.amd_converted_fp64_varyings_indexed = + previous_program->info.amd_converted_fp64_varyings_indexed; if (previous_program->is_geometry_passthrough) { info.previous_stage_stores.mask |= previous_program->info.passthrough.mask; } @@ -381,6 +385,10 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, .max_user_clip_distances = device.GetMaxUserClipDistances(), }; + const bool is_amd_vendor = device.IsAmdVendor(); + recompiler_options.amd_depth_compare_workaround = is_amd_vendor; + recompiler_options.amd_fp64_varying_lowering = is_amd_vendor; + host_info = Shader::HostTranslateInfo{ .support_float64 = device.IsFloat64Supported(), .support_float16 = device.IsFloat16Supported(), @@ -646,11 +654,13 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { // Normal path - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); + programs[index] = + TranslateProgram(pools.inst, pools.block, env, cfg, host_info, recompiler_options); } else { // VertexB path when VertexA is present. auto& program_va{programs[0]}; - auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; + auto program_vb{ + TranslateProgram(pools.inst, pools.block, env, cfg, host_info, recompiler_options)}; programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } @@ -682,7 +692,11 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; ConvertLegacyToGeneric(program, runtime_info); - const std::vector code{EmitSPIRV(profile, runtime_info, program, binding, this->optimize_spirv_output)}; + if (program.options.amd_fp64_varying_lowering) { + Shader::Optimization::AmdFp64VaryingPostProcess(program, runtime_info); + } + const std::vector code{ + EmitSPIRV(profile, runtime_info, program, binding, this->optimize_spirv_output)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { @@ -775,7 +789,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline( env.Dump(hash, key.unique_hash); } - auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; + auto program{ + TranslateProgram(pools.inst, pools.block, env, cfg, host_info, recompiler_options)}; const std::vector code{EmitSPIRV(profile, program, this->optimize_spirv_output)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 7909bd8cf0..8d5a0df424 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -19,6 +19,7 @@ #include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/shader_info.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/host1x/gpu_device_memory_manager.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" @@ -161,6 +162,7 @@ private: ShaderPools main_pools; Shader::Profile profile; + Shader::RecompilerOptions recompiler_options; Shader::HostTranslateInfo host_info; std::filesystem::path pipeline_cache_filename; diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 28357bf10b..28e5ff67c1 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -20,6 +20,7 @@ #include "common/logging/log.h" #include #include "shader_recompiler/environment.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/engines/kepler_compute.h" #include "video_core/memory_manager.h" #include "video_core/shader_environment.h" @@ -34,6 +35,35 @@ constexpr size_t INST_SIZE = sizeof(u64); using Maxwell = Tegra::Engines::Maxwell3D::Regs; +namespace { + +Shader::CompareFunction ConvertCompareFunction(Tegra::Texture::DepthCompareFunc func) { + using TegraFunc = Tegra::Texture::DepthCompareFunc; + switch (func) { + case TegraFunc::Never: + return Shader::CompareFunction::Never; + case TegraFunc::Less: + return Shader::CompareFunction::Less; + case TegraFunc::LessEqual: + return Shader::CompareFunction::LessThanEqual; + case TegraFunc::Equal: + return Shader::CompareFunction::Equal; + case TegraFunc::NotEqual: + return Shader::CompareFunction::NotEqual; + case TegraFunc::Greater: + return Shader::CompareFunction::Greater; + case TegraFunc::GreaterEqual: + return Shader::CompareFunction::GreaterThanEqual; + case TegraFunc::Always: + return Shader::CompareFunction::Always; + default: + UNIMPLEMENTED_MSG("Unimplemented depth compare func {}", static_cast(func)); + return Shader::CompareFunction::Always; + } +} + +} // Anonymous namespace + static u64 MakeCbufKey(u32 index, u32 offset) { return (static_cast(index) << 32) | offset; } @@ -200,6 +230,7 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { const u64 code_size{static_cast(CachedSizeBytes())}; const u64 num_texture_types{static_cast(texture_types.size())}; const u64 num_texture_pixel_formats{static_cast(texture_pixel_formats.size())}; + const u64 num_texture_compare_funcs{static_cast(texture_compare_funcs.size())}; const u64 num_cbuf_values{static_cast(cbuf_values.size())}; const u64 num_cbuf_replacement_values{static_cast(cbuf_replacements.size())}; @@ -207,6 +238,8 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) .write(reinterpret_cast(&num_texture_pixel_formats), sizeof(num_texture_pixel_formats)) + .write(reinterpret_cast(&num_texture_compare_funcs), + sizeof(num_texture_compare_funcs)) .write(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) .write(reinterpret_cast(&num_cbuf_replacement_values), sizeof(num_cbuf_replacement_values)) @@ -227,6 +260,15 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&format), sizeof(format)); } + for (const auto& [key, compare] : texture_compare_funcs) { + const bool has_value{compare.has_value()}; + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&has_value), sizeof(has_value)); + if (has_value) { + const Shader::CompareFunction value{*compare}; + file.write(reinterpret_cast(&value), sizeof(value)); + } + } for (const auto& [key, type] : cbuf_values) { file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); @@ -284,6 +326,16 @@ Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, return entry; } +Tegra::Texture::TSCEntry GenericEnvironment::ReadSamplerInfo(GPUVAddr tsc_addr, u32 tsc_limit, + bool via_header_index, u32 raw) { + const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; + ASSERT(handle.second <= tsc_limit); + const GPUVAddr descriptor_addr{tsc_addr + handle.second * sizeof(Tegra::Texture::TSCEntry)}; + Tegra::Texture::TSCEntry entry; + gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); + return entry; +} + GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderType program, GPUVAddr program_base_, @@ -392,6 +444,24 @@ bool GraphicsEnvironment::IsTexturePixelFormatInteger(u32 handle) { static_cast(ReadTexturePixelFormat(handle))); } +std::optional GraphicsEnvironment::ReadTextureCompareFunction( + u32 handle) { + if (const auto it = texture_compare_funcs.find(handle); it != texture_compare_funcs.end()) { + return it->second; + } + const auto& regs{maxwell3d->regs}; + const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding}; + const auto entry = + ReadSamplerInfo(regs.tex_sampler.Address(), regs.tex_sampler.limit, via_header_index, handle); + if (entry.depth_compare_enabled == 0) { + texture_compare_funcs.emplace(handle, std::nullopt); + return std::nullopt; + } + const auto compare = ConvertCompareFunction(entry.depth_compare_func); + texture_compare_funcs.emplace(handle, compare); + return compare; +} + u32 GraphicsEnvironment::ReadViewportTransformState() { const auto& regs{maxwell3d->regs}; viewport_transform_state = regs.viewport_scale_offset_enabled; @@ -447,6 +517,24 @@ bool ComputeEnvironment::IsTexturePixelFormatInteger(u32 handle) { static_cast(ReadTexturePixelFormat(handle))); } +std::optional ComputeEnvironment::ReadTextureCompareFunction( + u32 handle) { + if (const auto it = texture_compare_funcs.find(handle); it != texture_compare_funcs.end()) { + return it->second; + } + const auto& regs{kepler_compute->regs}; + const auto& qmd{kepler_compute->launch_description}; + const auto entry = + ReadSamplerInfo(regs.tsc.Address(), regs.tsc.limit, qmd.linked_tsc != 0, handle); + if (entry.depth_compare_enabled == 0) { + texture_compare_funcs.emplace(handle, std::nullopt); + return std::nullopt; + } + const auto compare = ConvertCompareFunction(entry.depth_compare_func); + texture_compare_funcs.emplace(handle, compare); + return compare; +} + u32 ComputeEnvironment::ReadViewportTransformState() { return viewport_transform_state; } @@ -455,12 +543,15 @@ void FileEnvironment::Deserialize(std::ifstream& file) { u64 code_size{}; u64 num_texture_types{}; u64 num_texture_pixel_formats{}; + u64 num_texture_compare_funcs{}; u64 num_cbuf_values{}; u64 num_cbuf_replacement_values{}; file.read(reinterpret_cast(&code_size), sizeof(code_size)) .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) .read(reinterpret_cast(&num_texture_pixel_formats), sizeof(num_texture_pixel_formats)) + .read(reinterpret_cast(&num_texture_compare_funcs), + sizeof(num_texture_compare_funcs)) .read(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) .read(reinterpret_cast(&num_cbuf_replacement_values), sizeof(num_cbuf_replacement_values)) @@ -487,6 +578,19 @@ void FileEnvironment::Deserialize(std::ifstream& file) { .read(reinterpret_cast(&format), sizeof(format)); texture_pixel_formats.emplace(key, format); } + for (size_t i = 0; i < num_texture_compare_funcs; ++i) { + u32 key; + bool has_value{}; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&has_value), sizeof(has_value)); + if (has_value) { + Shader::CompareFunction value; + file.read(reinterpret_cast(&value), sizeof(value)); + texture_compare_funcs.emplace(key, value); + } else { + texture_compare_funcs.emplace(key, std::nullopt); + } + } for (size_t i = 0; i < num_cbuf_values; ++i) { u64 key; u32 value; @@ -555,6 +659,14 @@ bool FileEnvironment::IsTexturePixelFormatInteger(u32 handle) { static_cast(ReadTexturePixelFormat(handle))); } +std::optional FileEnvironment::ReadTextureCompareFunction(u32 handle) { + const auto it{texture_compare_funcs.find(handle)}; + if (it == texture_compare_funcs.end()) { + throw Shader::LogicError("Uncached read texture compare function"); + } + return it->second; +} + u32 FileEnvironment::ReadViewportTransformState() { return viewport_transform_state; } diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 95c2d79277..e4957789d5 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -74,6 +74,8 @@ protected: Tegra::Texture::TICEntry ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, u32 raw); + Tegra::Texture::TSCEntry ReadSamplerInfo(GPUVAddr tsc_addr, u32 tsc_limit, + bool via_header_index, u32 raw); Tegra::MemoryManager* gpu_memory{}; GPUVAddr program_base{}; @@ -81,6 +83,7 @@ protected: std::vector code; std::unordered_map texture_types; std::unordered_map texture_pixel_formats; + std::unordered_map> texture_compare_funcs; std::unordered_map cbuf_values; std::unordered_map cbuf_replacements; @@ -120,6 +123,8 @@ public: bool IsTexturePixelFormatInteger(u32 handle) override; + std::optional ReadTextureCompareFunction(u32 handle) override; + u32 ReadViewportTransformState() override; std::optional GetReplaceConstBuffer(u32 bank, u32 offset) override; @@ -146,6 +151,8 @@ public: bool IsTexturePixelFormatInteger(u32 handle) override; + std::optional ReadTextureCompareFunction(u32 handle) override; + u32 ReadViewportTransformState() override; std::optional GetReplaceConstBuffer( @@ -180,6 +187,9 @@ public: [[nodiscard]] bool IsTexturePixelFormatInteger(u32 handle) override; + [[nodiscard]] std::optional ReadTextureCompareFunction( + u32 handle) override; + [[nodiscard]] u32 ReadViewportTransformState() override; [[nodiscard]] u32 LocalMemorySize() const override; @@ -203,6 +213,7 @@ private: std::vector code; std::unordered_map texture_types; std::unordered_map texture_pixel_formats; + std::unordered_map> texture_compare_funcs; std::unordered_map cbuf_values; std::unordered_map cbuf_replacements; std::array workgroup_size{}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2a44a5e8b2..51a7e1bf70 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -264,7 +264,11 @@ SamplerId TextureCache

::GetGraphicsSamplerId(u32 index) { const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index); SamplerId& id = channel_state->graphics_sampler_ids[index]; if (is_new) { - id = FindSampler(descriptor); + channel_state->graphics_manual_sampler_ids[index] = CORRUPT_ID; + id = CORRUPT_ID; + } + if (id == CORRUPT_ID) { + id = FindSampler(descriptor, false); } return id; } @@ -278,11 +282,49 @@ SamplerId TextureCache

::GetComputeSamplerId(u32 index) { const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index); SamplerId& id = channel_state->compute_sampler_ids[index]; if (is_new) { - id = FindSampler(descriptor); + channel_state->compute_manual_sampler_ids[index] = CORRUPT_ID; + id = CORRUPT_ID; + } + if (id == CORRUPT_ID) { + id = FindSampler(descriptor, false); } return id; } +template +SamplerId TextureCache

::GetGraphicsManualSamplerId(u32 index) { + if (index > channel_state->graphics_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); + return NULL_SAMPLER_ID; + } + const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index); + SamplerId& manual_id = channel_state->graphics_manual_sampler_ids[index]; + if (is_new) { + manual_id = CORRUPT_ID; + } + if (manual_id == CORRUPT_ID) { + manual_id = FindSampler(descriptor, true); + } + return manual_id; +} + +template +SamplerId TextureCache

::GetComputeManualSamplerId(u32 index) { + if (index > channel_state->compute_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); + return NULL_SAMPLER_ID; + } + const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index); + SamplerId& manual_id = channel_state->compute_manual_sampler_ids[index]; + if (is_new) { + manual_id = CORRUPT_ID; + } + if (manual_id == CORRUPT_ID) { + manual_id = FindSampler(descriptor, true); + } + return manual_id; +} + template const typename P::Sampler& TextureCache

::GetSampler(SamplerId id) const noexcept { return slot_samplers[id]; @@ -302,6 +344,7 @@ void TextureCache

::SynchronizeGraphicsDescriptors() { if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), tsc_limit)) { channel_state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + channel_state->graphics_manual_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); } if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), tic_limit)) { @@ -317,6 +360,7 @@ void TextureCache

::SynchronizeComputeDescriptors() { const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address(); if (channel_state->compute_sampler_table.Synchronize(tsc_gpu_addr, tsc_limit)) { channel_state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + channel_state->compute_manual_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); } if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), tic_limit)) { @@ -1254,6 +1298,10 @@ void TextureCache

::InvalidateScale(Image& image) { if constexpr (ENABLE_VALIDATION) { std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID); std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID); + std::ranges::fill(channel_info.graphics_sampler_ids, CORRUPT_ID); + std::ranges::fill(channel_info.compute_sampler_ids, CORRUPT_ID); + std::ranges::fill(channel_info.graphics_manual_sampler_ids, CORRUPT_ID); + std::ranges::fill(channel_info.compute_manual_sampler_ids, CORRUPT_ID); } channel_info.graphics_image_table.Invalidate(); channel_info.compute_image_table.Invalidate(); @@ -1730,7 +1778,14 @@ std::pair TextureCache

::PrepareDmaImage(ImageId dst_id, GPUVAddr ba } template -SamplerId TextureCache

::FindSampler(const TSCEntry& config) { +SamplerId TextureCache

::FindSampler(TSCEntry config, bool disable_compare) { + if (disable_compare) { + config.depth_compare_enabled.Assign(0); + config.depth_compare_func.Assign(Tegra::Texture::DepthCompareFunc::Always); + config.mag_filter.Assign(Tegra::Texture::TextureFilter::Nearest); + config.min_filter.Assign(Tegra::Texture::TextureFilter::Nearest); + config.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Nearest); + } if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { return NULL_SAMPLER_ID; } @@ -2243,6 +2298,10 @@ void TextureCache

::DeleteImage(ImageId image_id, bool immediate_delete) { if constexpr (ENABLE_VALIDATION) { std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID); std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID); + std::ranges::fill(channel_info.graphics_sampler_ids, CORRUPT_ID); + std::ranges::fill(channel_info.compute_sampler_ids, CORRUPT_ID); + std::ranges::fill(channel_info.graphics_manual_sampler_ids, CORRUPT_ID); + std::ranges::fill(channel_info.compute_manual_sampler_ids, CORRUPT_ID); } channel_info.graphics_image_table.Invalidate(); channel_info.compute_image_table.Invalidate(); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 01a9a6a3f1..9a57b5dedd 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -78,11 +78,13 @@ public: DescriptorTable graphics_image_table{gpu_memory}; DescriptorTable graphics_sampler_table{gpu_memory}; std::vector graphics_sampler_ids; + std::vector graphics_manual_sampler_ids; std::vector graphics_image_view_ids; DescriptorTable compute_image_table{gpu_memory}; DescriptorTable compute_sampler_table{gpu_memory}; std::vector compute_sampler_ids; + std::vector compute_manual_sampler_ids; std::vector compute_image_view_ids; std::unordered_map image_views; @@ -175,6 +177,12 @@ public: /// Get the sampler id from the compute descriptor table in the specified index SamplerId GetComputeSamplerId(u32 index); + + /// Get the manual sampler id from the graphics descriptor table in the specified index + SamplerId GetGraphicsManualSamplerId(u32 index); + + /// Get the manual sampler id from the compute descriptor table in the specified index + SamplerId GetComputeManualSamplerId(u32 index); /// Return a constant reference to the given sampler id [[nodiscard]] const Sampler& GetSampler(SamplerId id) const noexcept; @@ -347,7 +355,7 @@ private: const Tegra::Engines::Fermi2D::Config& copy); /// Find or create a sampler from a guest descriptor sampler - [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); + [[nodiscard]] SamplerId FindSampler(TSCEntry config, bool disable_compare); /// Find or create an image view for the given color buffer index [[nodiscard]] ImageViewId FindColorBuffer(size_t index); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index cb13f28523..03b40b8cd0 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -283,6 +283,14 @@ public: return properties.driver.driverID; } + u32 GetVendorID() const noexcept { + return properties.properties.vendorID; + } + + bool IsAmdVendor() const noexcept { + return GetVendorID() == 0x1002; + } + bool ShouldBoostClocks() const; /// Returns uniform buffer alignment requirement.