8 changed files with 198 additions and 0 deletions

  +1    src/shader_recompiler/CMakeLists.txt
  +3    src/shader_recompiler/frontend/maxwell/translate_program.cpp
  +1    src/shader_recompiler/host_translate_info.h
  +185  src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp
  +1    src/shader_recompiler/ir_opt/passes.h
  +1    src/video_core/renderer_opengl/gl_shader_cache.cpp
  +1    src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
  +5    src/video_core/vulkan_common/vulkan_device.h

src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp
@@ -0,0 +1,185 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/opcodes.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/ir_opt/passes.h"

namespace Shader::Optimization {
namespace {

// Exponent-field deltas added when converting between the formats: f64 uses
// an exponent bias of 1023, f32 a bias of 127. For example, 1.0 has a biased
// exponent of 0x3ff as f64 and 0x7f as f32.
constexpr s32 F64ToF32Exp = +127 - 1023;
constexpr s32 F32ToF64Exp = +1023 - 127;

// Demote a packed f64 (a vector of two u32 words) to f32 by reassembling the
// sign, exponent, and mantissa fields. Only the top 23 of the 52 mantissa
// bits survive, so the result is truncated rather than rounded. (A host-side
// reference sketch follows the listing.)
IR::F32 PackedF64ToF32(IR::IREmitter& ir, const IR::Value& packed) {
    const IR::U32 lo{ir.CompositeExtract(packed, 0)};
    const IR::U32 hi{ir.CompositeExtract(packed, 1)};
    const IR::U32 sign{ir.BitFieldExtract(hi, ir.Imm32(31), ir.Imm32(1))};
    const IR::U32 exp{ir.BitFieldExtract(hi, ir.Imm32(20), ir.Imm32(11))};
    const IR::U32 mantissa_hi{ir.BitFieldExtract(hi, ir.Imm32(0), ir.Imm32(20))};
    const IR::U32 mantissa_lo{ir.BitFieldExtract(lo, ir.Imm32(29), ir.Imm32(3))};
    const IR::U32 mantissa{
        ir.BitwiseOr(ir.ShiftLeftLogical(mantissa_hi, ir.Imm32(3)), mantissa_lo)};
    const IR::U32 exp_if_subnorm{
        ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F64ToF32Exp)))};
    const IR::U32 exp_if_infnan{
        ir.Select(ir.IEqual(exp, ir.Imm32(0x7ff)), ir.Imm32(0xff), exp_if_subnorm)};
    const IR::U32 result{
        ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)),
                     ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(23)), mantissa))};
    return ir.BitCast<IR::F32>(result);
}

// Promote an f32 back to a packed f64: the 23-bit mantissa becomes the top
// bits of the 52-bit mantissa, and the exponent is re-biased upwards.
IR::Value F32ToPackedF64(IR::IREmitter& ir, const IR::Value& raw) {
    const IR::U32 value{ir.BitCast<IR::U32>(IR::F32(raw))};
    const IR::U32 sign{ir.BitFieldExtract(value, ir.Imm32(31), ir.Imm32(1))};
    const IR::U32 exp{ir.BitFieldExtract(value, ir.Imm32(23), ir.Imm32(8))};
    const IR::U32 mantissa{ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(23))};
    const IR::U32 mantissa_hi{ir.BitFieldExtract(mantissa, ir.Imm32(3), ir.Imm32(20))};
    const IR::U32 mantissa_lo{ir.BitFieldExtract(mantissa, ir.Imm32(0), ir.Imm32(3))};
    const IR::U32 exp_if_subnorm{
        ir.Select(ir.IEqual(exp, ir.Imm32(0)), ir.Imm32(0), ir.IAdd(exp, ir.Imm32(F32ToF64Exp)))};
    const IR::U32 exp_if_infnan{
        ir.Select(ir.IEqual(exp, ir.Imm32(0xff)), ir.Imm32(0x7ff), exp_if_subnorm)};
    const IR::U32 lo{ir.ShiftLeftLogical(mantissa_lo, ir.Imm32(29))};
    const IR::U32 hi{
        ir.BitwiseOr(ir.ShiftLeftLogical(sign, ir.Imm32(31)),
                     ir.BitwiseOr(ir.ShiftLeftLogical(exp_if_infnan, ir.Imm32(20)), mantissa_hi))};
    return ir.CompositeConstruct(lo, hi);
}

// Maps each 64-bit floating-point opcode to its 32-bit equivalent; opcodes
// without a mapping pass through unchanged.
IR::Opcode Replace(IR::Opcode op) {
    switch (op) {
    case IR::Opcode::FPAbs64:
        return IR::Opcode::FPAbs32;
    case IR::Opcode::FPAdd64:
        return IR::Opcode::FPAdd32;
    case IR::Opcode::FPCeil64:
        return IR::Opcode::FPCeil32;
    case IR::Opcode::FPFloor64:
        return IR::Opcode::FPFloor32;
    case IR::Opcode::FPFma64:
        return IR::Opcode::FPFma32;
    case IR::Opcode::FPMul64:
        return IR::Opcode::FPMul32;
    case IR::Opcode::FPNeg64:
        return IR::Opcode::FPNeg32;
    case IR::Opcode::FPRoundEven64:
        return IR::Opcode::FPRoundEven32;
    case IR::Opcode::FPSaturate64:
        return IR::Opcode::FPSaturate32;
    case IR::Opcode::FPClamp64:
        return IR::Opcode::FPClamp32;
    case IR::Opcode::FPTrunc64:
        return IR::Opcode::FPTrunc32;
    case IR::Opcode::CompositeConstructF64x2:
        return IR::Opcode::CompositeConstructF32x2;
    case IR::Opcode::CompositeConstructF64x3:
        return IR::Opcode::CompositeConstructF32x3;
    case IR::Opcode::CompositeConstructF64x4:
        return IR::Opcode::CompositeConstructF32x4;
    case IR::Opcode::CompositeExtractF64x2:
        return IR::Opcode::CompositeExtractF32x2;
    case IR::Opcode::CompositeExtractF64x3:
        return IR::Opcode::CompositeExtractF32x3;
    case IR::Opcode::CompositeExtractF64x4:
        return IR::Opcode::CompositeExtractF32x4;
    case IR::Opcode::CompositeInsertF64x2:
        return IR::Opcode::CompositeInsertF32x2;
    case IR::Opcode::CompositeInsertF64x3:
        return IR::Opcode::CompositeInsertF32x3;
    case IR::Opcode::CompositeInsertF64x4:
        return IR::Opcode::CompositeInsertF32x4;
    case IR::Opcode::FPOrdEqual64:
        return IR::Opcode::FPOrdEqual32;
    case IR::Opcode::FPUnordEqual64:
        return IR::Opcode::FPUnordEqual32;
    case IR::Opcode::FPOrdNotEqual64:
        return IR::Opcode::FPOrdNotEqual32;
    case IR::Opcode::FPUnordNotEqual64:
        return IR::Opcode::FPUnordNotEqual32;
    case IR::Opcode::FPOrdLessThan64:
        return IR::Opcode::FPOrdLessThan32;
    case IR::Opcode::FPUnordLessThan64:
        return IR::Opcode::FPUnordLessThan32;
    case IR::Opcode::FPOrdGreaterThan64:
        return IR::Opcode::FPOrdGreaterThan32;
    case IR::Opcode::FPUnordGreaterThan64:
        return IR::Opcode::FPUnordGreaterThan32;
    case IR::Opcode::FPOrdLessThanEqual64:
        return IR::Opcode::FPOrdLessThanEqual32;
    case IR::Opcode::FPUnordLessThanEqual64:
        return IR::Opcode::FPUnordLessThanEqual32;
    case IR::Opcode::FPOrdGreaterThanEqual64:
        return IR::Opcode::FPOrdGreaterThanEqual32;
    case IR::Opcode::FPUnordGreaterThanEqual64:
        return IR::Opcode::FPUnordGreaterThanEqual32;
    case IR::Opcode::FPIsNan64:
        return IR::Opcode::FPIsNan32;
    case IR::Opcode::ConvertS16F64:
        return IR::Opcode::ConvertS16F32;
    case IR::Opcode::ConvertS32F64:
        return IR::Opcode::ConvertS32F32;
    case IR::Opcode::ConvertS64F64:
        return IR::Opcode::ConvertS64F32;
    case IR::Opcode::ConvertU16F64:
        return IR::Opcode::ConvertU16F32;
    case IR::Opcode::ConvertU32F64:
        return IR::Opcode::ConvertU32F32;
    case IR::Opcode::ConvertU64F64:
        return IR::Opcode::ConvertU64F32;
    case IR::Opcode::ConvertF32F64:
        return IR::Opcode::Identity;
    case IR::Opcode::ConvertF64F32:
        return IR::Opcode::Identity;
    case IR::Opcode::ConvertF64S8:
        return IR::Opcode::ConvertF32S8;
    case IR::Opcode::ConvertF64S16:
        return IR::Opcode::ConvertF32S16;
    case IR::Opcode::ConvertF64S32:
        return IR::Opcode::ConvertF32S32;
    case IR::Opcode::ConvertF64S64:
        return IR::Opcode::ConvertF32S64;
    case IR::Opcode::ConvertF64U8:
        return IR::Opcode::ConvertF32U8;
    case IR::Opcode::ConvertF64U16:
        return IR::Opcode::ConvertF32U16;
    case IR::Opcode::ConvertF64U32:
        return IR::Opcode::ConvertF32U32;
    case IR::Opcode::ConvertF64U64:
        return IR::Opcode::ConvertF32U64;
    default:
        return op;
    }
}

// Rewrite one instruction: pack/unpack of doubles becomes the explicit bit
// manipulation above, and any other f64 opcode is swapped for its f32 form.
void Lower(IR::Block& block, IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::PackDouble2x32: {
        IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
        inst.ReplaceUsesWith(PackedF64ToF32(ir, inst.Arg(0)));
        break;
    }
    case IR::Opcode::UnpackDouble2x32: {
        IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
        inst.ReplaceUsesWith(F32ToPackedF64(ir, inst.Arg(0)));
        break;
    }
    default:
        inst.ReplaceOpcode(Replace(inst.GetOpcode()));
        break;
    }
}

} // Anonymous namespace

void LowerFp64ToFp32(IR::Program& program) {
    for (IR::Block* const block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            Lower(*block, inst);
        }
    }
}

} // namespace Shader::Optimization
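
To sanity-check the bit-field offsets and exponent re-biasing above, here is a
minimal host-side sketch of the same demotion, written against plain integers
rather than the shader IR. It is not part of the patch and the function name is
hypothetical; it mirrors the pass's choices: truncated mantissa, exponent
rebased from 1023 to 127, subnormals flushed, inf/NaN preserved.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Hypothetical host-side mirror of PackedF64ToF32: takes the raw bits of an
// f64 and returns the raw bits of the lowered f32.
static uint32_t DemoteF64BitsToF32Bits(uint64_t bits) {
    const uint32_t lo = static_cast<uint32_t>(bits);
    const uint32_t hi = static_cast<uint32_t>(bits >> 32);
    const uint32_t sign = (hi >> 31) & 1;
    const uint32_t exp = (hi >> 20) & 0x7ff;
    const uint32_t mantissa_hi = hi & 0xfffff;     // top 20 of the 52 mantissa bits
    const uint32_t mantissa_lo = (lo >> 29) & 0x7; // next 3 bits; the rest is dropped
    const uint32_t mantissa = (mantissa_hi << 3) | mantissa_lo;
    uint32_t out_exp;
    if (exp == 0) {
        out_exp = 0; // zero or subnormal: keep a zero exponent
    } else if (exp == 0x7ff) {
        out_exp = 0xff; // inf/NaN maps to the f32 inf/NaN exponent
    } else {
        out_exp = exp + 127 - 1023; // re-bias; exponents outside the f32 range
                                    // are not handled, matching the IR pass
    }
    return (sign << 31) | (out_exp << 23) | mantissa;
}

int main() {
    const double samples[] = {1.0, -2.5, 3.141592653589793, 0.0};
    for (const double d : samples) {
        uint64_t bits;
        std::memcpy(&bits, &d, sizeof(bits));
        const uint32_t lowered = DemoteF64BitsToF32Bits(bits);
        float f;
        std::memcpy(&f, &lowered, sizeof(f));
        // Note: a native cast to float rounds, while this (and the pass)
        // truncates, so the last mantissa bit may differ from
        // static_cast<float>(d).
        std::printf("%.17g -> %.9g\n", d, f);
    }
}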
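
For context on how the pass is wired up: host_translate_info.h gains one line
and translate_program.cpp gains three in this change, presumably a host
capability flag and a gated call to the new pass. A sketch of what that call
site plausibly looks like (the flag name is an assumption, not taken from the
hunk above):

// Hypothetical call site in translate_program.cpp, alongside the other
// optimization passes; host_info.support_float64 is an assumed field name.
if (!host_info.support_float64) {
    // Host GPU cannot execute fp64 shader ops; rewrite them as fp32.
    Optimization::LowerFp64ToFp32(program);
}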