Browse Source
[dynarmic] merge IR opt pass into single TU (#2561)
[dynarmic] merge IR opt pass into single TU (#2561)
Signed-off-by: lizzie <lizzie@eden-emu.dev> Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2561 Reviewed-by: MaranBr <maranbr@eden-emu.dev> Reviewed-by: CamilleLaVey <camillelavey99@gmail.com> Reviewed-by: crueter <crueter@eden-emu.dev> Co-authored-by: lizzie <lizzie@eden-emu.dev> Co-committed-by: lizzie <lizzie@eden-emu.dev>pull/480/head
committed by
crueter
No known key found for this signature in database
GPG Key ID: 425ACD2D4830EBC6
24 changed files with 1581 additions and 1944 deletions
-
15src/dynarmic/src/dynarmic/CMakeLists.txt
-
21src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp
-
22src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp
-
2src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp
-
16src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp
-
18src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp
-
70src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp
-
382src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp
-
57src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp
-
165src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp
-
57src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp
-
559src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp
-
23src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp
-
44src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp
-
127src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h
-
18src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp
-
47src/dynarmic/src/dynarmic/ir/opt/passes.h
-
218src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp
-
51src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp
-
1519src/dynarmic/src/dynarmic/ir/opt_passes.cpp
-
37src/dynarmic/src/dynarmic/ir/opt_passes.h
-
10src/dynarmic/tests/A32/fuzz_thumb.cpp
-
12src/dynarmic/tests/A64/fuzz_with_unicorn.cpp
-
35src/dynarmic/tests/print_info.cpp
@ -1,70 +0,0 @@ |
|||
/* This file is part of the dynarmic project.
|
|||
* Copyright (c) 2016 MerryMage |
|||
* SPDX-License-Identifier: 0BSD |
|||
*/ |
|||
|
|||
#include "dynarmic/interface/A32/config.h"
|
|||
#include "dynarmic/ir/basic_block.h"
|
|||
#include "dynarmic/ir/opcodes.h"
|
|||
#include "dynarmic/ir/opt/passes.h"
|
|||
|
|||
namespace Dynarmic::Optimization { |
|||
|
|||
void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { |
|||
for (auto& inst : block) { |
|||
switch (inst.GetOpcode()) { |
|||
case IR::Opcode::A32ReadMemory8: { |
|||
if (!inst.AreAllArgsImmediates()) { |
|||
break; |
|||
} |
|||
|
|||
const u32 vaddr = inst.GetArg(1).GetU32(); |
|||
if (cb->IsReadOnlyMemory(vaddr)) { |
|||
const u8 value_from_memory = cb->MemoryRead8(vaddr); |
|||
inst.ReplaceUsesWith(IR::Value{value_from_memory}); |
|||
} |
|||
break; |
|||
} |
|||
case IR::Opcode::A32ReadMemory16: { |
|||
if (!inst.AreAllArgsImmediates()) { |
|||
break; |
|||
} |
|||
|
|||
const u32 vaddr = inst.GetArg(1).GetU32(); |
|||
if (cb->IsReadOnlyMemory(vaddr)) { |
|||
const u16 value_from_memory = cb->MemoryRead16(vaddr); |
|||
inst.ReplaceUsesWith(IR::Value{value_from_memory}); |
|||
} |
|||
break; |
|||
} |
|||
case IR::Opcode::A32ReadMemory32: { |
|||
if (!inst.AreAllArgsImmediates()) { |
|||
break; |
|||
} |
|||
|
|||
const u32 vaddr = inst.GetArg(1).GetU32(); |
|||
if (cb->IsReadOnlyMemory(vaddr)) { |
|||
const u32 value_from_memory = cb->MemoryRead32(vaddr); |
|||
inst.ReplaceUsesWith(IR::Value{value_from_memory}); |
|||
} |
|||
break; |
|||
} |
|||
case IR::Opcode::A32ReadMemory64: { |
|||
if (!inst.AreAllArgsImmediates()) { |
|||
break; |
|||
} |
|||
|
|||
const u32 vaddr = inst.GetArg(1).GetU32(); |
|||
if (cb->IsReadOnlyMemory(vaddr)) { |
|||
const u64 value_from_memory = cb->MemoryRead64(vaddr); |
|||
inst.ReplaceUsesWith(IR::Value{value_from_memory}); |
|||
} |
|||
break; |
|||
} |
|||
default: |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
|
|||
} // namespace Dynarmic::Optimization
|
|||
@ -1,382 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|||
|
|||
/* This file is part of the dynarmic project.
|
|||
* Copyright (c) 2016 MerryMage |
|||
* SPDX-License-Identifier: 0BSD |
|||
*/ |
|||
|
|||
#include <algorithm>
|
|||
#include <array>
|
|||
#include <functional>
|
|||
|
|||
#include "dynarmic/common/assert.h"
|
|||
#include "dynarmic/common/common_types.h"
|
|||
|
|||
#include "dynarmic/frontend/A32/a32_ir_emitter.h"
|
|||
#include "dynarmic/frontend/A32/a32_types.h"
|
|||
#include "dynarmic/ir/basic_block.h"
|
|||
#include "dynarmic/ir/opcodes.h"
|
|||
#include "dynarmic/ir/opt/passes.h"
|
|||
#include "dynarmic/ir/value.h"
|
|||
|
|||
namespace Dynarmic::Optimization { |
|||
|
|||
namespace { |
|||
|
|||
void FlagsPass(IR::Block& block) { |
|||
using Iterator = std::reverse_iterator<IR::Block::iterator>; |
|||
|
|||
struct FlagInfo { |
|||
bool set_not_required = false; |
|||
bool has_value_request = false; |
|||
Iterator value_request = {}; |
|||
}; |
|||
struct ValuelessFlagInfo { |
|||
bool set_not_required = false; |
|||
}; |
|||
ValuelessFlagInfo nzcvq; |
|||
ValuelessFlagInfo nzcv; |
|||
ValuelessFlagInfo nz; |
|||
FlagInfo c_flag; |
|||
FlagInfo ge; |
|||
|
|||
auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { |
|||
if (info.has_value_request) { |
|||
info.value_request->ReplaceUsesWith(value); |
|||
} |
|||
info.has_value_request = false; |
|||
|
|||
if (info.set_not_required) { |
|||
inst->Invalidate(); |
|||
} |
|||
info.set_not_required = true; |
|||
}; |
|||
|
|||
auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { |
|||
if (info.set_not_required) { |
|||
inst->Invalidate(); |
|||
} |
|||
info.set_not_required = true; |
|||
}; |
|||
|
|||
auto do_get = [](FlagInfo& info, Iterator inst) { |
|||
if (info.has_value_request) { |
|||
info.value_request->ReplaceUsesWith(IR::Value{&*inst}); |
|||
} |
|||
info.has_value_request = true; |
|||
info.value_request = inst; |
|||
}; |
|||
|
|||
A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; |
|||
|
|||
for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { |
|||
auto const opcode = inst->GetOpcode(); |
|||
switch (opcode) { |
|||
case IR::Opcode::A32GetCFlag: { |
|||
do_get(c_flag, inst); |
|||
break; |
|||
} |
|||
case IR::Opcode::A32SetCpsrNZCV: { |
|||
if (c_flag.has_value_request) { |
|||
ir.SetInsertionPointBefore(inst.base()); // base is one ahead
|
|||
IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)}); |
|||
c_flag.value_request->ReplaceUsesWith(c); |
|||
c_flag.has_value_request = false; |
|||
break; // This case will be executed again because of the above
|
|||
} |
|||
|
|||
do_set_valueless(nzcv, inst); |
|||
|
|||
nz = {.set_not_required = true}; |
|||
c_flag = {.set_not_required = true}; |
|||
break; |
|||
} |
|||
case IR::Opcode::A32SetCpsrNZCVRaw: { |
|||
if (c_flag.has_value_request) { |
|||
nzcv.set_not_required = false; |
|||
} |
|||
|
|||
do_set_valueless(nzcv, inst); |
|||
|
|||
nzcvq = {}; |
|||
nz = {.set_not_required = true}; |
|||
c_flag = {.set_not_required = true}; |
|||
break; |
|||
} |
|||
case IR::Opcode::A32SetCpsrNZCVQ: { |
|||
if (c_flag.has_value_request) { |
|||
nzcvq.set_not_required = false; |
|||
} |
|||
|
|||
do_set_valueless(nzcvq, inst); |
|||
|
|||
nzcv = {.set_not_required = true}; |
|||
nz = {.set_not_required = true}; |
|||
c_flag = {.set_not_required = true}; |
|||
break; |
|||
} |
|||
case IR::Opcode::A32SetCpsrNZ: { |
|||
do_set_valueless(nz, inst); |
|||
|
|||
nzcvq = {}; |
|||
nzcv = {}; |
|||
break; |
|||
} |
|||
case IR::Opcode::A32SetCpsrNZC: { |
|||
if (c_flag.has_value_request) { |
|||
c_flag.value_request->ReplaceUsesWith(inst->GetArg(1)); |
|||
c_flag.has_value_request = false; |
|||
} |
|||
|
|||
if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) { |
|||
const auto nz_value = inst->GetArg(0); |
|||
|
|||
inst->Invalidate(); |
|||
|
|||
ir.SetInsertionPointBefore(inst.base()); |
|||
ir.SetCpsrNZ(IR::NZCV{nz_value}); |
|||
|
|||
nzcvq = {}; |
|||
nzcv = {}; |
|||
nz = {.set_not_required = true}; |
|||
break; |
|||
} |
|||
|
|||
if (nz.set_not_required && c_flag.set_not_required) { |
|||
inst->Invalidate(); |
|||
} else if (nz.set_not_required) { |
|||
inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker()); |
|||
} |
|||
nz.set_not_required = true; |
|||
c_flag.set_not_required = true; |
|||
|
|||
nzcv = {}; |
|||
nzcvq = {}; |
|||
break; |
|||
} |
|||
case IR::Opcode::A32SetGEFlags: { |
|||
do_set(ge, inst->GetArg(0), inst); |
|||
break; |
|||
} |
|||
case IR::Opcode::A32GetGEFlags: { |
|||
do_get(ge, inst); |
|||
break; |
|||
} |
|||
case IR::Opcode::A32SetGEFlagsCompressed: { |
|||
ge = {.set_not_required = true}; |
|||
break; |
|||
} |
|||
case IR::Opcode::A32OrQFlag: { |
|||
break; |
|||
} |
|||
default: { |
|||
if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { |
|||
nzcvq = {}; |
|||
nzcv = {}; |
|||
nz = {}; |
|||
c_flag = {}; |
|||
ge = {}; |
|||
} |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
void RegisterPass(IR::Block& block) { |
|||
using Iterator = IR::Block::iterator; |
|||
|
|||
struct RegInfo { |
|||
IR::Value register_value; |
|||
std::optional<Iterator> last_set_instruction; |
|||
}; |
|||
std::array<RegInfo, 15> reg_info; |
|||
|
|||
const auto do_get = [](RegInfo& info, Iterator get_inst) { |
|||
if (info.register_value.IsEmpty()) { |
|||
info.register_value = IR::Value(&*get_inst); |
|||
return; |
|||
} |
|||
get_inst->ReplaceUsesWith(info.register_value); |
|||
}; |
|||
|
|||
const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { |
|||
if (info.last_set_instruction) { |
|||
(*info.last_set_instruction)->Invalidate(); |
|||
} |
|||
info = { |
|||
.register_value = value, |
|||
.last_set_instruction = set_inst, |
|||
}; |
|||
}; |
|||
|
|||
enum class ExtValueType { |
|||
Empty, |
|||
Single, |
|||
Double, |
|||
VectorDouble, |
|||
VectorQuad, |
|||
}; |
|||
struct ExtRegInfo { |
|||
ExtValueType value_type = {}; |
|||
IR::Value register_value; |
|||
std::optional<Iterator> last_set_instruction; |
|||
}; |
|||
std::array<ExtRegInfo, 64> ext_reg_info; |
|||
|
|||
const auto do_ext_get = [](ExtValueType type, std::initializer_list<std::reference_wrapper<ExtRegInfo>> infos, Iterator get_inst) { |
|||
if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { |
|||
for (auto& info : infos) { |
|||
info.get() = { |
|||
.value_type = type, |
|||
.register_value = IR::Value(&*get_inst), |
|||
.last_set_instruction = std::nullopt, |
|||
}; |
|||
} |
|||
return; |
|||
} |
|||
get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); |
|||
}; |
|||
|
|||
const auto do_ext_set = [](ExtValueType type, std::initializer_list<std::reference_wrapper<ExtRegInfo>> infos, IR::Value value, Iterator set_inst) { |
|||
if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { |
|||
if (std::data(infos)[0].get().last_set_instruction) { |
|||
(*std::data(infos)[0].get().last_set_instruction)->Invalidate(); |
|||
} |
|||
} |
|||
for (auto& info : infos) { |
|||
info.get() = { |
|||
.value_type = type, |
|||
.register_value = value, |
|||
.last_set_instruction = set_inst, |
|||
}; |
|||
} |
|||
}; |
|||
|
|||
// Location and version don't matter here.
|
|||
A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; |
|||
|
|||
for (auto inst = block.begin(); inst != block.end(); ++inst) { |
|||
auto const opcode = inst->GetOpcode(); |
|||
switch (opcode) { |
|||
case IR::Opcode::A32GetRegister: { |
|||
const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); |
|||
ASSERT(reg != A32::Reg::PC); |
|||
const size_t reg_index = static_cast<size_t>(reg); |
|||
do_get(reg_info[reg_index], inst); |
|||
break; |
|||
} |
|||
case IR::Opcode::A32SetRegister: { |
|||
const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); |
|||
if (reg == A32::Reg::PC) { |
|||
break; |
|||
} |
|||
const auto reg_index = static_cast<size_t>(reg); |
|||
do_set(reg_info[reg_index], inst->GetArg(1), inst); |
|||
break; |
|||
} |
|||
case IR::Opcode::A32GetExtendedRegister32: { |
|||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); |
|||
const size_t reg_index = A32::RegNumber(reg); |
|||
do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); |
|||
break; |
|||
} |
|||
case IR::Opcode::A32SetExtendedRegister32: { |
|||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); |
|||
const size_t reg_index = A32::RegNumber(reg); |
|||
do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst); |
|||
break; |
|||
} |
|||
case IR::Opcode::A32GetExtendedRegister64: { |
|||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); |
|||
const size_t reg_index = A32::RegNumber(reg); |
|||
do_ext_get(ExtValueType::Double, |
|||
{ |
|||
ext_reg_info[reg_index * 2 + 0], |
|||
ext_reg_info[reg_index * 2 + 1], |
|||
}, |
|||
inst); |
|||
break; |
|||
} |
|||
case IR::Opcode::A32SetExtendedRegister64: { |
|||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); |
|||
const size_t reg_index = A32::RegNumber(reg); |
|||
do_ext_set(ExtValueType::Double, |
|||
{ |
|||
ext_reg_info[reg_index * 2 + 0], |
|||
ext_reg_info[reg_index * 2 + 1], |
|||
}, |
|||
inst->GetArg(1), |
|||
inst); |
|||
break; |
|||
} |
|||
case IR::Opcode::A32GetVector: { |
|||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); |
|||
const size_t reg_index = A32::RegNumber(reg); |
|||
if (A32::IsDoubleExtReg(reg)) { |
|||
do_ext_get(ExtValueType::VectorDouble, |
|||
{ |
|||
ext_reg_info[reg_index * 2 + 0], |
|||
ext_reg_info[reg_index * 2 + 1], |
|||
}, |
|||
inst); |
|||
} else { |
|||
DEBUG_ASSERT(A32::IsQuadExtReg(reg)); |
|||
do_ext_get(ExtValueType::VectorQuad, |
|||
{ |
|||
ext_reg_info[reg_index * 4 + 0], |
|||
ext_reg_info[reg_index * 4 + 1], |
|||
ext_reg_info[reg_index * 4 + 2], |
|||
ext_reg_info[reg_index * 4 + 3], |
|||
}, |
|||
inst); |
|||
} |
|||
break; |
|||
} |
|||
case IR::Opcode::A32SetVector: { |
|||
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); |
|||
const size_t reg_index = A32::RegNumber(reg); |
|||
if (A32::IsDoubleExtReg(reg)) { |
|||
ir.SetInsertionPointAfter(inst); |
|||
const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); |
|||
do_ext_set(ExtValueType::VectorDouble, |
|||
{ |
|||
ext_reg_info[reg_index * 2 + 0], |
|||
ext_reg_info[reg_index * 2 + 1], |
|||
}, |
|||
stored_value, |
|||
inst); |
|||
} else { |
|||
DEBUG_ASSERT(A32::IsQuadExtReg(reg)); |
|||
do_ext_set(ExtValueType::VectorQuad, |
|||
{ |
|||
ext_reg_info[reg_index * 4 + 0], |
|||
ext_reg_info[reg_index * 4 + 1], |
|||
ext_reg_info[reg_index * 4 + 2], |
|||
ext_reg_info[reg_index * 4 + 3], |
|||
}, |
|||
inst->GetArg(1), |
|||
inst); |
|||
} |
|||
break; |
|||
} |
|||
default: { |
|||
if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { |
|||
reg_info = {}; |
|||
ext_reg_info = {}; |
|||
} |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
} // namespace
|
|||
|
|||
void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { |
|||
FlagsPass(block); |
|||
RegisterPass(block); |
|||
} |
|||
|
|||
} // namespace Dynarmic::Optimization
|
|||
@ -1,57 +0,0 @@ |
|||
/* This file is part of the dynarmic project.
|
|||
* Copyright (c) 2018 MerryMage |
|||
* SPDX-License-Identifier: 0BSD |
|||
*/ |
|||
|
|||
#include "dynarmic/frontend/A64/a64_ir_emitter.h"
|
|||
#include "dynarmic/interface/A64/config.h"
|
|||
#include "dynarmic/ir/basic_block.h"
|
|||
#include "dynarmic/ir/microinstruction.h"
|
|||
#include "dynarmic/ir/opcodes.h"
|
|||
#include "dynarmic/ir/opt/passes.h"
|
|||
|
|||
namespace Dynarmic::Optimization { |
|||
|
|||
void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) { |
|||
if (conf.hook_data_cache_operations) { |
|||
return; |
|||
} |
|||
|
|||
for (auto& inst : block) { |
|||
if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) { |
|||
continue; |
|||
} |
|||
|
|||
const auto op = static_cast<A64::DataCacheOperation>(inst.GetArg(1).GetU64()); |
|||
if (op == A64::DataCacheOperation::ZeroByVA) { |
|||
A64::IREmitter ir{block}; |
|||
ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}}; |
|||
ir.SetInsertionPointBefore(&inst); |
|||
|
|||
size_t bytes = 4 << static_cast<size_t>(conf.dczid_el0 & 0b1111); |
|||
IR::U64 addr{inst.GetArg(2)}; |
|||
|
|||
const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); |
|||
while (bytes >= 16) { |
|||
ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA); |
|||
addr = ir.Add(addr, ir.Imm64(16)); |
|||
bytes -= 16; |
|||
} |
|||
|
|||
while (bytes >= 8) { |
|||
ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA); |
|||
addr = ir.Add(addr, ir.Imm64(8)); |
|||
bytes -= 8; |
|||
} |
|||
|
|||
while (bytes >= 4) { |
|||
ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA); |
|||
addr = ir.Add(addr, ir.Imm64(4)); |
|||
bytes -= 4; |
|||
} |
|||
} |
|||
inst.Invalidate(); |
|||
} |
|||
} |
|||
|
|||
} // namespace Dynarmic::Optimization
|
|||
@ -1,165 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|||
|
|||
/* This file is part of the dynarmic project.
|
|||
* Copyright (c) 2016 MerryMage |
|||
* SPDX-License-Identifier: 0BSD |
|||
*/ |
|||
|
|||
#include <array>
|
|||
|
|||
#include "dynarmic/common/common_types.h"
|
|||
|
|||
#include "dynarmic/frontend/A64/a64_types.h"
|
|||
#include "dynarmic/ir/basic_block.h"
|
|||
#include "dynarmic/ir/opcodes.h"
|
|||
#include "dynarmic/ir/opt/passes.h"
|
|||
#include "dynarmic/ir/value.h"
|
|||
|
|||
namespace Dynarmic::Optimization { |
|||
|
|||
void A64GetSetElimination(IR::Block& block) { |
|||
using Iterator = IR::Block::iterator; |
|||
|
|||
enum class TrackingType { |
|||
W, |
|||
X, |
|||
S, |
|||
D, |
|||
Q, |
|||
SP, |
|||
NZCV, |
|||
NZCVRaw, |
|||
}; |
|||
struct RegisterInfo { |
|||
IR::Value register_value; |
|||
TrackingType tracking_type; |
|||
bool set_instruction_present = false; |
|||
Iterator last_set_instruction; |
|||
}; |
|||
std::array<RegisterInfo, 31> reg_info; |
|||
std::array<RegisterInfo, 32> vec_info; |
|||
RegisterInfo sp_info; |
|||
RegisterInfo nzcv_info; |
|||
|
|||
const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) { |
|||
if (info.set_instruction_present) { |
|||
info.last_set_instruction->Invalidate(); |
|||
block.Instructions().erase(info.last_set_instruction); |
|||
} |
|||
|
|||
info.register_value = value; |
|||
info.tracking_type = tracking_type; |
|||
info.set_instruction_present = true; |
|||
info.last_set_instruction = set_inst; |
|||
}; |
|||
|
|||
const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { |
|||
const auto do_nothing = [&] { |
|||
info = {}; |
|||
info.register_value = IR::Value(&*get_inst); |
|||
info.tracking_type = tracking_type; |
|||
}; |
|||
|
|||
if (info.register_value.IsEmpty()) { |
|||
do_nothing(); |
|||
return; |
|||
} |
|||
|
|||
if (info.tracking_type == tracking_type) { |
|||
get_inst->ReplaceUsesWith(info.register_value); |
|||
return; |
|||
} |
|||
|
|||
do_nothing(); |
|||
}; |
|||
|
|||
for (auto inst = block.begin(); inst != block.end(); ++inst) { |
|||
auto const opcode = inst->GetOpcode(); |
|||
switch (opcode) { |
|||
case IR::Opcode::A64GetW: { |
|||
const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); |
|||
do_get(reg_info.at(index), inst, TrackingType::W); |
|||
break; |
|||
} |
|||
case IR::Opcode::A64GetX: { |
|||
const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); |
|||
do_get(reg_info.at(index), inst, TrackingType::X); |
|||
break; |
|||
} |
|||
case IR::Opcode::A64GetS: { |
|||
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); |
|||
do_get(vec_info.at(index), inst, TrackingType::S); |
|||
break; |
|||
} |
|||
case IR::Opcode::A64GetD: { |
|||
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); |
|||
do_get(vec_info.at(index), inst, TrackingType::D); |
|||
break; |
|||
} |
|||
case IR::Opcode::A64GetQ: { |
|||
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); |
|||
do_get(vec_info.at(index), inst, TrackingType::Q); |
|||
break; |
|||
} |
|||
case IR::Opcode::A64GetSP: { |
|||
do_get(sp_info, inst, TrackingType::SP); |
|||
break; |
|||
} |
|||
case IR::Opcode::A64GetNZCVRaw: { |
|||
do_get(nzcv_info, inst, TrackingType::NZCVRaw); |
|||
break; |
|||
} |
|||
case IR::Opcode::A64SetW: { |
|||
const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); |
|||
do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W); |
|||
break; |
|||
} |
|||
case IR::Opcode::A64SetX: { |
|||
const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); |
|||
do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X); |
|||
break; |
|||
} |
|||
case IR::Opcode::A64SetS: { |
|||
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); |
|||
do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S); |
|||
break; |
|||
} |
|||
case IR::Opcode::A64SetD: { |
|||
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); |
|||
do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D); |
|||
break; |
|||
} |
|||
case IR::Opcode::A64SetQ: { |
|||
const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); |
|||
do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q); |
|||
break; |
|||
} |
|||
case IR::Opcode::A64SetSP: { |
|||
do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP); |
|||
break; |
|||
} |
|||
case IR::Opcode::A64SetNZCV: { |
|||
do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV); |
|||
break; |
|||
} |
|||
case IR::Opcode::A64SetNZCVRaw: { |
|||
do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw); |
|||
break; |
|||
} |
|||
default: { |
|||
if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { |
|||
nzcv_info = {}; |
|||
} |
|||
if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { |
|||
reg_info = {}; |
|||
vec_info = {}; |
|||
sp_info = {}; |
|||
} |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
} // namespace Dynarmic::Optimization
|
|||
@ -1,57 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|||
|
|||
/* This file is part of the dynarmic project.
|
|||
* Copyright (c) 2018 MerryMage |
|||
* SPDX-License-Identifier: 0BSD |
|||
*/ |
|||
|
|||
#include <boost/variant/get.hpp>
|
|||
#include "dynarmic/common/common_types.h"
|
|||
|
|||
#include "dynarmic/frontend/A64/a64_location_descriptor.h"
|
|||
#include "dynarmic/frontend/A64/translate/a64_translate.h"
|
|||
#include "dynarmic/interface/A64/config.h"
|
|||
#include "dynarmic/ir/basic_block.h"
|
|||
#include "dynarmic/ir/opt/passes.h"
|
|||
|
|||
namespace Dynarmic::Optimization { |
|||
|
|||
void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { |
|||
const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { |
|||
const auto instruction = cb->MemoryReadCode(location.PC()); |
|||
if (!instruction) |
|||
return false; |
|||
|
|||
IR::Block new_block{location}; |
|||
A64::TranslateSingleInstruction(new_block, location, *instruction); |
|||
|
|||
if (!new_block.Instructions().empty()) |
|||
return false; |
|||
|
|||
const IR::Terminal terminal = new_block.GetTerminal(); |
|||
if (auto term = boost::get<IR::Term::Interpret>(&terminal)) { |
|||
return term->next == location; |
|||
} |
|||
|
|||
return false; |
|||
}; |
|||
|
|||
IR::Terminal terminal = block.GetTerminal(); |
|||
auto term = boost::get<IR::Term::Interpret>(&terminal); |
|||
if (!term) |
|||
return; |
|||
|
|||
A64::LocationDescriptor location{term->next}; |
|||
size_t num_instructions = 1; |
|||
|
|||
while (is_interpret_instruction(location.AdvancePC(static_cast<int>(num_instructions * 4)))) { |
|||
num_instructions++; |
|||
} |
|||
|
|||
term->num_instructions = num_instructions; |
|||
block.ReplaceTerminal(terminal); |
|||
block.CycleCount() += num_instructions - 1; |
|||
} |
|||
|
|||
} // namespace Dynarmic::Optimization
|
|||
@ -1,559 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|||
|
|||
/* This file is part of the dynarmic project.
|
|||
* Copyright (c) 2016 MerryMage |
|||
* SPDX-License-Identifier: 0BSD |
|||
*/ |
|||
|
|||
#include <optional>
|
|||
|
|||
#include "dynarmic/common/assert.h"
|
|||
#include <mcl/bit/rotate.hpp>
|
|||
#include <mcl/bit/swap.hpp>
|
|||
#include "dynarmic/common/common_types.h"
|
|||
|
|||
#include "dynarmic/common/safe_ops.h"
|
|||
#include "dynarmic/ir/basic_block.h"
|
|||
#include "dynarmic/ir/ir_emitter.h"
|
|||
#include "dynarmic/ir/opcodes.h"
|
|||
#include "dynarmic/ir/opt/passes.h"
|
|||
|
|||
namespace Dynarmic::Optimization { |
|||
|
|||
using Op = Dynarmic::IR::Opcode; |
|||
|
|||
namespace { |
|||
|
|||
// Tiny helper to avoid the need to store based off the opcode
|
|||
// bit size all over the place within folding functions.
|
|||
void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { |
|||
if (is_32_bit) { |
|||
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)}); |
|||
} else { |
|||
inst.ReplaceUsesWith(IR::Value{value}); |
|||
} |
|||
} |
|||
|
|||
IR::Value Value(bool is_32_bit, u64 value) { |
|||
return is_32_bit ? IR::Value{static_cast<u32>(value)} : IR::Value{value}; |
|||
} |
|||
|
|||
template<typename ImmFn> |
|||
bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) { |
|||
const auto lhs = inst.GetArg(0); |
|||
const auto rhs = inst.GetArg(1); |
|||
|
|||
const bool is_lhs_immediate = lhs.IsImmediate(); |
|||
const bool is_rhs_immediate = rhs.IsImmediate(); |
|||
|
|||
if (is_lhs_immediate && is_rhs_immediate) { |
|||
const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64()); |
|||
ReplaceUsesWith(inst, is_32_bit, result); |
|||
return false; |
|||
} |
|||
|
|||
if (is_lhs_immediate && !is_rhs_immediate) { |
|||
const IR::Inst* rhs_inst = rhs.GetInstRecursive(); |
|||
if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) { |
|||
const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64()); |
|||
inst.SetArg(0, rhs_inst->GetArg(0)); |
|||
inst.SetArg(1, Value(is_32_bit, combined)); |
|||
} else { |
|||
// Normalize
|
|||
inst.SetArg(0, rhs); |
|||
inst.SetArg(1, lhs); |
|||
} |
|||
} |
|||
|
|||
if (!is_lhs_immediate && is_rhs_immediate) { |
|||
const IR::Inst* lhs_inst = lhs.GetInstRecursive(); |
|||
if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) { |
|||
const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64()); |
|||
inst.SetArg(0, lhs_inst->GetArg(0)); |
|||
inst.SetArg(1, Value(is_32_bit, combined)); |
|||
} |
|||
} |
|||
|
|||
return true; |
|||
} |
|||
|
|||
void FoldAdd(IR::Inst& inst, bool is_32_bit) { |
|||
const auto lhs = inst.GetArg(0); |
|||
const auto rhs = inst.GetArg(1); |
|||
const auto carry = inst.GetArg(2); |
|||
|
|||
if (lhs.IsImmediate() && !rhs.IsImmediate()) { |
|||
// Normalize
|
|||
inst.SetArg(0, rhs); |
|||
inst.SetArg(1, lhs); |
|||
FoldAdd(inst, is_32_bit); |
|||
return; |
|||
} |
|||
|
|||
if (inst.HasAssociatedPseudoOperation()) { |
|||
return; |
|||
} |
|||
|
|||
if (!lhs.IsImmediate() && rhs.IsImmediate()) { |
|||
const IR::Inst* lhs_inst = lhs.GetInstRecursive(); |
|||
if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) { |
|||
const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1(); |
|||
if (combined == 0) { |
|||
inst.ReplaceUsesWith(lhs_inst->GetArg(0)); |
|||
return; |
|||
} |
|||
inst.SetArg(0, lhs_inst->GetArg(0)); |
|||
inst.SetArg(1, Value(is_32_bit, combined)); |
|||
return; |
|||
} |
|||
if (rhs.IsZero() && carry.IsZero()) { |
|||
inst.ReplaceUsesWith(lhs); |
|||
return; |
|||
} |
|||
} |
|||
|
|||
if (inst.AreAllArgsImmediates()) { |
|||
const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1(); |
|||
ReplaceUsesWith(inst, is_32_bit, result); |
|||
return; |
|||
} |
|||
} |
|||
|
|||
/// Folds AND operations based on the following:
|
|||
///
|
|||
/// 1. imm_x & imm_y -> result
|
|||
/// 2. x & 0 -> 0
|
|||
/// 3. 0 & y -> 0
|
|||
/// 4. x & y -> y (where x has all bits set to 1)
|
|||
/// 5. x & y -> x (where y has all bits set to 1)
|
|||
///
|
|||
void FoldAND(IR::Inst& inst, bool is_32_bit) { |
|||
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) { |
|||
const auto rhs = inst.GetArg(1); |
|||
if (rhs.IsZero()) { |
|||
ReplaceUsesWith(inst, is_32_bit, 0); |
|||
} else if (rhs.HasAllBitsSet()) { |
|||
inst.ReplaceUsesWith(inst.GetArg(0)); |
|||
} |
|||
} |
|||
} |
|||
|
|||
/// Folds byte reversal opcodes based on the following:
|
|||
///
|
|||
/// 1. imm -> swap(imm)
|
|||
///
|
|||
void FoldByteReverse(IR::Inst& inst, Op op) { |
|||
const auto operand = inst.GetArg(0); |
|||
|
|||
if (!operand.IsImmediate()) { |
|||
return; |
|||
} |
|||
|
|||
if (op == Op::ByteReverseWord) { |
|||
const u32 result = mcl::bit::swap_bytes_32(static_cast<u32>(operand.GetImmediateAsU64())); |
|||
inst.ReplaceUsesWith(IR::Value{result}); |
|||
} else if (op == Op::ByteReverseHalf) { |
|||
const u16 result = mcl::bit::swap_bytes_16(static_cast<u16>(operand.GetImmediateAsU64())); |
|||
inst.ReplaceUsesWith(IR::Value{result}); |
|||
} else { |
|||
const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64()); |
|||
inst.ReplaceUsesWith(IR::Value{result}); |
|||
} |
|||
} |
|||
|
|||
/// Folds division operations based on the following:
|
|||
///
|
|||
/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual)
|
|||
/// 2. imm_x / imm_y -> result
|
|||
/// 3. x / 1 -> x
|
|||
///
|
|||
void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { |
|||
const auto rhs = inst.GetArg(1); |
|||
|
|||
if (rhs.IsZero()) { |
|||
ReplaceUsesWith(inst, is_32_bit, 0); |
|||
return; |
|||
} |
|||
|
|||
const auto lhs = inst.GetArg(0); |
|||
if (lhs.IsImmediate() && rhs.IsImmediate()) { |
|||
if (is_signed) { |
|||
const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); |
|||
ReplaceUsesWith(inst, is_32_bit, static_cast<u64>(result)); |
|||
} else { |
|||
const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); |
|||
ReplaceUsesWith(inst, is_32_bit, result); |
|||
} |
|||
} else if (rhs.IsUnsignedImmediate(1)) { |
|||
inst.ReplaceUsesWith(IR::Value{lhs}); |
|||
} |
|||
} |
|||
|
|||
// Folds EOR operations based on the following:
|
|||
//
|
|||
// 1. imm_x ^ imm_y -> result
|
|||
// 2. x ^ 0 -> x
|
|||
// 3. 0 ^ y -> y
|
|||
//
|
|||
void FoldEOR(IR::Inst& inst, bool is_32_bit) { |
|||
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) { |
|||
const auto rhs = inst.GetArg(1); |
|||
if (rhs.IsZero()) { |
|||
inst.ReplaceUsesWith(inst.GetArg(0)); |
|||
} |
|||
} |
|||
} |
|||
|
|||
void FoldLeastSignificantByte(IR::Inst& inst) { |
|||
if (!inst.AreAllArgsImmediates()) { |
|||
return; |
|||
} |
|||
|
|||
const auto operand = inst.GetArg(0); |
|||
inst.ReplaceUsesWith(IR::Value{static_cast<u8>(operand.GetImmediateAsU64())}); |
|||
} |
|||
|
|||
// Folds a least-significant-halfword extraction when the operand is an immediate.
void FoldLeastSignificantHalf(IR::Inst& inst) {
    if (inst.AreAllArgsImmediates()) {
        const u64 imm = inst.GetArg(0).GetImmediateAsU64();
        inst.ReplaceUsesWith(IR::Value{static_cast<u16>(imm)});
    }
}
|||
|
|||
// Folds a least-significant-word extraction when the operand is an immediate.
void FoldLeastSignificantWord(IR::Inst& inst) {
    if (inst.AreAllArgsImmediates()) {
        const u64 imm = inst.GetArg(0).GetImmediateAsU64();
        inst.ReplaceUsesWith(IR::Value{static_cast<u32>(imm)});
    }
}
|||
|
|||
// Folds a most-significant-bit (bit 31) extraction when the operand is an immediate.
void FoldMostSignificantBit(IR::Inst& inst) {
    if (inst.AreAllArgsImmediates()) {
        const u64 imm = inst.GetArg(0).GetImmediateAsU64();
        inst.ReplaceUsesWith(IR::Value{(imm >> 31) != 0});
    }
}
|||
|
|||
// Folds a most-significant-word extraction when the operand is an immediate.
// Also folds the associated GetCarryFromOp pseudo-operation, if present, to
// bit 31 of the operand.
void FoldMostSignificantWord(IR::Inst& inst) {
    // Look up the carry pseudo-op before deciding whether to fold, so it can
    // be replaced together with the main result.
    IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);

    if (!inst.AreAllArgsImmediates()) {
        return;
    }

    const auto operand = inst.GetArg(0);
    if (carry_inst) {
        carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())});
    }
    inst.ReplaceUsesWith(IR::Value{static_cast<u32>(operand.GetImmediateAsU64() >> 32)});
}
|||
|
|||
// Folds multiplication operations based on the following:
|
|||
//
|
|||
// 1. imm_x * imm_y -> result
|
|||
// 2. x * 0 -> 0
|
|||
// 3. 0 * y -> 0
|
|||
// 4. x * 1 -> x
|
|||
// 5. 1 * y -> y
|
|||
//
|
|||
void FoldMultiply(IR::Inst& inst, bool is_32_bit) { |
|||
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) { |
|||
const auto rhs = inst.GetArg(1); |
|||
if (rhs.IsZero()) { |
|||
ReplaceUsesWith(inst, is_32_bit, 0); |
|||
} else if (rhs.IsUnsignedImmediate(1)) { |
|||
inst.ReplaceUsesWith(inst.GetArg(0)); |
|||
} |
|||
} |
|||
} |
|||
|
|||
// Folds NOT operations if the contained value is an immediate.
|
|||
void FoldNOT(IR::Inst& inst, bool is_32_bit) { |
|||
const auto operand = inst.GetArg(0); |
|||
|
|||
if (!operand.IsImmediate()) { |
|||
return; |
|||
} |
|||
|
|||
const u64 result = ~operand.GetImmediateAsU64(); |
|||
ReplaceUsesWith(inst, is_32_bit, result); |
|||
} |
|||
|
|||
// Folds OR operations based on the following:
|
|||
//
|
|||
// 1. imm_x | imm_y -> result
|
|||
// 2. x | 0 -> x
|
|||
// 3. 0 | y -> y
|
|||
//
|
|||
void FoldOR(IR::Inst& inst, bool is_32_bit) { |
|||
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) { |
|||
const auto rhs = inst.GetArg(1); |
|||
if (rhs.IsZero()) { |
|||
inst.ReplaceUsesWith(inst.GetArg(0)); |
|||
} |
|||
} |
|||
} |
|||
|
|||
// Normalizes a shift instruction's carry handling and reports whether the
// caller may fold it to an immediate.
//
// Returns true only when all arguments are immediates AND there is no
// associated carry pseudo-operation; otherwise the instruction must be kept
// (possibly with its carry-in argument neutralized to false).
bool FoldShifts(IR::Inst& inst) {
    IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);

    // The 32-bit variants can contain 3 arguments, while the
    // 64-bit variants only contain 2.
    if (inst.NumArgs() == 3 && !carry_inst) {
        // Nobody observes the carry, so the carry-in argument is irrelevant.
        inst.SetArg(2, IR::Value(false));
    }

    const auto shift_amount = inst.GetArg(1);

    if (shift_amount.IsZero()) {
        // Shifting by zero is the identity; the carry-out equals the carry-in.
        if (carry_inst) {
            carry_inst->ReplaceUsesWith(inst.GetArg(2));
        }
        inst.ReplaceUsesWith(inst.GetArg(0));
        return false;
    }

    if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) {
        // A known non-zero shift amount means the carry-in never propagates
        // to the carry-out, so it can be cleared.
        inst.SetArg(2, IR::Value(false));
    }

    if (!inst.AreAllArgsImmediates() || carry_inst) {
        return false;
    }

    return true;
}
|||
|
|||
// Folds a sign-extension-to-word when the operand is an immediate.
void FoldSignExtendXToWord(IR::Inst& inst) {
    if (inst.AreAllArgsImmediates()) {
        const s64 extended = inst.GetArg(0).GetImmediateAsS64();
        inst.ReplaceUsesWith(IR::Value{static_cast<u32>(extended)});
    }
}
|||
|
|||
// Folds a sign-extension-to-long when the operand is an immediate.
void FoldSignExtendXToLong(IR::Inst& inst) {
    if (inst.AreAllArgsImmediates()) {
        const s64 extended = inst.GetArg(0).GetImmediateAsS64();
        inst.ReplaceUsesWith(IR::Value{static_cast<u64>(extended)});
    }
}
|||
|
|||
// Folds a subtract-with-carry when every argument is an immediate and no
// pseudo-operation (carry/overflow observer) is attached.
void FoldSub(IR::Inst& inst, bool is_32_bit) {
    if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
        return;
    }

    const u64 minuend = inst.GetArg(0).GetImmediateAsU64();
    const u64 subtrahend = inst.GetArg(1).GetImmediateAsU64();
    const u64 carry_in = inst.GetArg(2).GetU1();

    // a - b is computed as a + ~b + carry, matching the hardware's
    // borrow-as-inverted-carry convention.
    ReplaceUsesWith(inst, is_32_bit, minuend + ~subtrahend + carry_in);
}
|||
|
|||
// Folds a zero-extension-to-word when the operand is an immediate.
void FoldZeroExtendXToWord(IR::Inst& inst) {
    if (inst.AreAllArgsImmediates()) {
        const u64 extended = inst.GetArg(0).GetImmediateAsU64();
        inst.ReplaceUsesWith(IR::Value{static_cast<u32>(extended)});
    }
}
|||
|
|||
// Folds a zero-extension-to-long when the operand is an immediate.
void FoldZeroExtendXToLong(IR::Inst& inst) {
    if (inst.AreAllArgsImmediates()) {
        const u64 extended = inst.GetArg(0).GetImmediateAsU64();
        inst.ReplaceUsesWith(IR::Value{extended});
    }
}
|||
} // Anonymous namespace
|
|||
|
|||
// Constant propagation pass: walks every instruction in the block once and
// folds those whose results can be computed at compile time, dispatching to
// the per-opcode Fold* helpers above. Instructions that cannot be folded are
// left untouched.
void ConstantPropagation(IR::Block& block) {
    for (auto& inst : block) {
        const auto opcode = inst.GetOpcode();

        switch (opcode) {
        // --- Narrowing / bit extraction ---
        case Op::LeastSignificantWord:
            FoldLeastSignificantWord(inst);
            break;
        case Op::MostSignificantWord:
            FoldMostSignificantWord(inst);
            break;
        case Op::LeastSignificantHalf:
            FoldLeastSignificantHalf(inst);
            break;
        case Op::LeastSignificantByte:
            FoldLeastSignificantByte(inst);
            break;
        case Op::MostSignificantBit:
            FoldMostSignificantBit(inst);
            break;
        // --- Comparisons with zero ---
        case Op::IsZero32:
            if (inst.AreAllArgsImmediates()) {
                inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0});
            }
            break;
        case Op::IsZero64:
            if (inst.AreAllArgsImmediates()) {
                inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0});
            }
            break;
        // --- Shifts and rotates (carry-aware; FoldShifts gates folding) ---
        // Safe::* helpers are used because shift amounts may equal or exceed
        // the operand width, which would be UB with the plain C++ operators.
        case Op::LogicalShiftLeft32:
            if (FoldShifts(inst)) {
                ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
            }
            break;
        case Op::LogicalShiftLeft64:
            if (FoldShifts(inst)) {
                ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
            }
            break;
        case Op::LogicalShiftRight32:
            if (FoldShifts(inst)) {
                ReplaceUsesWith(inst, true, Safe::LogicalShiftRight<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
            }
            break;
        case Op::LogicalShiftRight64:
            if (FoldShifts(inst)) {
                ReplaceUsesWith(inst, false, Safe::LogicalShiftRight<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
            }
            break;
        case Op::ArithmeticShiftRight32:
            if (FoldShifts(inst)) {
                ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
            }
            break;
        case Op::ArithmeticShiftRight64:
            if (FoldShifts(inst)) {
                ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
            }
            break;
        case Op::RotateRight32:
            if (FoldShifts(inst)) {
                ReplaceUsesWith(inst, true, mcl::bit::rotate_right<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
            }
            break;
        case Op::RotateRight64:
            if (FoldShifts(inst)) {
                ReplaceUsesWith(inst, false, mcl::bit::rotate_right<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
            }
            break;
        // --- Masked shifts: the shift amount is taken modulo the width, so
        // folding only needs all-immediate arguments (no carry involved) ---
        case Op::LogicalShiftLeftMasked32:
            if (inst.AreAllArgsImmediates()) {
                ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f));
            }
            break;
        case Op::LogicalShiftLeftMasked64:
            if (inst.AreAllArgsImmediates()) {
                ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f));
            }
            break;
        case Op::LogicalShiftRightMasked32:
            if (inst.AreAllArgsImmediates()) {
                ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f));
            }
            break;
        case Op::LogicalShiftRightMasked64:
            if (inst.AreAllArgsImmediates()) {
                ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f));
            }
            break;
        case Op::ArithmeticShiftRightMasked32:
            if (inst.AreAllArgsImmediates()) {
                ReplaceUsesWith(inst, true, static_cast<s32>(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f));
            }
            break;
        case Op::ArithmeticShiftRightMasked64:
            if (inst.AreAllArgsImmediates()) {
                ReplaceUsesWith(inst, false, static_cast<s64>(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f));
            }
            break;
        case Op::RotateRightMasked32:
            if (inst.AreAllArgsImmediates()) {
                ReplaceUsesWith(inst, true, mcl::bit::rotate_right<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32()));
            }
            break;
        case Op::RotateRightMasked64:
            if (inst.AreAllArgsImmediates()) {
                ReplaceUsesWith(inst, false, mcl::bit::rotate_right<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64()));
            }
            break;
        // --- Arithmetic ---
        case Op::Add32:
        case Op::Add64:
            FoldAdd(inst, opcode == Op::Add32);
            break;
        case Op::Sub32:
        case Op::Sub64:
            FoldSub(inst, opcode == Op::Sub32);
            break;
        case Op::Mul32:
        case Op::Mul64:
            FoldMultiply(inst, opcode == Op::Mul32);
            break;
        case Op::SignedDiv32:
        case Op::SignedDiv64:
            FoldDivide(inst, opcode == Op::SignedDiv32, true);
            break;
        case Op::UnsignedDiv32:
        case Op::UnsignedDiv64:
            FoldDivide(inst, opcode == Op::UnsignedDiv32, false);
            break;
        // --- Bitwise logic ---
        case Op::And32:
        case Op::And64:
            FoldAND(inst, opcode == Op::And32);
            break;
        case Op::Eor32:
        case Op::Eor64:
            FoldEOR(inst, opcode == Op::Eor32);
            break;
        case Op::Or32:
        case Op::Or64:
            FoldOR(inst, opcode == Op::Or32);
            break;
        case Op::Not32:
        case Op::Not64:
            FoldNOT(inst, opcode == Op::Not32);
            break;
        // --- Extensions and byte reversal ---
        case Op::SignExtendByteToWord:
        case Op::SignExtendHalfToWord:
            FoldSignExtendXToWord(inst);
            break;
        case Op::SignExtendByteToLong:
        case Op::SignExtendHalfToLong:
        case Op::SignExtendWordToLong:
            FoldSignExtendXToLong(inst);
            break;
        case Op::ZeroExtendByteToWord:
        case Op::ZeroExtendHalfToWord:
            FoldZeroExtendXToWord(inst);
            break;
        case Op::ZeroExtendByteToLong:
        case Op::ZeroExtendHalfToLong:
        case Op::ZeroExtendWordToLong:
            FoldZeroExtendXToLong(inst);
            break;
        case Op::ByteReverseWord:
        case Op::ByteReverseHalf:
        case Op::ByteReverseDual:
            FoldByteReverse(inst, opcode);
            break;
        default:
            break;
        }
    }
}
|||
|
|||
} // namespace Dynarmic::Optimization
|
|||
@ -1,23 +0,0 @@ |
|||
/* This file is part of the dynarmic project.
|
|||
* Copyright (c) 2016 MerryMage |
|||
* SPDX-License-Identifier: 0BSD |
|||
*/ |
|||
|
|||
#include <mcl/iterator/reverse.hpp>
|
|||
|
|||
#include "dynarmic/ir/basic_block.h"
|
|||
#include "dynarmic/ir/opt/passes.h"
|
|||
|
|||
namespace Dynarmic::Optimization { |
|||
|
|||
// Removes instructions whose results are never used and which have no side
// effects.
void DeadCodeElimination(IR::Block& block) {
    // Walk the block backwards: invalidating a dead instruction drops the use
    // counts of its operands, which can make earlier instructions dead too.
    for (auto& inst : mcl::iterator::reverse(block)) {
        const bool is_dead = !inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode());
        if (is_dead) {
            inst.Invalidate();
        }
    }
}
|||
|
|||
} // namespace Dynarmic::Optimization
|
|||
@ -1,44 +0,0 @@ |
|||
/* This file is part of the dynarmic project.
|
|||
* Copyright (c) 2020 MerryMage |
|||
* SPDX-License-Identifier: 0BSD |
|||
*/ |
|||
|
|||
#include <vector>
|
|||
|
|||
#include "dynarmic/ir/basic_block.h"
|
|||
#include "dynarmic/ir/opcodes.h"
|
|||
#include "dynarmic/ir/opt/passes.h"
|
|||
|
|||
namespace Dynarmic::Optimization { |
|||
|
|||
// Removes Identity (and Void) instructions from the block, rewriting every
// argument that refers to an identity chain to point at the chain's origin.
void IdentityRemovalPass(IR::Block& block) {
    // Erased instructions are invalidated only after the walk completes, so
    // that argument rewiring above never touches a freed instruction.
    std::vector<IR::Inst*> to_invalidate;

    auto iter = block.begin();
    while (iter != block.end()) {
        IR::Inst& inst = *iter;

        // Collapse chains of identities: keep following GetArg(0) until the
        // argument is no longer an Identity instruction.
        const size_t num_args = inst.NumArgs();
        for (size_t i = 0; i < num_args; i++) {
            while (true) {
                IR::Value arg = inst.GetArg(i);
                if (!arg.IsIdentity())
                    break;
                inst.SetArg(i, arg.GetInst()->GetArg(0));
            }
        }

        if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) {
            // erase returns the iterator to the following instruction.
            iter = block.Instructions().erase(inst);
            to_invalidate.push_back(&inst);
        } else {
            ++iter;
        }
    }

    for (IR::Inst* inst : to_invalidate) {
        inst->Invalidate();
    }
}
|||
|
|||
} // namespace Dynarmic::Optimization
|
|||
@ -1,127 +0,0 @@ |
|||
/* This file is part of the dynarmic project. |
|||
* Copyright (c) 2020 MerryMage |
|||
* SPDX-License-Identifier: 0BSD |
|||
*/ |
|||
|
|||
#pragma once |
|||
|
|||
#include <optional> |
|||
#include <tuple> |
|||
|
|||
#include <mp/metafunction/apply.h> |
|||
#include <mp/typelist/concat.h> |
|||
#include <mp/typelist/drop.h> |
|||
#include <mp/typelist/get.h> |
|||
#include <mp/typelist/head.h> |
|||
#include <mp/typelist/list.h> |
|||
#include <mp/typelist/prepend.h> |
|||
|
|||
#include "dynarmic/ir/microinstruction.h" |
|||
#include "dynarmic/ir/opcodes.h" |
|||
#include "dynarmic/ir/value.h" |
|||
|
|||
namespace Dynarmic::Optimization::IRMatcher {

// Matchers for pattern-matching IR instruction trees. Each matcher exposes:
//   - ReturnType: a std::tuple of the values it captures, and
//   - Match(...): returning that tuple on success, std::nullopt on failure.
// Inst<Opcode, Args...> composes matchers over an instruction's arguments,
// concatenating all captured values into one tuple.

// Matches any IR::Value unconditionally and captures it.
struct CaptureValue {
    using ReturnType = std::tuple<IR::Value>;

    static std::optional<ReturnType> Match(IR::Value value) {
        return std::tuple(value);
    }
};

// Matches any non-immediate value and captures the defining instruction.
struct CaptureInst {
    using ReturnType = std::tuple<IR::Inst*>;

    static std::optional<ReturnType> Match(IR::Value value) {
        if (value.IsImmediate())
            return std::nullopt;
        return std::tuple(value.GetInstRecursive());
    }
};

// Captures an immediate as an unsigned 64-bit integer.
struct CaptureUImm {
    using ReturnType = std::tuple<u64>;

    static std::optional<ReturnType> Match(IR::Value value) {
        return std::tuple(value.GetImmediateAsU64());
    }
};

// Captures an immediate as a signed 64-bit integer.
struct CaptureSImm {
    using ReturnType = std::tuple<s64>;

    static std::optional<ReturnType> Match(IR::Value value) {
        return std::tuple(value.GetImmediateAsS64());
    }
};

// Matches only the specific unsigned immediate Value; captures nothing.
template<u64 Value>
struct UImm {
    using ReturnType = std::tuple<>;

    static std::optional<std::tuple<>> Match(IR::Value value) {
        if (value.GetImmediateAsU64() == Value)
            return std::tuple();
        return std::nullopt;
    }
};

// Matches only the specific signed immediate Value; captures nothing.
template<s64 Value>
struct SImm {
    using ReturnType = std::tuple<>;

    static std::optional<std::tuple<>> Match(IR::Value value) {
        if (value.GetImmediateAsS64() == Value)
            return std::tuple();
        return std::nullopt;
    }
};

// Matches an instruction with the given opcode whose arguments match
// Args... in order. Instructions with associated pseudo-operations are
// rejected, since rewriting them would lose the pseudo-op results.
template<IR::Opcode Opcode, typename... Args>
struct Inst {
public:
    using ReturnType = mp::concat<std::tuple<>, typename Args::ReturnType...>;

    static std::optional<ReturnType> Match(const IR::Inst& inst) {
        if (inst.GetOpcode() != Opcode)
            return std::nullopt;
        if (inst.HasAssociatedPseudoOperation())
            return std::nullopt;
        return MatchArgs<0>(inst);
    }

    static std::optional<ReturnType> Match(IR::Value value) {
        if (value.IsImmediate())
            return std::nullopt;
        return Match(*value.GetInstRecursive());
    }

private:
    // Recursively matches argument I onward, concatenating the capture
    // tuples. The return type is the concatenation of the remaining
    // matchers' ReturnTypes (computed via the mp typelist utilities).
    template<size_t I>
    static auto MatchArgs(const IR::Inst& inst) -> std::optional<mp::apply<mp::concat, mp::prepend<mp::drop<I, mp::list<typename Args::ReturnType...>>, std::tuple<>>>> {
        if constexpr (I >= sizeof...(Args)) {
            return std::tuple();
        } else {
            using Arg = mp::get<I, mp::list<Args...>>;

            if (const auto arg = Arg::Match(inst.GetArg(I))) {
                if (const auto rest = MatchArgs<I + 1>(inst)) {
                    return std::tuple_cat(*arg, *rest);
                }
            }

            return std::nullopt;
        }
    }
};

// Helpers for post-match constraints: check that captured instructions are
// all the same instruction.
inline bool IsSameInst(std::tuple<IR::Inst*, IR::Inst*> t) {
    return std::get<0>(t) == std::get<1>(t);
}

inline bool IsSameInst(std::tuple<IR::Inst*, IR::Inst*, IR::Inst*> t) {
    return std::get<0>(t) == std::get<1>(t) && std::get<0>(t) == std::get<2>(t);
}

}  // namespace Dynarmic::Optimization::IRMatcher
|||
@ -1,18 +0,0 @@ |
|||
/* This file is part of the dynarmic project.
|
|||
* Copyright (c) 2023 MerryMage |
|||
* SPDX-License-Identifier: 0BSD |
|||
*/ |
|||
|
|||
#include "dynarmic/ir/basic_block.h"
|
|||
#include "dynarmic/ir/microinstruction.h"
|
|||
|
|||
namespace Dynarmic::Optimization { |
|||
|
|||
// Assigns each instruction in the block a unique, monotonically increasing
// name (starting at 1), used for debugging / IR dumps.
void NamingPass(IR::Block& block) {
    unsigned next_name = 1;
    for (auto& inst : block) {
        inst.SetName(next_name);
        ++next_name;
    }
}
|||
|
|||
} // namespace Dynarmic::Optimization
|
|||
@ -1,47 +0,0 @@ |
|||
/* This file is part of the dynarmic project. |
|||
* Copyright (c) 2016 MerryMage |
|||
* SPDX-License-Identifier: 0BSD |
|||
*/ |
|||
|
|||
#pragma once |
|||
|
|||
namespace Dynarmic::A32 { |
|||
struct UserCallbacks; |
|||
} |
|||
|
|||
namespace Dynarmic::A64 { |
|||
struct UserCallbacks; |
|||
struct UserConfig; |
|||
} // namespace Dynarmic::A64 |
|||
|
|||
namespace Dynarmic::IR { |
|||
class Block; |
|||
} |
|||
|
|||
namespace Dynarmic::Optimization { |
|||
|
|||
// Selects which IR opcodes the polyfill pass should lower into simpler
// opcode sequences (for backends lacking native support).
struct PolyfillOptions {
    bool sha256 = false;
    bool vector_multiply_widen = false;

    bool operator==(const PolyfillOptions&) const = default;
};

// Options for the A32 get/set elimination pass controlling how NZC/NZ flag
// representations are converted.
struct A32GetSetEliminationOptions {
    bool convert_nzc_to_nz = false;
    bool convert_nz_to_nzc = false;
};

// Lowers unsupported opcodes according to `opt`.
void PolyfillPass(IR::Block& block, const PolyfillOptions& opt);
// Replaces loads from read-only memory with constants (A32).
void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb);
// Eliminates redundant guest-register get/set operations (A32).
void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions opt);
// Rewrites opcodes according to the A64 user configuration.
void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf);
// Eliminates redundant guest-register get/set operations (A64).
void A64GetSetElimination(IR::Block& block);
// Merges adjacent interpreter-fallback blocks (A64).
void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb);
// Folds instructions whose results are compile-time computable.
void ConstantPropagation(IR::Block& block);
// Removes side-effect-free instructions with no uses.
void DeadCodeElimination(IR::Block& block);
// Removes Identity/Void instructions, rewiring their uses.
void IdentityRemovalPass(IR::Block& block);
// Debug pass: asserts type-consistency and use-count integrity.
void VerificationPass(const IR::Block& block);
// Assigns sequential names to instructions for diagnostics.
void NamingPass(IR::Block& block);
|||
|
|||
} // namespace Dynarmic::Optimization |
|||
@ -1,218 +0,0 @@ |
|||
/* This file is part of the dynarmic project.
|
|||
* Copyright (c) 2022 MerryMage |
|||
* SPDX-License-Identifier: 0BSD |
|||
*/ |
|||
|
|||
#include "dynarmic/ir/basic_block.h"
|
|||
#include "dynarmic/ir/ir_emitter.h"
|
|||
#include "dynarmic/ir/microinstruction.h"
|
|||
#include "dynarmic/ir/opcodes.h"
|
|||
#include "dynarmic/ir/opt/passes.h"
|
|||
|
|||
namespace Dynarmic::Optimization { |
|||
|
|||
namespace { |
|||
|
|||
// Lowers SHA256MessageSchedule0 (SHA256SU0) into generic vector ops:
// result = x + sigma0(extract(x, y)), where sigma0(e) = ROR(e,7) ^ ROR(e,18) ^ (e >> 3)
// is applied to each 32-bit element.
void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
    const IR::U128 x = (IR::U128)inst.GetArg(0);
    const IR::U128 y = (IR::U128)inst.GetArg(1);

    // t = [x1, x2, x3, y0]: each element paired with its successor.
    const IR::U128 t = ir.VectorExtract(x, y, 32);

    IR::U128 result = ir.ZeroVector();
    for (size_t i = 0; i < 4; i++) {
        // sigma0 of element i of t.
        const IR::U32 modified_element = [&] {
            const IR::U32 element = ir.VectorGetElement(32, t, i);
            const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7));
            const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18));
            const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3));

            return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
        }();

        result = ir.VectorSetElement(32, result, i, modified_element);
    }
    result = ir.VectorAdd(32, result, x);

    inst.ReplaceUsesWith(result);
}
|||
|
|||
// Lowers SHA256MessageSchedule1 (SHA256SU1) into generic vector ops.
// Computes the second half of the SHA-256 message schedule update:
// each output element is x[i] + T0[i] + sigma1(w), where
// sigma1(e) = ROR(e,17) ^ ROR(e,19) ^ (e >> 10). The upper two output
// elements depend on the lower two, so the result is built in two halves.
void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
    const IR::U128 x = (IR::U128)inst.GetArg(0);
    const IR::U128 y = (IR::U128)inst.GetArg(1);
    const IR::U128 z = (IR::U128)inst.GetArg(2);

    // T0 = [y1, y2, y3, z0]
    const IR::U128 T0 = ir.VectorExtract(y, z, 32);

    // Lower two elements: sigma1 of z's upper half, plus x and T0.
    const IR::U128 lower_half = [&] {
        const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64);
        const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17);
        const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19);
        const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10);
        const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3));
        const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0));
        return ir.VectorZeroUpper(tmp5);
    }();

    // Upper two elements: sigma1 of the just-computed lower half, plus the
    // upper halves of x and T0.
    const IR::U64 upper_half = [&] {
        const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17);
        const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19);
        const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10);
        const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3));

        // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2]
        const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64);
        const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64);

        const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0));
        return ir.VectorGetElement(64, tmp5, 0);
    }();

    const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half);

    inst.ReplaceUsesWith(result);
}
|||
|
|||
// SHA-256 Ch function: Ch(x, y, z) = (x & y) ^ (~x & z),
// implemented branch-free as ((y ^ z) & x) ^ z.
IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
    return ir.Eor(ir.And(ir.Eor(y, z), x), z);
}
|||
|
|||
// SHA-256 Maj function: Maj(x, y, z) = (x & y) ^ (x & z) ^ (y & z),
// implemented as (x & y) | ((x | y) & z).
IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
    return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z));
}
|||
|
|||
// SHA-256 big-Sigma0: ROR(x, 2) ^ ROR(x, 13) ^ ROR(x, 22).
IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
    const IR::U32 ror2 = ir.RotateRight(x, ir.Imm8(2));
    const IR::U32 ror13 = ir.RotateRight(x, ir.Imm8(13));
    const IR::U32 ror22 = ir.RotateRight(x, ir.Imm8(22));
    return ir.Eor(ror2, ir.Eor(ror13, ror22));
}
|||
|
|||
// SHA-256 big-Sigma1: ROR(x, 6) ^ ROR(x, 11) ^ ROR(x, 25).
IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
    const IR::U32 ror6 = ir.RotateRight(x, ir.Imm8(6));
    const IR::U32 ror11 = ir.RotateRight(x, ir.Imm8(11));
    const IR::U32 ror25 = ir.RotateRight(x, ir.Imm8(25));
    return ir.Eor(ror6, ir.Eor(ror11, ror25));
}
|||
|
|||
// Lowers SHA256Hash (SHA256H / SHA256H2) into generic scalar/vector ops.
// Performs four SHA-256 rounds on the state halves held in x and y using the
// four schedule words in w. part1 selects which updated state half (x for
// SHA256H, y for SHA256H2) replaces the instruction's result.
void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
    IR::U128 x = (IR::U128)inst.GetArg(0);
    IR::U128 y = (IR::U128)inst.GetArg(1);
    const IR::U128 w = (IR::U128)inst.GetArg(2);
    const bool part1 = inst.GetArg(3).GetU1();

    for (size_t i = 0; i < 4; i++) {
        // One SHA-256 round; x holds {a,b,c,d}-like state, y holds {e,f,g,h}-like state.
        const IR::U32 low_x = ir.VectorGetElement(32, x, 0);
        const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1);
        const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2);
        const IR::U32 high_x = ir.VectorGetElement(32, x, 3);

        const IR::U32 low_y = ir.VectorGetElement(32, y, 0);
        const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1);
        const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2);
        const IR::U32 high_y = ir.VectorGetElement(32, y, 3);

        const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y);
        const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x);

        // t = h + Sigma1(e) + Ch(e,f,g) + w[i]
        const IR::U32 t = [&] {
            const IR::U32 w_element = ir.VectorGetElement(32, w, i);
            const IR::U32 sig = SHAhashSIGMA1(ir, low_y);

            return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element)));
        }();

        const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority));
        const IR::U32 new_low_y = ir.Add(t, high_x);

        // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3]
        const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96);
        const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96);

        x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x);
        y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y);
    }

    inst.ReplaceUsesWith(part1 ? x : y);
}
|||
|
|||
template<size_t esize, bool is_signed> |
|||
void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) { |
|||
IR::U128 n = (IR::U128)inst.GetArg(0); |
|||
IR::U128 m = (IR::U128)inst.GetArg(1); |
|||
|
|||
const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n); |
|||
const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m); |
|||
|
|||
const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m); |
|||
|
|||
inst.ReplaceUsesWith(result); |
|||
} |
|||
|
|||
} // namespace
|
|||
|
|||
// Polyfill pass: replaces opcodes the backend cannot emit natively with
// equivalent sequences of simpler IR, as selected by `polyfill`.
void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
    // Fast path: nothing enabled, nothing to do.
    if (polyfill == PolyfillOptions{}) {
        return;
    }

    IR::IREmitter ir{block};

    for (auto& inst : block) {
        // Emit replacement IR immediately before the instruction being lowered.
        ir.SetInsertionPointBefore(&inst);

        switch (inst.GetOpcode()) {
        case IR::Opcode::SHA256MessageSchedule0:
            if (polyfill.sha256) {
                PolyfillSHA256MessageSchedule0(ir, inst);
            }
            break;
        case IR::Opcode::SHA256MessageSchedule1:
            if (polyfill.sha256) {
                PolyfillSHA256MessageSchedule1(ir, inst);
            }
            break;
        case IR::Opcode::SHA256Hash:
            if (polyfill.sha256) {
                PolyfillSHA256Hash(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplySignedWiden8:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<8, true>(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplySignedWiden16:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<16, true>(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplySignedWiden32:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<32, true>(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplyUnsignedWiden8:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<8, false>(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplyUnsignedWiden16:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<16, false>(ir, inst);
            }
            break;
        case IR::Opcode::VectorMultiplyUnsignedWiden32:
            if (polyfill.vector_multiply_widen) {
                PolyfillVectorMultiplyWiden<32, false>(ir, inst);
            }
            break;
        default:
            break;
        }
    }
}
|||
|
|||
} // namespace Dynarmic::Optimization
|
|||
@ -1,51 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|||
|
|||
/* This file is part of the dynarmic project.
|
|||
* Copyright (c) 2016 MerryMage |
|||
* SPDX-License-Identifier: 0BSD |
|||
*/ |
|||
|
|||
#include <cstdio>
|
|||
#include <map>
|
|||
|
|||
#include "dynarmic/common/assert.h"
|
|||
#include "dynarmic/common/common_types.h"
|
|||
#include <ankerl/unordered_dense.h>
|
|||
|
|||
#include "dynarmic/ir/basic_block.h"
|
|||
#include "dynarmic/ir/microinstruction.h"
|
|||
#include "dynarmic/ir/opcodes.h"
|
|||
#include "dynarmic/ir/opt/passes.h"
|
|||
#include "dynarmic/ir/type.h"
|
|||
|
|||
namespace Dynarmic::Optimization { |
|||
|
|||
// Debug-time sanity pass: asserts that every instruction argument has a type
// compatible with its opcode's signature, and that each instruction's cached
// use count matches the number of actual uses in the block. Dumps the block
// and aborts on failure.
void VerificationPass(const IR::Block& block) {
    // Check argument/opcode type compatibility.
    for (const auto& inst : block) {
        for (size_t i = 0; i < inst.NumArgs(); i++) {
            const IR::Type t1 = inst.GetArg(i).GetType();
            const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i);
            if (!IR::AreTypesCompatible(t1, t2)) {
                std::puts(IR::DumpBlock(block).c_str());
                ASSERT_FALSE("above block failed validation");
            }
        }
    }

    // Recount uses from scratch and compare against each instruction's
    // maintained use counter.
    ankerl::unordered_dense::map<IR::Inst*, size_t> actual_uses;
    for (const auto& inst : block) {
        for (size_t i = 0; i < inst.NumArgs(); i++) {
            const auto arg = inst.GetArg(i);
            if (!arg.IsImmediate()) {
                actual_uses[arg.GetInst()]++;
            }
        }
    }

    for (const auto& pair : actual_uses) {
        ASSERT(pair.first->UseCount() == pair.second);
    }
}
|||
|
|||
} // namespace Dynarmic::Optimization
|
|||
1519
src/dynarmic/src/dynarmic/ir/opt_passes.cpp
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -0,0 +1,37 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project |
|||
// SPDX-License-Identifier: GPL-3.0-or-later |
|||
|
|||
/* This file is part of the dynarmic project. |
|||
* Copyright (c) 2016 MerryMage |
|||
* SPDX-License-Identifier: 0BSD |
|||
*/ |
|||
|
|||
#pragma once |
|||
|
|||
namespace Dynarmic::A32 { |
|||
struct UserCallbacks; |
|||
struct UserConfig; |
|||
} |
|||
|
|||
namespace Dynarmic::A64 { |
|||
struct UserCallbacks; |
|||
struct UserConfig; |
|||
} |
|||
|
|||
namespace Dynarmic::IR { |
|||
class Block; |
|||
} |
|||
|
|||
namespace Dynarmic::Optimization { |
|||
|
|||
// Selects which IR opcodes the polyfill stage should lower into simpler
// opcode sequences (for backends lacking native support).
struct PolyfillOptions {
    bool sha256 = false;
    bool vector_multiply_widen = false;

    bool operator==(const PolyfillOptions&) const = default;
};

// Runs the full optimization pipeline on `block` for an A32 frontend.
void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options);
// Runs the full optimization pipeline on `block` for an A64 frontend.
void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options);
|||
|
|||
} // namespace Dynarmic::Optimization |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue