committed by
crueter
12 changed files with 1701 additions and 1903 deletions
-
12src/video_core/CMakeLists.txt
-
7src/video_core/engines/maxwell_3d.cpp
-
4src/video_core/engines/maxwell_3d.h
-
1670src/video_core/macro.cpp
-
39src/video_core/macro.h
-
140src/video_core/macro/macro.cpp
-
606src/video_core/macro/macro_hle.cpp
-
33src/video_core/macro/macro_hle.h
-
362src/video_core/macro/macro_interpreter.cpp
-
27src/video_core/macro/macro_interpreter.h
-
678src/video_core/macro/macro_jit_x64.cpp
-
26src/video_core/macro/macro_jit_x64.h
1670
src/video_core/macro.cpp
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -1,140 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|||
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
|||
|
|||
#include <cstring>
|
|||
#include <fstream>
|
|||
#include <optional>
|
|||
#include <span>
|
|||
|
|||
#include "common/container_hash.h"
|
|||
|
|||
#include <fstream>
|
|||
#include "common/assert.h"
|
|||
#include "common/fs/fs.h"
|
|||
#include "common/fs/path_util.h"
|
|||
#include "common/settings.h"
|
|||
#include "video_core/engines/maxwell_3d.h"
|
|||
#include "video_core/macro/macro.h"
|
|||
#include "video_core/macro/macro_hle.h"
|
|||
#include "video_core/macro/macro_interpreter.h"
|
|||
|
|||
#ifdef ARCHITECTURE_x86_64
|
|||
#include "video_core/macro/macro_jit_x64.h"
|
|||
#endif
|
|||
|
|||
namespace Tegra { |
|||
|
|||
/// Writes the raw words of a macro into the "macros" folder of the dump directory.
///
/// The file is named "<hash>.macro". When `decompiled` is set the file is named
/// "decompiled_<hash>.macro" instead; if a plain "<hash>.macro" already exists it is renamed
/// to the decompiled name and nothing is written.
///
/// @param hash       Hash of the macro, used to build the file name.
/// @param code       Macro words to write out.
/// @param decompiled Selects the "decompiled_" file name (see above).
static void Dump(u64 hash, std::span<const u32> code, bool decompiled = false) {
    const auto dump_root{Common::FS::GetEdenPath(Common::FS::EdenPath::DumpDir)};
    const auto macros_root{dump_root / "macros"};
    const bool dirs_ready =
        Common::FS::CreateDir(dump_root) && Common::FS::CreateDir(macros_root);
    if (!dirs_ready) {
        LOG_ERROR(Common_Filesystem, "Failed to create macro dump directories");
        return;
    }

    auto out_path{macros_root / fmt::format("{:016x}.macro", hash)};
    if (decompiled) {
        auto decompiled_path{macros_root / fmt::format("decompiled_{:016x}.macro", hash)};
        if (Common::FS::Exists(out_path)) {
            // A plain dump is already on disk: just move it to the decompiled name.
            (void)Common::FS::RenameFile(out_path, decompiled_path);
            return;
        }
        out_path = decompiled_path;
    }

    std::fstream stream(out_path, std::ios::out | std::ios::binary);
    if (!stream) {
        LOG_ERROR(Common_Filesystem, "Unable to open or create file at {}",
                  Common::FS::PathToUTF8String(out_path));
        return;
    }
    stream.write(reinterpret_cast<const char*>(code.data()), code.size_bytes());
}
|||
|
|||
/// Builds the engine for the given Maxwell3D instance and creates its HLE macro registry.
MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d_)
    : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d_)},
      maxwell3d{maxwell3d_} {}

MacroEngine::~MacroEngine() = default;
|||
|
|||
/// Appends one word to the code uploaded for `method`, creating the entry if needed.
void MacroEngine::AddCode(u32 method, u32 data) {
    auto& words = uploaded_macro_code[method];
    words.push_back(data);
}
|||
|
|||
/// Drops both the compiled cache entry and the uploaded code for `method`.
void MacroEngine::ClearCode(u32 method) {
    macro_cache.erase(method);
    uploaded_macro_code.erase(method);
}
|||
|
|||
/// Runs the macro bound to `method`, compiling and caching it on first use.
///
/// Resolution order:
///  1. An entry already present in `macro_cache`.
///  2. Code uploaded exactly at `method`.
///  3. Code uploaded at another base method whose word range covers `method`; the tail of
///     that upload (starting at `method - base`) is copied into a new upload entry for
///     `method` and compiled from there.
///
/// After compiling, an HLE replacement is looked up by hash and used unless
/// `Settings::values.disable_macro_hle` is set. If `Settings::values.dump_macros` is set,
/// the code that was compiled is dumped.
///
/// @param method     Macro method to execute.
/// @param parameters Parameters forwarded to the macro program.
void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
    if (const auto cached = macro_cache.find(method); cached != macro_cache.end()) {
        const auto& cache_info = cached->second;
        if (cache_info.has_hle_program) {
            cache_info.hle_program->Execute(parameters, method);
        } else {
            maxwell3d.RefreshParameters();
            cache_info.lle_program->Execute(parameters, method);
        }
        return;
    }

    // Macro not compiled, check if it's uploaded and if so, compile it
    std::optional<u32> mid_method;
    const auto macro_code = uploaded_macro_code.find(method);
    if (macro_code == uploaded_macro_code.end()) {
        for (const auto& [method_base, code] : uploaded_macro_code) {
            if (method >= method_base && (method - method_base) < code.size()) {
                mid_method = method_base;
                break;
            }
        }
        if (!mid_method.has_value()) {
            ASSERT_MSG(false, "Macro 0x{0:x} was not uploaded", method);
            return;
        }
    }
    auto& cache_info = macro_cache[method];

    // Points at the words that are actually compiled. `macro_code` must not be used for this:
    // it is the end iterator whenever the macro was resolved through `mid_method`.
    const std::vector<u32>* compiled_code = nullptr;
    if (!mid_method.has_value()) {
        compiled_code = &macro_code->second;
    } else {
        const auto& macro_cached = uploaded_macro_code[mid_method.value()];
        const auto rebased_method = method - mid_method.value();
        auto& code = uploaded_macro_code[method];
        code.resize(macro_cached.size() - rebased_method);
        std::memcpy(code.data(), macro_cached.data() + rebased_method,
                    code.size() * sizeof(u32));
        compiled_code = &code;
    }
    cache_info.lle_program = Compile(*compiled_code);
    cache_info.hash = Common::HashValue(*compiled_code);

    auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
    if (!hle_program || Settings::values.disable_macro_hle) {
        maxwell3d.RefreshParameters();
        cache_info.lle_program->Execute(parameters, method);
    } else {
        cache_info.has_hle_program = true;
        cache_info.hle_program = std::move(hle_program);
        cache_info.hle_program->Execute(parameters, method);
    }

    if (Settings::values.dump_macros) {
        Dump(cache_info.hash, *compiled_code, cache_info.has_hle_program);
    }
}
|||
|
|||
/// Creates the macro engine implementation for `maxwell3d`.
///
/// The x86-64 JIT is returned only when built for ARCHITECTURE_x86_64 and
/// `Settings::values.disable_macro_jit` is not set; otherwise the interpreter is returned.
std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d) {
#ifdef ARCHITECTURE_x86_64
    if (!Settings::values.disable_macro_jit) {
        return std::make_unique<MacroJITx64>(maxwell3d);
    }
#endif
    return std::make_unique<MacroInterpreter>(maxwell3d);
}
|||
|
|||
} // namespace Tegra
|
|||
@ -1,606 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|||
|
|||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|||
|
|||
#include <array>
|
|||
#include <vector>
|
|||
#include "common/assert.h"
|
|||
#include "common/scope_exit.h"
|
|||
#include "video_core/dirty_flags.h"
|
|||
#include "video_core/engines/draw_manager.h"
|
|||
#include "video_core/engines/maxwell_3d.h"
|
|||
#include "video_core/macro/macro.h"
|
|||
#include "video_core/macro/macro_hle.h"
|
|||
#include "video_core/memory_manager.h"
|
|||
#include "video_core/rasterizer_interface.h"
|
|||
|
|||
namespace Tegra { |
|||
|
|||
using Maxwell3D = Engines::Maxwell3D; |
|||
|
|||
namespace { |
|||
|
|||
/// Returns true for the topologies the HLE macros in this file accept on their indirect
/// paths. Quads, QuadStrip, Polygon and any unlisted value yield false.
bool IsTopologySafe(Maxwell3D::Regs::PrimitiveTopology topology) {
    using Topology = Maxwell3D::Regs::PrimitiveTopology;
    switch (topology) {
    case Topology::Points:
    case Topology::Lines:
    case Topology::LineLoop:
    case Topology::LineStrip:
    case Topology::Triangles:
    case Topology::TriangleStrip:
    case Topology::TriangleFan:
    case Topology::LinesAdjacency:
    case Topology::LineStripAdjacency:
    case Topology::TrianglesAdjacency:
    case Topology::TriangleStripAdjacency:
    case Topology::Patches:
        return true;
    default:
        // Quads, QuadStrip, Polygon and anything unknown.
        break;
    }
    return false;
}
|||
|
|||
class HLEMacroImpl : public CachedMacro { |
|||
public: |
|||
explicit HLEMacroImpl(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} |
|||
|
|||
protected: |
|||
Maxwell3D& maxwell3d; |
|||
}; |
|||
|
|||
/*
|
|||
* @note: these macros have two versions, a normal and extended version, with the extended version |
|||
* also assigning the base vertex/instance. |
|||
*/ |
|||
template <bool extended> |
|||
class HLE_DrawArraysIndirect final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_DrawArraysIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); |
|||
if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { |
|||
Fallback(parameters); |
|||
return; |
|||
} |
|||
|
|||
auto& params = maxwell3d.draw_manager->GetIndirectParams(); |
|||
params.is_byte_count = false; |
|||
params.is_indexed = false; |
|||
params.include_count = false; |
|||
params.count_start_address = 0; |
|||
params.indirect_start_address = maxwell3d.GetMacroAddress(1); |
|||
params.buffer_size = 4 * sizeof(u32); |
|||
params.max_draw_counts = 1; |
|||
params.stride = 0; |
|||
|
|||
if constexpr (extended) { |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance); |
|||
} |
|||
|
|||
maxwell3d.draw_manager->DrawArrayIndirect(topology); |
|||
|
|||
if constexpr (extended) { |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::None; |
|||
maxwell3d.replace_table.clear(); |
|||
} |
|||
} |
|||
|
|||
private: |
|||
void Fallback(const std::vector<u32>& parameters) { |
|||
SCOPE_EXIT { |
|||
if (extended) { |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::None; |
|||
maxwell3d.replace_table.clear(); |
|||
} |
|||
}; |
|||
maxwell3d.RefreshParameters(); |
|||
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); |
|||
|
|||
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); |
|||
const u32 vertex_first = parameters[3]; |
|||
const u32 vertex_count = parameters[1]; |
|||
|
|||
if (!IsTopologySafe(topology) && |
|||
static_cast<size_t>(maxwell3d.GetMaxCurrentVertices()) < |
|||
static_cast<size_t>(vertex_first) + static_cast<size_t>(vertex_count)) { |
|||
ASSERT_MSG(false, "Faulty draw!"); |
|||
return; |
|||
} |
|||
|
|||
const u32 base_instance = parameters[4]; |
|||
if constexpr (extended) { |
|||
maxwell3d.regs.global_base_instance_index = base_instance; |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance); |
|||
} |
|||
|
|||
maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance, |
|||
instance_count); |
|||
|
|||
if constexpr (extended) { |
|||
maxwell3d.regs.global_base_instance_index = 0; |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::None; |
|||
maxwell3d.replace_table.clear(); |
|||
} |
|||
} |
|||
}; |
|||
|
|||
/*
|
|||
* @note: these macros have two versions, a normal and extended version, with the extended version |
|||
* also assigning the base vertex/instance. |
|||
*/ |
|||
template <bool extended> |
|||
class HLE_DrawIndexedIndirect final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_DrawIndexedIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); |
|||
if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { |
|||
Fallback(parameters); |
|||
return; |
|||
} |
|||
|
|||
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); |
|||
const u32 element_base = parameters[4]; |
|||
const u32 base_instance = parameters[5]; |
|||
maxwell3d.regs.vertex_id_base = element_base; |
|||
maxwell3d.regs.global_base_vertex_index = element_base; |
|||
maxwell3d.regs.global_base_instance_index = base_instance; |
|||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
|||
if constexpr (extended) { |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); |
|||
} |
|||
auto& params = maxwell3d.draw_manager->GetIndirectParams(); |
|||
params.is_byte_count = false; |
|||
params.is_indexed = true; |
|||
params.include_count = false; |
|||
params.count_start_address = 0; |
|||
params.indirect_start_address = maxwell3d.GetMacroAddress(1); |
|||
params.buffer_size = 5 * sizeof(u32); |
|||
params.max_draw_counts = 1; |
|||
params.stride = 0; |
|||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
|||
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); |
|||
maxwell3d.regs.vertex_id_base = 0x0; |
|||
maxwell3d.regs.global_base_vertex_index = 0x0; |
|||
maxwell3d.regs.global_base_instance_index = 0x0; |
|||
if constexpr (extended) { |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::None; |
|||
maxwell3d.replace_table.clear(); |
|||
} |
|||
} |
|||
|
|||
private: |
|||
void Fallback(const std::vector<u32>& parameters) { |
|||
maxwell3d.RefreshParameters(); |
|||
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); |
|||
const u32 element_base = parameters[4]; |
|||
const u32 base_instance = parameters[5]; |
|||
maxwell3d.regs.vertex_id_base = element_base; |
|||
maxwell3d.regs.global_base_vertex_index = element_base; |
|||
maxwell3d.regs.global_base_instance_index = base_instance; |
|||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
|||
if constexpr (extended) { |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); |
|||
} |
|||
|
|||
maxwell3d.draw_manager->DrawIndex( |
|||
static_cast<Tegra::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), parameters[3], |
|||
parameters[1], element_base, base_instance, instance_count); |
|||
|
|||
maxwell3d.regs.vertex_id_base = 0x0; |
|||
maxwell3d.regs.global_base_vertex_index = 0x0; |
|||
maxwell3d.regs.global_base_instance_index = 0x0; |
|||
if constexpr (extended) { |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::None; |
|||
maxwell3d.replace_table.clear(); |
|||
} |
|||
} |
|||
}; |
|||
|
|||
class HLE_MultiLayerClear final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_MultiLayerClear(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
maxwell3d.RefreshParameters(); |
|||
ASSERT(parameters.size() == 1); |
|||
|
|||
const Maxwell3D::Regs::ClearSurface clear_params{parameters[0]}; |
|||
const u32 rt_index = clear_params.RT; |
|||
const u32 num_layers = maxwell3d.regs.rt[rt_index].depth; |
|||
ASSERT(clear_params.layer == 0); |
|||
|
|||
maxwell3d.regs.clear_surface.raw = clear_params.raw; |
|||
maxwell3d.draw_manager->Clear(num_layers); |
|||
} |
|||
}; |
|||
|
|||
class HLE_MultiDrawIndexedIndirectCount final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_MultiDrawIndexedIndirectCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
const auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[2]); |
|||
if (!IsTopologySafe(topology)) { |
|||
Fallback(parameters); |
|||
return; |
|||
} |
|||
|
|||
const u32 start_indirect = parameters[0]; |
|||
const u32 end_indirect = parameters[1]; |
|||
if (start_indirect >= end_indirect) { |
|||
// Nothing to do.
|
|||
return; |
|||
} |
|||
|
|||
const u32 padding = parameters[3]; // padding is in words
|
|||
|
|||
// size of each indirect segment
|
|||
const u32 indirect_words = 5 + padding; |
|||
const u32 stride = indirect_words * sizeof(u32); |
|||
const std::size_t draw_count = end_indirect - start_indirect; |
|||
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); |
|||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
|||
auto& params = maxwell3d.draw_manager->GetIndirectParams(); |
|||
params.is_byte_count = false; |
|||
params.is_indexed = true; |
|||
params.include_count = true; |
|||
params.count_start_address = maxwell3d.GetMacroAddress(4); |
|||
params.indirect_start_address = maxwell3d.GetMacroAddress(5); |
|||
params.buffer_size = stride * draw_count; |
|||
params.max_draw_counts = draw_count; |
|||
params.stride = stride; |
|||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); |
|||
maxwell3d.SetHLEReplacementAttributeType(0, 0x648, |
|||
Maxwell3D::HLEReplacementAttributeType::DrawID); |
|||
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::None; |
|||
maxwell3d.replace_table.clear(); |
|||
} |
|||
|
|||
private: |
|||
void Fallback(const std::vector<u32>& parameters) { |
|||
SCOPE_EXIT { |
|||
// Clean everything.
|
|||
maxwell3d.regs.vertex_id_base = 0x0; |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::None; |
|||
maxwell3d.replace_table.clear(); |
|||
}; |
|||
maxwell3d.RefreshParameters(); |
|||
const u32 start_indirect = parameters[0]; |
|||
const u32 end_indirect = parameters[1]; |
|||
if (start_indirect >= end_indirect) { |
|||
// Nothing to do.
|
|||
return; |
|||
} |
|||
const auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[2]); |
|||
const u32 padding = parameters[3]; |
|||
const std::size_t max_draws = parameters[4]; |
|||
|
|||
const u32 indirect_words = 5 + padding; |
|||
const std::size_t first_draw = start_indirect; |
|||
const std::size_t effective_draws = end_indirect - start_indirect; |
|||
const std::size_t last_draw = start_indirect + (std::min)(effective_draws, max_draws); |
|||
|
|||
for (std::size_t index = first_draw; index < last_draw; index++) { |
|||
const std::size_t base = index * indirect_words + 5; |
|||
const u32 base_vertex = parameters[base + 3]; |
|||
const u32 base_instance = parameters[base + 4]; |
|||
maxwell3d.regs.vertex_id_base = base_vertex; |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); |
|||
maxwell3d.CallMethod(0x8e3, 0x648, true); |
|||
maxwell3d.CallMethod(0x8e4, static_cast<u32>(index), true); |
|||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
|||
maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], |
|||
base_vertex, base_instance, parameters[base + 1]); |
|||
} |
|||
} |
|||
}; |
|||
|
|||
class HLE_DrawIndirectByteCount final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_DrawIndirectByteCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback(); |
|||
|
|||
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0xFFFFU); |
|||
if (!force && (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology))) { |
|||
Fallback(parameters); |
|||
return; |
|||
} |
|||
auto& params = maxwell3d.draw_manager->GetIndirectParams(); |
|||
params.is_byte_count = true; |
|||
params.is_indexed = false; |
|||
params.include_count = false; |
|||
params.count_start_address = 0; |
|||
params.indirect_start_address = maxwell3d.GetMacroAddress(2); |
|||
params.buffer_size = 4; |
|||
params.max_draw_counts = 1; |
|||
params.stride = parameters[1]; |
|||
maxwell3d.regs.draw.begin = parameters[0]; |
|||
maxwell3d.regs.draw_auto_stride = parameters[1]; |
|||
maxwell3d.regs.draw_auto_byte_count = parameters[2]; |
|||
|
|||
maxwell3d.draw_manager->DrawArrayIndirect(topology); |
|||
} |
|||
|
|||
private: |
|||
void Fallback(const std::vector<u32>& parameters) { |
|||
maxwell3d.RefreshParameters(); |
|||
|
|||
maxwell3d.regs.draw.begin = parameters[0]; |
|||
maxwell3d.regs.draw_auto_stride = parameters[1]; |
|||
maxwell3d.regs.draw_auto_byte_count = parameters[2]; |
|||
|
|||
maxwell3d.draw_manager->DrawArray( |
|||
maxwell3d.regs.draw.topology, 0, |
|||
maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1); |
|||
} |
|||
}; |
|||
|
|||
class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
maxwell3d.RefreshParameters(); |
|||
const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2; |
|||
const u32 address = maxwell3d.regs.shadow_scratch[24]; |
|||
auto& const_buffer = maxwell3d.regs.const_buffer; |
|||
const_buffer.size = 0x7000; |
|||
const_buffer.address_high = (address >> 24) & 0xFF; |
|||
const_buffer.address_low = address << 8; |
|||
const_buffer.offset = offset; |
|||
} |
|||
}; |
|||
|
|||
class HLE_D7333D26E0A93EDE final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_D7333D26E0A93EDE(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
maxwell3d.RefreshParameters(); |
|||
const size_t index = parameters[0]; |
|||
const u32 address = maxwell3d.regs.shadow_scratch[42 + index]; |
|||
const u32 size = maxwell3d.regs.shadow_scratch[47 + index]; |
|||
auto& const_buffer = maxwell3d.regs.const_buffer; |
|||
const_buffer.size = size; |
|||
const_buffer.address_high = (address >> 24) & 0xFF; |
|||
const_buffer.address_low = address << 8; |
|||
} |
|||
}; |
|||
|
|||
class HLE_BindShader final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_BindShader(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
maxwell3d.RefreshParameters(); |
|||
auto& regs = maxwell3d.regs; |
|||
const u32 index = parameters[0]; |
|||
if ((parameters[1] - regs.shadow_scratch[28 + index]) == 0) { |
|||
return; |
|||
} |
|||
|
|||
regs.pipelines[index & 0xF].offset = parameters[2]; |
|||
maxwell3d.dirty.flags[VideoCommon::Dirty::Shaders] = true; |
|||
regs.shadow_scratch[28 + index] = parameters[1]; |
|||
regs.shadow_scratch[34 + index] = parameters[2]; |
|||
|
|||
const u32 address = parameters[4]; |
|||
auto& const_buffer = regs.const_buffer; |
|||
const_buffer.size = 0x10000; |
|||
const_buffer.address_high = (address >> 24) & 0xFF; |
|||
const_buffer.address_low = address << 8; |
|||
|
|||
const size_t bind_group_id = parameters[3] & 0x7F; |
|||
auto& bind_group = regs.bind_groups[bind_group_id]; |
|||
bind_group.raw_config = 0x11; |
|||
maxwell3d.ProcessCBBind(bind_group_id); |
|||
} |
|||
}; |
|||
|
|||
class HLE_SetRasterBoundingBox final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_SetRasterBoundingBox(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
maxwell3d.RefreshParameters(); |
|||
const u32 raster_mode = parameters[0]; |
|||
auto& regs = maxwell3d.regs; |
|||
const u32 raster_enabled = maxwell3d.regs.conservative_raster_enable; |
|||
const u32 scratch_data = maxwell3d.regs.shadow_scratch[52]; |
|||
regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F; |
|||
regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled); |
|||
} |
|||
}; |
|||
|
|||
template <size_t base_size> |
|||
class HLE_ClearConstBuffer final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_ClearConstBuffer(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
maxwell3d.RefreshParameters(); |
|||
static constexpr std::array<u32, base_size> zeroes{}; |
|||
auto& regs = maxwell3d.regs; |
|||
regs.const_buffer.size = static_cast<u32>(base_size); |
|||
regs.const_buffer.address_high = parameters[0]; |
|||
regs.const_buffer.address_low = parameters[1]; |
|||
regs.const_buffer.offset = 0; |
|||
maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4); |
|||
} |
|||
}; |
|||
|
|||
class HLE_ClearMemory final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_ClearMemory(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
maxwell3d.RefreshParameters(); |
|||
|
|||
const u32 needed_memory = parameters[2] / sizeof(u32); |
|||
if (needed_memory > zero_memory.size()) { |
|||
zero_memory.resize(needed_memory, 0); |
|||
} |
|||
auto& regs = maxwell3d.regs; |
|||
regs.upload.line_length_in = parameters[2]; |
|||
regs.upload.line_count = 1; |
|||
regs.upload.dest.address_high = parameters[0]; |
|||
regs.upload.dest.address_low = parameters[1]; |
|||
maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); |
|||
maxwell3d.CallMultiMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(inline_data)), |
|||
zero_memory.data(), needed_memory, needed_memory); |
|||
} |
|||
|
|||
private: |
|||
std::vector<u32> zero_memory; |
|||
}; |
|||
|
|||
class HLE_TransformFeedbackSetup final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_TransformFeedbackSetup(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
maxwell3d.RefreshParameters(); |
|||
|
|||
auto& regs = maxwell3d.regs; |
|||
regs.transform_feedback_enabled = 1; |
|||
regs.transform_feedback.buffers[0].start_offset = 0; |
|||
regs.transform_feedback.buffers[1].start_offset = 0; |
|||
regs.transform_feedback.buffers[2].start_offset = 0; |
|||
regs.transform_feedback.buffers[3].start_offset = 0; |
|||
|
|||
regs.upload.line_length_in = 4; |
|||
regs.upload.line_count = 1; |
|||
regs.upload.dest.address_high = parameters[0]; |
|||
regs.upload.dest.address_low = parameters[1]; |
|||
maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); |
|||
maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(inline_data)), |
|||
regs.transform_feedback.controls[0].stride, true); |
|||
|
|||
maxwell3d.Rasterizer().RegisterTransformFeedback(regs.upload.dest.Address()); |
|||
} |
|||
}; |
|||
|
|||
} // Anonymous namespace
|
|||
|
|||
HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { |
|||
builders.emplace(0x0D61FC9FAAC9FCADULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_DrawArraysIndirect<false>>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0x8A4D173EB99A8603ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_DrawArraysIndirect<true>>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0x771BB18C62444DA0ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_DrawIndexedIndirect<false>>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0x0217920100488FF7ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_DrawIndexedIndirect<true>>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0x3F5E74B9C9A50164ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_MultiDrawIndexedIndirectCount>( |
|||
maxwell3d__); |
|||
})); |
|||
builders.emplace(0xEAD26C3E2109B06BULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_MultiLayerClear>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0xC713C83D8F63CCF3ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_C713C83D8F63CCF3>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0xD7333D26E0A93EDEULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_D7333D26E0A93EDE>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0xEB29B2A09AA06D38ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_BindShader>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0xDB1341DBEB4C8AF7ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_SetRasterBoundingBox>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0x6C97861D891EDf7EULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_ClearConstBuffer<0x5F00>>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0xD246FDDF3A6173D7ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_ClearConstBuffer<0x7000>>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0xEE4D0004BEC8ECF4ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_ClearMemory>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0xFC0CF27F5FFAA661ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_TransformFeedbackSetup>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0xB5F74EDB717278ECULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_DrawIndirectByteCount>(maxwell3d__); |
|||
})); |
|||
} |
|||
|
|||
HLEMacro::~HLEMacro() = default; |
|||
|
|||
/// Builds the HLE macro registered for `hash`, or returns nullptr when the hash is unknown.
std::unique_ptr<CachedMacro> HLEMacro::GetHLEProgram(u64 hash) const {
    if (const auto entry = builders.find(hash); entry != builders.end()) {
        return entry->second(maxwell3d);
    }
    return nullptr;
}
|||
|
|||
} // namespace Tegra
|
|||
@ -1,33 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project |
|||
// SPDX-License-Identifier: GPL-2.0-or-later |
|||
|
|||
#pragma once |
|||
|
|||
#include <functional> |
|||
#include <memory> |
|||
#include <unordered_map> |
|||
|
|||
#include "common/common_types.h" |
|||
|
|||
namespace Tegra { |
|||
|
|||
namespace Engines { |
|||
class Maxwell3D; |
|||
} |
|||
|
|||
class HLEMacro { |
|||
public: |
|||
explicit HLEMacro(Engines::Maxwell3D& maxwell3d_); |
|||
~HLEMacro(); |
|||
|
|||
// Allocates and returns a cached macro if the hash matches a known function. |
|||
// Returns nullptr otherwise. |
|||
[[nodiscard]] std::unique_ptr<CachedMacro> GetHLEProgram(u64 hash) const; |
|||
|
|||
private: |
|||
Engines::Maxwell3D& maxwell3d; |
|||
std::unordered_map<u64, std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>> |
|||
builders; |
|||
}; |
|||
|
|||
} // namespace Tegra |
|||
@ -1,362 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|||
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
|||
|
|||
#include <array>
|
|||
#include <optional>
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/logging/log.h"
|
|||
#include "video_core/engines/maxwell_3d.h"
|
|||
#include "video_core/macro/macro_interpreter.h"
|
|||
|
|||
namespace Tegra { |
|||
namespace { |
|||
class MacroInterpreterImpl final : public CachedMacro { |
|||
public: |
|||
explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) |
|||
: maxwell3d{maxwell3d_}, code{code_} {} |
|||
|
|||
void Execute(const std::vector<u32>& params, u32 method) override; |
|||
|
|||
private: |
|||
/// Resets the execution engine state, zeroing registers, etc.
|
|||
void Reset(); |
|||
|
|||
/**
|
|||
* Executes a single macro instruction located at the current program counter. Returns whether |
|||
* the interpreter should keep running. |
|||
* |
|||
* @param is_delay_slot Whether the current step is being executed due to a delay slot in a |
|||
* previous instruction. |
|||
*/ |
|||
bool Step(bool is_delay_slot); |
|||
|
|||
/// Calculates the result of an ALU operation. src_a OP src_b;
|
|||
u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); |
|||
|
|||
/// Performs the result operation on the input result and stores it in the specified register
|
|||
/// (if necessary).
|
|||
void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); |
|||
|
|||
/// Evaluates the branch condition and returns whether the branch should be taken or not.
|
|||
bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; |
|||
|
|||
/// Reads an opcode at the current program counter location.
|
|||
Macro::Opcode GetOpcode() const; |
|||
|
|||
/// Returns the specified register's value. Register 0 is hardcoded to always return 0.
|
|||
u32 GetRegister(u32 register_id) const; |
|||
|
|||
/// Sets the register to the input value.
|
|||
void SetRegister(u32 register_id, u32 value); |
|||
|
|||
/// Sets the method address to use for the next Send instruction.
|
|||
void SetMethodAddress(u32 address); |
|||
|
|||
/// Calls a GPU Engine method with the input parameter.
|
|||
void Send(u32 value); |
|||
|
|||
/// Reads a GPU register located at the method address.
|
|||
u32 Read(u32 method) const; |
|||
|
|||
/// Returns the next parameter in the parameter queue.
|
|||
u32 FetchParameter(); |
|||
|
|||
Engines::Maxwell3D& maxwell3d; |
|||
|
|||
/// Current program counter
|
|||
u32 pc{}; |
|||
/// Program counter to execute at after the delay slot is executed.
|
|||
std::optional<u32> delayed_pc; |
|||
|
|||
/// General purpose macro registers.
|
|||
std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {}; |
|||
|
|||
/// Method address to use for the next Send instruction.
|
|||
Macro::MethodAddress method_address = {}; |
|||
|
|||
/// Input parameters of the current macro.
|
|||
std::unique_ptr<u32[]> parameters; |
|||
std::size_t num_parameters = 0; |
|||
std::size_t parameters_capacity = 0; |
|||
/// Index of the next parameter that will be fetched by the 'parm' instruction.
|
|||
u32 next_parameter_index = 0; |
|||
|
|||
bool carry_flag = false; |
|||
const std::vector<u32>& code; |
|||
}; |
|||
|
|||
void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) { |
|||
Reset(); |
|||
|
|||
registers[1] = params[0]; |
|||
num_parameters = params.size(); |
|||
|
|||
if (num_parameters > parameters_capacity) { |
|||
parameters_capacity = num_parameters; |
|||
parameters = std::make_unique<u32[]>(num_parameters); |
|||
} |
|||
std::memcpy(parameters.get(), params.data(), num_parameters * sizeof(u32)); |
|||
|
|||
// Execute the code until we hit an exit condition.
|
|||
bool keep_executing = true; |
|||
while (keep_executing) { |
|||
keep_executing = Step(false); |
|||
} |
|||
|
|||
// Assert the the macro used all the input parameters
|
|||
ASSERT(next_parameter_index == num_parameters); |
|||
} |
|||
|
|||
/// Puts the interpreter back into its initial state before a macro is executed.
void MacroInterpreterImpl::Reset() {
    pc = 0;
    delayed_pc.reset();
    registers.fill(0);
    method_address.raw = 0;
    carry_flag = false;
    num_parameters = 0;
    // Fetching starts at index 1 because $r1 already holds the first parameter.
    next_parameter_index = 1;
}
|||
|
|||
bool MacroInterpreterImpl::Step(bool is_delay_slot) { |
|||
u32 base_address = pc; |
|||
|
|||
Macro::Opcode opcode = GetOpcode(); |
|||
pc += 4; |
|||
|
|||
// Update the program counter if we were delayed
|
|||
if (delayed_pc) { |
|||
ASSERT(is_delay_slot); |
|||
pc = *delayed_pc; |
|||
delayed_pc = {}; |
|||
} |
|||
|
|||
switch (opcode.operation) { |
|||
case Macro::Operation::ALU: { |
|||
u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), |
|||
GetRegister(opcode.src_b)); |
|||
ProcessResult(opcode.result_operation, opcode.dst, result); |
|||
break; |
|||
} |
|||
case Macro::Operation::AddImmediate: { |
|||
ProcessResult(opcode.result_operation, opcode.dst, |
|||
GetRegister(opcode.src_a) + opcode.immediate); |
|||
break; |
|||
} |
|||
case Macro::Operation::ExtractInsert: { |
|||
u32 dst = GetRegister(opcode.src_a); |
|||
u32 src = GetRegister(opcode.src_b); |
|||
|
|||
src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask(); |
|||
dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); |
|||
dst |= src << opcode.bf_dst_bit; |
|||
ProcessResult(opcode.result_operation, opcode.dst, dst); |
|||
break; |
|||
} |
|||
case Macro::Operation::ExtractShiftLeftImmediate: { |
|||
u32 dst = GetRegister(opcode.src_a); |
|||
u32 src = GetRegister(opcode.src_b); |
|||
|
|||
u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit; |
|||
|
|||
ProcessResult(opcode.result_operation, opcode.dst, result); |
|||
break; |
|||
} |
|||
case Macro::Operation::ExtractShiftLeftRegister: { |
|||
u32 dst = GetRegister(opcode.src_a); |
|||
u32 src = GetRegister(opcode.src_b); |
|||
|
|||
u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst; |
|||
|
|||
ProcessResult(opcode.result_operation, opcode.dst, result); |
|||
break; |
|||
} |
|||
case Macro::Operation::Read: { |
|||
u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate); |
|||
ProcessResult(opcode.result_operation, opcode.dst, result); |
|||
break; |
|||
} |
|||
case Macro::Operation::Branch: { |
|||
ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); |
|||
u32 value = GetRegister(opcode.src_a); |
|||
bool taken = EvaluateBranchCondition(opcode.branch_condition, value); |
|||
if (taken) { |
|||
// Ignore the delay slot if the branch has the annul bit.
|
|||
if (opcode.branch_annul) { |
|||
pc = base_address + opcode.GetBranchTarget(); |
|||
return true; |
|||
} |
|||
|
|||
delayed_pc = base_address + opcode.GetBranchTarget(); |
|||
// Execute one more instruction due to the delay slot.
|
|||
return Step(true); |
|||
} |
|||
break; |
|||
} |
|||
default: |
|||
UNIMPLEMENTED_MSG("Unimplemented macro operation {}", opcode.operation.Value()); |
|||
break; |
|||
} |
|||
|
|||
// An instruction with the Exit flag will not actually
|
|||
// cause an exit if it's executed inside a delay slot.
|
|||
if (opcode.is_exit && !is_delay_slot) { |
|||
// Exit has a delay slot, execute the next instruction
|
|||
Step(true); |
|||
return false; |
|||
} |
|||
|
|||
return true; |
|||
} |
|||
|
|||
/// Computes `src_a OP src_b` for the given ALU operation.
///
/// The add/subtract family is evaluated in 64 bits so that bit 32 and above reveal the
/// carry/borrow; those operations update carry_flag. Bitwise operations leave it untouched.
///
/// @return The low 32 bits of the result, or 0 for an unimplemented operation.
u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b) {
    const u64 lhs = src_a;
    const u64 rhs = src_b;

    switch (operation) {
    case Macro::ALUOperation::Add: {
        const u64 sum = lhs + rhs;
        carry_flag = (sum >> 32) != 0;
        return static_cast<u32>(sum);
    }
    case Macro::ALUOperation::AddWithCarry: {
        const u64 carry_in = carry_flag ? 1ULL : 0ULL;
        const u64 sum = lhs + rhs + carry_in;
        carry_flag = (sum >> 32) != 0;
        return static_cast<u32>(sum);
    }
    case Macro::ALUOperation::Subtract: {
        // The flag is set when the subtraction did not wrap below zero.
        const u64 difference = lhs - rhs;
        carry_flag = (difference >> 32) == 0;
        return static_cast<u32>(difference);
    }
    case Macro::ALUOperation::SubtractWithBorrow: {
        // A clear carry flag subtracts one extra.
        const u64 borrow_in = carry_flag ? 0ULL : 1ULL;
        const u64 difference = lhs - rhs - borrow_in;
        carry_flag = (difference >> 32) == 0;
        return static_cast<u32>(difference);
    }
    case Macro::ALUOperation::Xor:
        return src_a ^ src_b;
    case Macro::ALUOperation::Or:
        return src_a | src_b;
    case Macro::ALUOperation::And:
        return src_a & src_b;
    case Macro::ALUOperation::AndNot:
        return src_a & ~src_b;
    case Macro::ALUOperation::Nand:
        return ~(src_a & src_b);

    default:
        UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", operation);
        return 0;
    }
}
|||
|
|||
/// Applies a result operation to `result`.
///
/// Depending on `operation` this stores either `result` or a freshly fetched parameter in
/// register `reg`, and may additionally update the method address and/or send a value.
void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result) {
    switch (operation) {
    case Macro::ResultOperation::IgnoreAndFetch: {
        // The result is discarded; the register receives the next parameter.
        const u32 fetched = FetchParameter();
        SetRegister(reg, fetched);
        break;
    }
    case Macro::ResultOperation::Move:
        // Plain register write.
        SetRegister(reg, result);
        break;
    case Macro::ResultOperation::MoveAndSetMethod:
        // Register write, and the result also becomes the method address.
        SetRegister(reg, result);
        SetMethodAddress(result);
        break;
    case Macro::ResultOperation::FetchAndSend: {
        // The register receives the next parameter, the result is sent.
        const u32 fetched = FetchParameter();
        SetRegister(reg, fetched);
        Send(result);
        break;
    }
    case Macro::ResultOperation::MoveAndSend:
        // Register write, then the same value is sent.
        SetRegister(reg, result);
        Send(result);
        break;
    case Macro::ResultOperation::FetchAndSetMethod: {
        // The register receives the next parameter, the result becomes the method address.
        const u32 fetched = FetchParameter();
        SetRegister(reg, fetched);
        SetMethodAddress(result);
        break;
    }
    case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: {
        // Register write + method address update, then the next parameter is sent.
        SetRegister(reg, result);
        SetMethodAddress(result);
        const u32 fetched = FetchParameter();
        Send(fetched);
        break;
    }
    case Macro::ResultOperation::MoveAndSetMethodSend: {
        // Register write + method address update, then bits 12:17 of the result are sent.
        SetRegister(reg, result);
        SetMethodAddress(result);
        const u32 payload = (result >> 12) & 0b111111;
        Send(payload);
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation);
        break;
    }
}
|||
|
|||
/// Returns whether a branch with condition `cond` is taken for the tested `value`.
bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const {
    const bool is_zero = value == 0;
    switch (cond) {
    case Macro::BranchCondition::Zero:
        return is_zero;
    case Macro::BranchCondition::NotZero:
        return !is_zero;
    }
    UNREACHABLE();
}
|||
|
|||
/// Reads the opcode at the current program counter.
///
/// The program counter is a byte offset, so it must be word-aligned and inside the code.
Macro::Opcode MacroInterpreterImpl::GetOpcode() const {
    ASSERT((pc % sizeof(u32)) == 0);
    ASSERT(pc < code.size() * sizeof(u32));
    const std::size_t word_index = pc / sizeof(u32);
    return {code[word_index]};
}
|||
|
|||
/// Returns the value of the given register; the index is bounds-checked by `at`.
u32 MacroInterpreterImpl::GetRegister(u32 register_id) const {
    const u32 value = registers.at(register_id);
    return value;
}
|||
|
|||
/// Stores `value` in the given register.
///
/// Register 0 is the hardwired zero register, so writes to it are silently dropped.
void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) {
    if (register_id != 0) {
        registers.at(register_id) = value;
    }
}
|||
|
|||
/// Replaces the raw method address word consulted by the next Send.
void MacroInterpreterImpl::SetMethodAddress(u32 address) {
    auto& raw_word = method_address.raw;
    raw_word = address;
}
|||
|
|||
/// Calls the engine method at the current method address with `value`, then advances the
/// address by the method increment.
void MacroInterpreterImpl::Send(u32 value) {
    maxwell3d.CallMethod(method_address.address, value, true);

    const auto next_address = method_address.address.Value() + method_address.increment.Value();
    method_address.address.Assign(next_address);
}
|||
|
|||
/// Returns the engine register value for `method`, as reported by the Maxwell3D engine.
u32 MacroInterpreterImpl::Read(u32 method) const {
    const u32 value = maxwell3d.GetRegisterValue(method);
    return value;
}
|||
|
|||
/// Returns the next unread input parameter and advances the read index.
u32 MacroInterpreterImpl::FetchParameter() {
    ASSERT(next_parameter_index < num_parameters);
    const u32 value = parameters[next_parameter_index];
    ++next_parameter_index;
    return value;
}
|||
} // Anonymous namespace
|
|||
|
|||
/// Forwards the engine to the MacroEngine base and keeps a reference for Compile.
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_)
    : MacroEngine{maxwell3d_}
    , maxwell3d{maxwell3d_} {}
|||
|
|||
std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { |
|||
return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); |
|||
} |
|||
|
|||
} // namespace Tegra
|
|||
@ -1,27 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project |
|||
// SPDX-License-Identifier: GPL-2.0-or-later |
|||
|
|||
#pragma once |
|||
|
|||
#include <vector> |
|||
|
|||
#include "common/common_types.h" |
|||
#include "video_core/macro/macro.h" |
|||
|
|||
namespace Tegra { |
|||
namespace Engines { |
|||
class Maxwell3D; |
|||
} |
|||
|
|||
class MacroInterpreter final : public MacroEngine { |
|||
public: |
|||
explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d_); |
|||
|
|||
protected: |
|||
std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override; |
|||
|
|||
private: |
|||
Engines::Maxwell3D& maxwell3d; |
|||
}; |
|||
|
|||
} // namespace Tegra |
|||
@ -1,678 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|||
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
|||
|
|||
#include <array>
|
|||
#include <bitset>
|
|||
#include <optional>
|
|||
|
|||
#include <xbyak/xbyak.h>
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/bit_field.h"
|
|||
#include "common/logging/log.h"
|
|||
#include "common/x64/xbyak_abi.h"
|
|||
#include "common/x64/xbyak_util.h"
|
|||
#include "video_core/engines/maxwell_3d.h"
|
|||
#include "video_core/macro/macro_interpreter.h"
|
|||
#include "video_core/macro/macro_jit_x64.h"
|
|||
|
|||
namespace Tegra { |
|||
namespace { |
|||
constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; |
|||
constexpr Xbyak::Reg32 RESULT = Xbyak::util::r10d; |
|||
constexpr Xbyak::Reg64 MAX_PARAMETER = Xbyak::util::r11; |
|||
constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; |
|||
constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; |
|||
constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; |
|||
|
|||
constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ |
|||
STATE, |
|||
RESULT, |
|||
MAX_PARAMETER, |
|||
PARAMETERS, |
|||
METHOD_ADDRESS, |
|||
BRANCH_HOLDER, |
|||
}); |
|||
|
|||
// Arbitrarily chosen based on current booting games.
|
|||
constexpr size_t MAX_CODE_SIZE = 0x10000; |
|||
|
|||
std::bitset<32> PersistentCallerSavedRegs() { |
|||
return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; |
|||
} |
|||
|
|||
/// @brief Code generator mode passed to Xbyak::CodeGenerator.
/// W^X constraints must be enforced per platform, as there is not yet a global "NO_EXECUTE"
/// support flag. The speed loss is minimal and may in fact be negligible; for peace of mind
/// only the OSes known to have had W^X issues are listed here.
#if defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
static const auto default_cg_mode = Xbyak::DontSetProtectRWE;
#else
static const auto default_cg_mode = nullptr; // Default mode: RWE allowed
#endif
|
|||
|
|||
/// CachedMacro implementation that compiles macro code to x86-64 at construction time and
/// runs the generated function on Execute.
class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro {
public:
    /// Compiles `code_` immediately.
    ///
    /// @param maxwell3d_ Engine the generated code operates on.
    /// @param code_ Macro code words; held by reference, so it must outlive this object.
    explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
        : Xbyak::CodeGenerator(MAX_CODE_SIZE, default_cg_mode), code{code_},
          maxwell3d{maxwell3d_} {
        Compile();
    }

    /// Runs the compiled program with the given parameters.
    void Execute(const std::vector<u32>& parameters, u32 method) override;

    // One emitter per macro operation.
    void Compile_ALU(Macro::Opcode opcode);
    void Compile_AddImmediate(Macro::Opcode opcode);
    void Compile_ExtractInsert(Macro::Opcode opcode);
    void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
    void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
    void Compile_Read(Macro::Opcode opcode);
    void Compile_Branch(Macro::Opcode opcode);

private:
    /// Scans the whole program to decide which optimizations are applicable.
    void Optimizer_ScanFlags();

    /// Emits prologue, every instruction and epilogue, then publishes `program`.
    void Compile();
    /// Emits the instruction at `pc` together with its delay slot / exit handling.
    bool Compile_NextInstruction();

    /// Emits a parameter fetch; the returned register holds the fetched value.
    Xbyak::Reg32 Compile_FetchParameter();
    /// Emits a load of macro register `index` into `dst` and returns `dst`.
    Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);

    void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
    void Compile_Send(Xbyak::Reg32 value);

    Macro::Opcode GetOpCode() const;

    /// State block addressed through the STATE register by generated code.
    struct JITState {
        Engines::Maxwell3D* maxwell3d{};
        std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
        u32 carry_flag{};
    };
    // Generated code loads the engine pointer with `qword[STATE]`, i.e. from offset 0.
    static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
    /// Signature of the generated function: state, first parameter, end of parameters.
    using ProgramType = void (*)(JITState*, const u32*, const u32*);

    /// Switches consulted by the emitters.
    struct OptimizerState {
        bool can_skip_carry{};
        bool has_delayed_pc{};
        bool zero_reg_skip{};
        bool skip_dummy_addimmediate{};
        bool optimize_for_method_move{};
        bool enable_asserts{};
    };
    OptimizerState optimizer{};

    /// Opcode following the one being compiled, if any.
    std::optional<Macro::Opcode> next_opcode{};
    /// Entry point of the generated code; stays null until Compile has finished.
    ProgramType program{nullptr};

    /// Per-instruction labels, indexed by instruction index.
    std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
    std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
    /// Label placed right before the epilogue.
    Xbyak::Label end_of_code{};

    bool is_delay_slot{};
    /// Index of the instruction currently being compiled.
    u32 pc{};

    const std::vector<u32>& code;
    Engines::Maxwell3D& maxwell3d;
};
|||
|
|||
void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { |
|||
ASSERT_OR_EXECUTE(program != nullptr, { return; }); |
|||
JITState state{}; |
|||
state.maxwell3d = &maxwell3d; |
|||
state.registers = {}; |
|||
program(&state, parameters.data(), parameters.data() + parameters.size()); |
|||
} |
|||
|
|||
/// Emits code for an ALU instruction, leaving the result in RESULT.
///
/// src_a is loaded into RESULT and src_b into eax. With optimizer.zero_reg_skip enabled,
/// loads of the zero register and operations involving it are not emitted, except for the
/// carry-consuming operations which always load both operands.
///
/// NOTE(review): when an operation is skipped, RESULT holds src_a only if src_a was loaded;
/// if src_a is the zero register RESULT keeps whatever it held before - confirm that the
/// skipped cases (e.g. 0 + b, a & 0) are intended.
void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
    const bool a_is_zero = opcode.src_a == 0;
    const bool b_is_zero = opcode.src_b == 0;
    const bool neither_is_zero = !a_is_zero && !b_is_zero;
    const bool consumes_carry = opcode.alu_operation == Macro::ALUOperation::AddWithCarry ||
                                opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow;

    // Decisions shared by the cases below.
    const bool load_both = !optimizer.zero_reg_skip || consumes_carry;
    const bool emit_binary = !optimizer.zero_reg_skip || neither_is_zero;
    const bool emit_when_a_set = !optimizer.zero_reg_skip || !a_is_zero;

    Xbyak::Reg32 src_a;
    Xbyak::Reg32 src_b;
    if (load_both || !a_is_zero) {
        src_a = Compile_GetRegister(opcode.src_a, RESULT);
    }
    if (load_both || !b_is_zero) {
        src_b = Compile_GetRegister(opcode.src_b, eax);
    }

    switch (opcode.alu_operation) {
    case Macro::ALUOperation::Add:
        if (emit_binary) {
            add(src_a, src_b);
        }
        if (!optimizer.can_skip_carry) {
            setc(byte[STATE + offsetof(JITState, carry_flag)]);
        }
        break;
    case Macro::ALUOperation::AddWithCarry:
        // Move the stored carry into CF, add with it, then store the new carry.
        bt(dword[STATE + offsetof(JITState, carry_flag)], 0);
        adc(src_a, src_b);
        setc(byte[STATE + offsetof(JITState, carry_flag)]);
        break;
    case Macro::ALUOperation::Subtract:
        if (emit_binary) {
            sub(src_a, src_b);
            // The carry is only stored when the subtraction itself was emitted.
            if (!optimizer.can_skip_carry) {
                setc(byte[STATE + offsetof(JITState, carry_flag)]);
            }
        }
        break;
    case Macro::ALUOperation::SubtractWithBorrow:
        bt(dword[STATE + offsetof(JITState, carry_flag)], 0);
        sbb(src_a, src_b);
        setc(byte[STATE + offsetof(JITState, carry_flag)]);
        break;
    case Macro::ALUOperation::Xor:
        if (emit_binary) {
            xor_(src_a, src_b);
        }
        break;
    case Macro::ALUOperation::Or:
        if (emit_binary) {
            or_(src_a, src_b);
        }
        break;
    case Macro::ALUOperation::And:
        if (emit_binary) {
            and_(src_a, src_b);
        }
        break;
    case Macro::ALUOperation::AndNot:
        if (emit_when_a_set) {
            not_(src_b);
            and_(src_a, src_b);
        }
        break;
    case Macro::ALUOperation::Nand:
        if (emit_when_a_set) {
            and_(src_a, src_b);
            not_(src_a);
        }
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", opcode.alu_operation.Value());
        break;
    }
    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|||
|
|||
/// Emits code for `dst = src_a + immediate`, leaving the sum in RESULT before the result
/// operation is processed.
///
/// Nothing is emitted in two cases, both controlled by optimizer flags:
///  - a Move into register 0 (used as a no-op placeholder), and
///  - a MoveAndSetMethod that is immediately followed by another MoveAndSetMethod to the
///    same destination register.
void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) {
    if (optimizer.skip_dummy_addimmediate) {
        // Games tend to use this as an exit instruction placeholder: it encodes an
        // instruction that does nothing, so there is nothing to emit.
        if (opcode.result_operation == Macro::ResultOperation::Move && opcode.dst == 0) {
            return;
        }
    }
    // Check for redundant moves
    if (optimizer.optimize_for_method_move &&
        opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) {
        if (next_opcode.has_value()) {
            const auto next = *next_opcode;
            if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod &&
                opcode.dst == next.dst) {
                return;
            }
        }
    }
    if (optimizer.zero_reg_skip && opcode.src_a == 0) {
        // 0 + immediate: load the constant directly.
        if (opcode.immediate == 0) {
            xor_(RESULT, RESULT);
        } else {
            mov(RESULT, opcode.immediate);
        }
    } else {
        auto result = Compile_GetRegister(opcode.src_a, RESULT);
        // Every non-zero immediate must modify the register. The previous `> 2` check left
        // an immediate of exactly 2 without any emitted instruction, so the JIT produced
        // src_a + 0 where the interpreter computes src_a + 2.
        if (opcode.immediate == 1) {
            inc(result);
        } else if (opcode.immediate > 1) {
            add(result, opcode.immediate);
        } else if (opcode.immediate < 0) {
            sub(result, opcode.immediate * -1);
        }
    }
    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|||
|
|||
/// Emits code that extracts a bitfield from src_b (at bf_src_bit) and inserts it into
/// src_a at bf_dst_bit, leaving the combined value in RESULT.
void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) {
    auto target = Compile_GetRegister(opcode.src_a, RESULT);
    auto field = Compile_GetRegister(opcode.src_b, eax);

    // Clear the destination bits in the target value.
    const u32 keep_mask = ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit);
    and_(target, keep_mask);

    // Isolate the source field and move it to its destination position.
    shr(field, opcode.bf_src_bit);
    and_(field, opcode.GetBitfieldMask());
    shl(field, opcode.bf_dst_bit);

    or_(target, field);

    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|||
|
|||
/// Emits `RESULT = ((src_b >> src_a) & mask) << bf_dst_bit`.
///
/// The variable shift count is loaded into ecx so that its low byte can be used as the
/// shift operand.
void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
    const auto shift_count = Compile_GetRegister(opcode.src_a, ecx);
    const auto value = Compile_GetRegister(opcode.src_b, RESULT);

    shr(value, shift_count.cvt8());
    and_(value, opcode.GetBitfieldMask());
    shl(value, opcode.bf_dst_bit);

    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|||
|
|||
/// Emits `RESULT = ((src_b >> bf_src_bit) & mask) << src_a`.
///
/// The variable shift count is loaded into ecx so that its low byte can be used as the
/// shift operand.
void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
    const auto shift_count = Compile_GetRegister(opcode.src_a, ecx);
    const auto value = Compile_GetRegister(opcode.src_b, RESULT);

    shr(value, opcode.bf_src_bit);
    and_(value, opcode.GetBitfieldMask());
    shl(value, shift_count.cvt8());

    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|||
|
|||
/// Emits code that reads engine register `src_a + immediate` into RESULT.
///
/// The register index is first computed in RESULT, then used to index
/// Maxwell3D::regs.reg_array through the engine pointer stored at the start of JITState.
void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
    if (optimizer.zero_reg_skip && opcode.src_a == 0) {
        // 0 + immediate: load the constant index directly.
        if (opcode.immediate == 0) {
            xor_(RESULT, RESULT);
        } else {
            mov(RESULT, opcode.immediate);
        }
    } else {
        auto result = Compile_GetRegister(opcode.src_a, RESULT);
        // Every non-zero immediate must adjust the index. The previous `> 2` check left an
        // immediate of exactly 2 without any emitted instruction, which made the JIT read
        // register src_a + 0 where the interpreter reads src_a + 2.
        if (opcode.immediate == 1) {
            inc(result);
        } else if (opcode.immediate > 1) {
            add(result, opcode.immediate);
        } else if (opcode.immediate < 0) {
            sub(result, opcode.immediate * -1);
        }
    }

    // Equivalent to Engines::Maxwell3D::GetRegisterValue:
    if (optimizer.enable_asserts) {
        // Trap when the index is outside the register array.
        Xbyak::Label pass_range_check;
        cmp(RESULT, static_cast<u32>(Engines::Maxwell3D::Regs::NUM_REGS));
        jb(pass_range_check);
        int3();
        L(pass_range_check);
    }
    mov(rax, qword[STATE]);
    mov(RESULT,
        dword[rax + offsetof(Engines::Maxwell3D, regs) +
              offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]);

    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|||
|
|||
/// Trampoline called from generated code: forwards a method call to the engine.
void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
    auto& engine = *maxwell3d;
    engine.CallMethod(method_address.address, value, true);
}
|||
|
|||
/// Emits a call to Send(engine, METHOD_ADDRESS, value), followed by code that advances the
/// address part of METHOD_ADDRESS by its increment.
///
/// As used below, METHOD_ADDRESS keeps the address in bits 0..11 and the increment in
/// bits 12..17.
void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
    constexpr u32 increment_field = 0x3f000;
    constexpr u32 address_bits = 0xfff;
    constexpr u32 increment_bits = 0x3f;

    // Persistent caller-saved registers have to survive the call.
    Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
    mov(Common::X64::ABI_PARAM1, qword[STATE]);
    mov(Common::X64::ABI_PARAM2.cvt32(), METHOD_ADDRESS);
    mov(Common::X64::ABI_PARAM3.cvt32(), value);
    Common::X64::CallFarFunction(*this, &Send);
    Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);

    Xbyak::Label no_increment{};
    // A zero increment leaves the method address untouched.
    test(METHOD_ADDRESS, increment_field);
    je(no_increment);

    // Split address and increment, add them, then put the increment field back.
    mov(ecx, METHOD_ADDRESS);
    and_(METHOD_ADDRESS, address_bits);
    shr(ecx, 12);
    and_(ecx, increment_bits);
    lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]);
    sal(ecx, 12);
    or_(eax, ecx);

    mov(METHOD_ADDRESS, eax);

    L(no_increment);
}
|||
|
|||
/// Emits a conditional branch on src_a.
///
/// Without delayed branches in the program, a direct conditional jump to the target label
/// is emitted. Otherwise an annulled branch clears BRANCH_HOLDER and jumps immediately,
/// while a non-annulled branch stores the address of a small stub in BRANCH_HOLDER and
/// continues at delay_skip[pc]; the stub clears BRANCH_HOLDER and jumps to the target.
void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
    ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
    // The branch target is a byte offset; labels are indexed by instruction.
    const s32 target_index =
        static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32));

    Xbyak::Label not_taken;
    auto condition = Compile_GetRegister(opcode.src_a, eax);
    cmp(condition, 0); // test(condition, condition);

    if (!optimizer.has_delayed_pc) {
        switch (opcode.branch_condition) {
        case Macro::BranchCondition::Zero:
            je(labels[target_index], T_NEAR);
            break;
        case Macro::BranchCondition::NotZero:
            jne(labels[target_index], T_NEAR);
            break;
        }
    } else {
        // Skip everything below when the branch is not taken.
        switch (opcode.branch_condition) {
        case Macro::BranchCondition::Zero:
            jne(not_taken, T_NEAR);
            break;
        case Macro::BranchCondition::NotZero:
            je(not_taken, T_NEAR);
            break;
        }

        if (opcode.branch_annul) {
            xor_(BRANCH_HOLDER, BRANCH_HOLDER);
            jmp(labels[target_index], T_NEAR);
        } else {
            Xbyak::Label deferred_jump{};
            Xbyak::Label arm_delay{};
            jmp(arm_delay, T_NEAR);

            // Stub reached later through BRANCH_HOLDER.
            L(deferred_jump);
            xor_(BRANCH_HOLDER, BRANCH_HOLDER);
            jmp(labels[target_index], T_NEAR);

            L(arm_delay);
            mov(BRANCH_HOLDER, deferred_jump);
            jmp(delay_skip[pc], T_NEAR);
        }
    }

    L(not_taken);
}
|||
|
|||
void MacroJITx64Impl::Optimizer_ScanFlags() { |
|||
optimizer.can_skip_carry = true; |
|||
optimizer.has_delayed_pc = false; |
|||
for (auto raw_op : code) { |
|||
Macro::Opcode op{}; |
|||
op.raw = raw_op; |
|||
|
|||
if (op.operation == Macro::Operation::ALU) { |
|||
// Scan for any ALU operations which actually use the carry flag, if they don't exist in
|
|||
// our current code we can skip emitting the carry flag handling operations
|
|||
if (op.alu_operation == Macro::ALUOperation::AddWithCarry || |
|||
op.alu_operation == Macro::ALUOperation::SubtractWithBorrow) { |
|||
optimizer.can_skip_carry = false; |
|||
} |
|||
} |
|||
|
|||
if (op.operation == Macro::Operation::Branch) { |
|||
if (!op.branch_annul) { |
|||
optimizer.has_delayed_pc = true; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
void MacroJITx64Impl::Compile() { |
|||
labels.fill(Xbyak::Label()); |
|||
|
|||
Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); |
|||
// JIT state
|
|||
mov(STATE, Common::X64::ABI_PARAM1); |
|||
mov(PARAMETERS, Common::X64::ABI_PARAM2); |
|||
mov(MAX_PARAMETER, Common::X64::ABI_PARAM3); |
|||
xor_(RESULT, RESULT); |
|||
xor_(METHOD_ADDRESS, METHOD_ADDRESS); |
|||
xor_(BRANCH_HOLDER, BRANCH_HOLDER); |
|||
|
|||
mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); |
|||
|
|||
// Track get register for zero registers and mark it as no-op
|
|||
optimizer.zero_reg_skip = true; |
|||
|
|||
// AddImmediate tends to be used as a NOP instruction, if we detect this we can
|
|||
// completely skip the entire code path and no emit anything
|
|||
optimizer.skip_dummy_addimmediate = true; |
|||
|
|||
// SMO tends to emit a lot of unnecessary method moves, we can mitigate this by only emitting
|
|||
// one if our register isn't "dirty"
|
|||
optimizer.optimize_for_method_move = true; |
|||
|
|||
// Enable run-time assertions in JITted code
|
|||
optimizer.enable_asserts = false; |
|||
|
|||
// Check to see if we can skip emitting certain instructions
|
|||
Optimizer_ScanFlags(); |
|||
|
|||
const u32 op_count = static_cast<u32>(code.size()); |
|||
for (u32 i = 0; i < op_count; i++) { |
|||
if (i < op_count - 1) { |
|||
pc = i + 1; |
|||
next_opcode = GetOpCode(); |
|||
} else { |
|||
next_opcode = {}; |
|||
} |
|||
pc = i; |
|||
Compile_NextInstruction(); |
|||
} |
|||
|
|||
L(end_of_code); |
|||
|
|||
Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); |
|||
ret(); |
|||
ready(); |
|||
program = getCode<ProgramType>(); |
|||
} |
|||
|
|||
bool MacroJITx64Impl::Compile_NextInstruction() { |
|||
const auto opcode = GetOpCode(); |
|||
if (labels[pc].getAddress()) { |
|||
return false; |
|||
} |
|||
|
|||
L(labels[pc]); |
|||
|
|||
switch (opcode.operation) { |
|||
case Macro::Operation::ALU: |
|||
Compile_ALU(opcode); |
|||
break; |
|||
case Macro::Operation::AddImmediate: |
|||
Compile_AddImmediate(opcode); |
|||
break; |
|||
case Macro::Operation::ExtractInsert: |
|||
Compile_ExtractInsert(opcode); |
|||
break; |
|||
case Macro::Operation::ExtractShiftLeftImmediate: |
|||
Compile_ExtractShiftLeftImmediate(opcode); |
|||
break; |
|||
case Macro::Operation::ExtractShiftLeftRegister: |
|||
Compile_ExtractShiftLeftRegister(opcode); |
|||
break; |
|||
case Macro::Operation::Read: |
|||
Compile_Read(opcode); |
|||
break; |
|||
case Macro::Operation::Branch: |
|||
Compile_Branch(opcode); |
|||
break; |
|||
default: |
|||
UNIMPLEMENTED_MSG("Unimplemented opcode {}", opcode.operation.Value()); |
|||
break; |
|||
} |
|||
|
|||
if (optimizer.has_delayed_pc) { |
|||
if (opcode.is_exit) { |
|||
mov(rax, end_of_code); |
|||
test(BRANCH_HOLDER, BRANCH_HOLDER); |
|||
cmove(BRANCH_HOLDER, rax); |
|||
// Jump to next instruction to skip delay slot check
|
|||
je(labels[pc + 1], T_NEAR); |
|||
} else { |
|||
// TODO(ogniK): Optimize delay slot branching
|
|||
Xbyak::Label no_delay_slot{}; |
|||
test(BRANCH_HOLDER, BRANCH_HOLDER); |
|||
je(no_delay_slot, T_NEAR); |
|||
mov(rax, BRANCH_HOLDER); |
|||
xor_(BRANCH_HOLDER, BRANCH_HOLDER); |
|||
jmp(rax); |
|||
L(no_delay_slot); |
|||
} |
|||
L(delay_skip[pc]); |
|||
if (opcode.is_exit) { |
|||
return false; |
|||
} |
|||
} else { |
|||
test(BRANCH_HOLDER, BRANCH_HOLDER); |
|||
jne(end_of_code, T_NEAR); |
|||
if (opcode.is_exit) { |
|||
inc(BRANCH_HOLDER); |
|||
return false; |
|||
} |
|||
} |
|||
return true; |
|||
} |
|||
|
|||
/// Logs a critical message about a macro parameter access outside the parameter range.
///
/// @param parameter      Address that was about to be read.
/// @param max_parameter  Upper bound of the parameter range; the address one u32 below it is
///                       reported as the last parameter.
static void WarnInvalidParameter(uintptr_t parameter, uintptr_t max_parameter) {
    const auto last_parameter = max_parameter - sizeof(u32);
    LOG_CRITICAL(HW_GPU,
                 "Macro JIT: invalid parameter access 0x{:x} (0x{:x} is the last parameter)",
                 parameter, last_parameter);
}
|||
|
|||
/// Emits code that loads the u32 at PARAMETERS into eax and advances PARAMETERS by one u32.
///
/// Before the load, the emitted code compares PARAMETERS with MAX_PARAMETER and, when it is
/// not below, calls WarnInvalidParameter with both values.
/// NOTE(review): the load is emitted after the warning path as well, so an out-of-range
/// PARAMETERS pointer is still dereferenced.
///
/// @return eax, which holds the fetched parameter in the emitted code.
Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() {
    Xbyak::Label in_range{};
    cmp(PARAMETERS, MAX_PARAMETER);
    jb(in_range, T_NEAR);

    // Out-of-range path: push/pop the PersistentCallerSavedRegs() set around the host call.
    Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
    mov(Common::X64::ABI_PARAM1, PARAMETERS);
    mov(Common::X64::ABI_PARAM2, MAX_PARAMETER);
    Common::X64::CallFarFunction(*this, &WarnInvalidParameter);
    Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);

    L(in_range);
    mov(eax, dword[PARAMETERS]);
    add(PARAMETERS, sizeof(u32));
    return eax;
}
|||
|
|||
/// Emits code that places the value of macro register `index` into `dst`.
///
/// @param index  Macro register number; 0 yields a zeroed `dst` without touching JITState.
/// @param dst    Host register that receives the value.
/// @return `dst`, for convenient chaining by callers.
Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
    // Register 0 is always zero
    if (index == 0) {
        xor_(dst, dst);
        return dst;
    }

    // Load from the register's slot inside the JITState pointed to by STATE.
    const auto slot_offset = offsetof(JITState, registers) + index * sizeof(u32);
    mov(dst, dword[STATE + slot_offset]);
    return dst;
}
|||
|
|||
/// Emits the code that applies an instruction's result operation.
///
/// Depending on `operation`, the emitted code stores RESULT or a freshly fetched parameter in
/// macro register `reg`, copies RESULT into METHOD_ADDRESS, and/or passes a value to
/// Compile_Send.
///
/// @param operation  Result operation to emit.
/// @param reg        Destination macro register; stores to register 0 emit nothing.
void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
    // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
    // register, so a store targeting it is dropped here.
    const auto store_register = [this](u32 target, const Xbyak::Reg32& value) {
        if (target != 0) {
            const auto slot_offset = offsetof(JITState, registers) + target * sizeof(u32);
            mov(dword[STATE + slot_offset], value);
        }
    };
    const auto use_as_method = [this](const Xbyak::Reg32& value) { mov(METHOD_ADDRESS, value); };

    switch (operation) {
    case Macro::ResultOperation::IgnoreAndFetch: {
        const Xbyak::Reg32 fetched = Compile_FetchParameter();
        store_register(reg, fetched);
        break;
    }
    case Macro::ResultOperation::Move:
        store_register(reg, RESULT);
        break;
    case Macro::ResultOperation::MoveAndSetMethod:
        store_register(reg, RESULT);
        use_as_method(RESULT);
        break;
    case Macro::ResultOperation::FetchAndSend: {
        // Fetch parameter and send result.
        const Xbyak::Reg32 fetched = Compile_FetchParameter();
        store_register(reg, fetched);
        Compile_Send(RESULT);
        break;
    }
    case Macro::ResultOperation::MoveAndSend:
        // Move and send result.
        store_register(reg, RESULT);
        Compile_Send(RESULT);
        break;
    case Macro::ResultOperation::FetchAndSetMethod: {
        // Fetch parameter and use result as Method Address.
        const Xbyak::Reg32 fetched = Compile_FetchParameter();
        store_register(reg, fetched);
        use_as_method(RESULT);
        break;
    }
    case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: {
        // Move result and use as Method Address, then fetch and send parameter.
        store_register(reg, RESULT);
        use_as_method(RESULT);
        const Xbyak::Reg32 fetched = Compile_FetchParameter();
        Compile_Send(fetched);
        break;
    }
    case Macro::ResultOperation::MoveAndSetMethodSend:
        // Move result and use as Method Address, then send bits 12:17 of result.
        store_register(reg, RESULT);
        use_as_method(RESULT);
        shr(RESULT, 12);
        and_(RESULT, 0x3F); // keep the low six bits after the shift
        Compile_Send(RESULT);
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented macro operation {}", operation);
        break;
    }
}
|||
|
|||
/// Returns the opcode built from the code word at the current `pc`.
/// Asserts that `pc` lies within `code`.
Macro::Opcode MacroJITx64Impl::GetOpCode() const {
    ASSERT(pc < code.size());
    const auto raw_word = code[pc];
    return Macro::Opcode{raw_word};
}
|||
} // Anonymous namespace
|
|||
|
|||
/// Constructs the JIT macro engine: forwards the Maxwell3D engine to the MacroEngine base and
/// keeps a reference to it for later use by Compile().
MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_)
    : MacroEngine{maxwell3d_}
    , maxwell3d{maxwell3d_} {}
|||
|
|||
std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { |
|||
return std::make_unique<MacroJITx64Impl>(maxwell3d, code); |
|||
} |
|||
} // namespace Tegra
|
|||
@ -1,26 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project |
|||
// SPDX-License-Identifier: GPL-2.0-or-later |
|||
|
|||
#pragma once |
|||
|
|||
#include "common/common_types.h" |
|||
#include "video_core/macro/macro.h" |
|||
|
|||
namespace Tegra { |
|||
|
|||
namespace Engines { |
|||
class Maxwell3D; |
|||
} |
|||
|
|||
class MacroJITx64 final : public MacroEngine { |
|||
public: |
|||
explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); |
|||
|
|||
protected: |
|||
std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override; |
|||
|
|||
private: |
|||
Engines::Maxwell3D& maxwell3d; |
|||
}; |
|||
|
|||
} // namespace Tegra |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue