Browse Source
[common, core] remove unneeded memory indirection overhead at startup (#3306)
[common, core] remove unneeded memory indirection overhead at startup (#3306)
for core stuff: just remove unique ptrs that dont need any pointer stability at all (afterall its an allocation within an allocation so yeah) for fibers: Main reasoning behind this is because virtualBuffer<> is stupidly fucking expensive and it also clutters my fstat view ALSO mmap is a syscall, syscalls are bad for performance or whatever ALSO std::vector<> is better suited for handling this kind of "fixed size thing where its like big but not THAT big" (512 KiB isn't going to kill your memory usage for each fiber...) for core.cpp stuff - inlines stuff into std::optional<> as opposed to std::unique_ptr<> (because yknow, we are making the Impl from an unique_ptr, allocating within an allocation is unnecessary) - reorganizes the structures a bit so padding doesnt screw us up (it's not perfect but eh saves a measly 44 bytes) - removes unused/dead code - uses std::vector<> instead of std::deque<> no perf impact expected, maybe some initialisation boost but very minimal impact nonethless lto gets rid of most calls anyways - the heavy issue is with shared_ptr and the cache coherency from the atomics... but i clumped them together because well, they kinda do not suffer from cache coherency - hopefully not a mistake this balloons the size of Impl to about 1.67 MB - which is fine because we throw it in the stack anyways REST OF INTERFACES: most of them ballooned in size as well, but overhead is ok since its an allocation within an alloc, no stack is used (when it comes to storing these i mean) Signed-off-by: lizzie lizzie@eden-emu.dev Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3306 Reviewed-by: CamilleLaVey <camillelavey99@gmail.com> Reviewed-by: MaranBr <maranbr@eden-emu.dev> Co-authored-by: lizzie <lizzie@eden-emu.dev> Co-committed-by: lizzie <lizzie@eden-emu.dev>pull/3325/head
committed by
crueter
No known key found for this signature in database
GPG Key ID: 425ACD2D4830EBC6
40 changed files with 2581 additions and 2942 deletions
-
11src/audio_core/adsp/adsp.cpp
-
9src/audio_core/adsp/adsp.h
-
38src/audio_core/opus/decoder.cpp
-
6src/audio_core/opus/decoder.h
-
110src/common/fiber.cpp
-
10src/common/fiber.h
-
8src/common/wall_clock.cpp
-
7src/common/wall_clock.h
-
303src/core/arm/dynarmic/arm_dynarmic_32.cpp
-
49src/core/arm/dynarmic/arm_dynarmic_32.h
-
382src/core/arm/dynarmic/arm_dynarmic_64.cpp
-
57src/core/arm/dynarmic/arm_dynarmic_64.h
-
177src/core/core.cpp
-
13src/core/core.h
-
14src/core/core_timing.cpp
-
7src/core/core_timing.h
-
14src/core/hle/kernel/k_process.cpp
-
82src/core/hle/kernel/k_process.h
-
119src/core/hle/kernel/kernel.cpp
-
6src/core/hle/service/am/applet.h
-
91src/core/hle/service/ns/platform_service_manager.cpp
-
12src/video_core/CMakeLists.txt
-
67src/video_core/dma_pusher.cpp
-
13src/video_core/engines/maxwell_3d.cpp
-
4src/video_core/engines/maxwell_3d.h
-
1667src/video_core/macro.cpp
-
40src/video_core/macro.h
-
140src/video_core/macro/macro.cpp
-
606src/video_core/macro/macro_hle.cpp
-
33src/video_core/macro/macro_hle.h
-
362src/video_core/macro/macro_interpreter.cpp
-
27src/video_core/macro/macro_interpreter.h
-
678src/video_core/macro/macro_jit_x64.cpp
-
26src/video_core/macro/macro_jit_x64.h
-
17src/video_core/renderer_opengl/gl_texture_cache.cpp
-
2src/video_core/renderer_opengl/gl_texture_cache.h
-
51src/video_core/renderer_vulkan/vk_rasterizer.cpp
-
82src/video_core/renderer_vulkan/vk_texture_cache.cpp
-
175src/video_core/renderer_vulkan/vk_texture_cache.h
-
8src/video_core/texture_cache/texture_cache.h
1667
src/video_core/macro.cpp
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -1,140 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|||
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
|||
|
|||
#include <cstring>
|
|||
#include <fstream>
|
|||
#include <optional>
|
|||
#include <span>
|
|||
|
|||
#include "common/container_hash.h"
|
|||
|
|||
#include <fstream>
|
|||
#include "common/assert.h"
|
|||
#include "common/fs/fs.h"
|
|||
#include "common/fs/path_util.h"
|
|||
#include "common/settings.h"
|
|||
#include "video_core/engines/maxwell_3d.h"
|
|||
#include "video_core/macro/macro.h"
|
|||
#include "video_core/macro/macro_hle.h"
|
|||
#include "video_core/macro/macro_interpreter.h"
|
|||
|
|||
#ifdef ARCHITECTURE_x86_64
|
|||
#include "video_core/macro/macro_jit_x64.h"
|
|||
#endif
|
|||
|
|||
namespace Tegra { |
|||
|
|||
/// Writes the words of a macro into the "macros" folder inside the dump directory.
/// @param hash       Hash of the macro; used to build the file name.
/// @param code       Macro words that are written verbatim as binary data.
/// @param decompiled When true the "decompiled_" file name is used. If the plain
///                   "<hash>.macro" file already exists it is renamed to that name
///                   and nothing is written.
static void Dump(u64 hash, std::span<const u32> code, bool decompiled = false) {
    const auto dump_root = Common::FS::GetEdenPath(Common::FS::EdenPath::DumpDir);
    const auto macros_root = dump_root / "macros";
    const bool dirs_ready =
        Common::FS::CreateDir(dump_root) && Common::FS::CreateDir(macros_root);
    if (!dirs_ready) {
        LOG_ERROR(Common_Filesystem, "Failed to create macro dump directories");
        return;
    }

    const auto plain_path = macros_root / fmt::format("{:016x}.macro", hash);
    auto target_path = plain_path;
    if (decompiled) {
        const auto tagged_path = macros_root / fmt::format("decompiled_{:016x}.macro", hash);
        if (Common::FS::Exists(plain_path)) {
            // Best effort: the result of the rename is intentionally ignored.
            (void)Common::FS::RenameFile(plain_path, tagged_path);
            return;
        }
        target_path = tagged_path;
    }

    std::fstream output(target_path, std::ios::out | std::ios::binary);
    if (!output) {
        LOG_ERROR(Common_Filesystem, "Unable to open or create file at {}",
                  Common::FS::PathToUTF8String(target_path));
        return;
    }
    const auto* raw_bytes = reinterpret_cast<const char*>(code.data());
    output.write(raw_bytes, code.size_bytes());
}
|||
|
|||
/// Creates the engine: allocates the HLE macro registry for the given Maxwell3D
/// instance and keeps a reference to that instance.
MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d_)
    : hle_macros{std::make_unique<HLEMacro>(maxwell3d_)},
      maxwell3d{maxwell3d_} {
}

/// Nothing to release by hand; members clean up after themselves.
MacroEngine::~MacroEngine() = default;
|||
|
|||
/// Appends one word to the code uploaded for `method`, creating the entry on first use.
void MacroEngine::AddCode(u32 method, u32 data) {
    auto& uploaded_words = uploaded_macro_code[method];
    uploaded_words.push_back(data);
}
|||
|
|||
/// Forgets everything known about `method`: first the cached (compiled) macro,
/// then the uploaded code it was built from.
void MacroEngine::ClearCode(u32 method) {
    if (const auto cached = macro_cache.find(method); cached != macro_cache.end()) {
        macro_cache.erase(cached);
    }
    if (const auto uploaded = uploaded_macro_code.find(method);
        uploaded != uploaded_macro_code.end()) {
        uploaded_macro_code.erase(uploaded);
    }
}
|||
|
|||
/// Runs the macro registered for `method` with the given parameters.
///
/// A macro that is already in `macro_cache` is executed directly (HLE program if one was
/// attached, otherwise the compiled program). Otherwise the uploaded code is compiled first.
/// When no code was uploaded exactly at `method`, the uploaded macros are searched for one
/// whose code range contains `method`; the tail of that macro starting at `method` is copied
/// into its own entry and compiled. If nothing matches, an assertion is raised and the call
/// returns without executing anything.
void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
    auto compiled_macro = macro_cache.find(method);
    if (compiled_macro != macro_cache.end()) {
        const auto& cache_info = compiled_macro->second;
        if (cache_info.has_hle_program) {
            cache_info.hle_program->Execute(parameters, method);
        } else {
            maxwell3d.RefreshParameters();
            cache_info.lle_program->Execute(parameters, method);
        }
        return;
    }

    // Macro not compiled, check if it's uploaded and if so, compile it
    std::optional<u32> mid_method;
    const auto macro_code = uploaded_macro_code.find(method);
    if (macro_code == uploaded_macro_code.end()) {
        for (const auto& [method_base, code] : uploaded_macro_code) {
            if (method >= method_base && (method - method_base) < code.size()) {
                mid_method = method_base;
                break;
            }
        }
        if (!mid_method.has_value()) {
            ASSERT_MSG(false, "Macro 0x{0:x} was not uploaded", method);
            return;
        }
    }
    auto& cache_info = macro_cache[method];

    // View of the code that actually gets compiled. It is kept so the dump below never has
    // to dereference `macro_code`, which is the end iterator on the mid-method path.
    std::span<const u32> compiled_code;
    if (!mid_method.has_value()) {
        cache_info.lle_program = Compile(macro_code->second);
        cache_info.hash = Common::HashValue(macro_code->second);
        compiled_code = macro_code->second;
    } else {
        const auto& macro_cached = uploaded_macro_code[mid_method.value()];
        const auto rebased_method = method - mid_method.value();
        auto& code = uploaded_macro_code[method];
        code.resize(macro_cached.size() - rebased_method);
        std::memcpy(code.data(), macro_cached.data() + rebased_method,
                    code.size() * sizeof(u32));
        cache_info.hash = Common::HashValue(code);
        cache_info.lle_program = Compile(code);
        compiled_code = code;
    }

    auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
    if (!hle_program || Settings::values.disable_macro_hle) {
        maxwell3d.RefreshParameters();
        cache_info.lle_program->Execute(parameters, method);
    } else {
        cache_info.has_hle_program = true;
        cache_info.hle_program = std::move(hle_program);
        cache_info.hle_program->Execute(parameters, method);
    }

    if (Settings::values.dump_macros) {
        Dump(cache_info.hash, compiled_code, cache_info.has_hle_program);
    }
}
|||
|
|||
/// Creates the macro engine for `maxwell3d`.
/// The x86-64 JIT is returned only when it is compiled in (ARCHITECTURE_x86_64) and the
/// `disable_macro_jit` setting is off; in every other case the interpreter is returned.
std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d) {
#ifdef ARCHITECTURE_x86_64
    if (!Settings::values.disable_macro_jit) {
        return std::make_unique<MacroJITx64>(maxwell3d);
    }
#endif
    return std::make_unique<MacroInterpreter>(maxwell3d);
}
|||
|
|||
} // namespace Tegra
|
|||
@ -1,606 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|||
|
|||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|||
|
|||
#include <array>
|
|||
#include <vector>
|
|||
#include "common/assert.h"
|
|||
#include "common/scope_exit.h"
|
|||
#include "video_core/dirty_flags.h"
|
|||
#include "video_core/engines/draw_manager.h"
|
|||
#include "video_core/engines/maxwell_3d.h"
|
|||
#include "video_core/macro/macro.h"
|
|||
#include "video_core/macro/macro_hle.h"
|
|||
#include "video_core/memory_manager.h"
|
|||
#include "video_core/rasterizer_interface.h"
|
|||
|
|||
namespace Tegra { |
|||
|
|||
using Maxwell3D = Engines::Maxwell3D; |
|||
|
|||
namespace { |
|||
|
|||
/// Returns true for the topologies on the allow-list below.
/// Quads, QuadStrip, Polygon and any value that is not listed are reported as not safe.
bool IsTopologySafe(Maxwell3D::Regs::PrimitiveTopology topology) {
    using Topology = Maxwell3D::Regs::PrimitiveTopology;
    switch (topology) {
    case Topology::Points:
    case Topology::Lines:
    case Topology::LineLoop:
    case Topology::LineStrip:
    case Topology::Triangles:
    case Topology::TriangleStrip:
    case Topology::TriangleFan:
    case Topology::LinesAdjacency:
    case Topology::LineStripAdjacency:
    case Topology::TrianglesAdjacency:
    case Topology::TriangleStripAdjacency:
    case Topology::Patches:
        return true;
    default:
        // Covers Quads, QuadStrip, Polygon and unknown values.
        return false;
    }
}
|||
|
|||
class HLEMacroImpl : public CachedMacro { |
|||
public: |
|||
explicit HLEMacroImpl(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} |
|||
|
|||
protected: |
|||
Maxwell3D& maxwell3d; |
|||
}; |
|||
|
|||
/*
|
|||
* @note: these macros have two versions, a normal and extended version, with the extended version |
|||
* also assigning the base vertex/instance. |
|||
*/ |
|||
template <bool extended> |
|||
class HLE_DrawArraysIndirect final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_DrawArraysIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); |
|||
if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { |
|||
Fallback(parameters); |
|||
return; |
|||
} |
|||
|
|||
auto& params = maxwell3d.draw_manager->GetIndirectParams(); |
|||
params.is_byte_count = false; |
|||
params.is_indexed = false; |
|||
params.include_count = false; |
|||
params.count_start_address = 0; |
|||
params.indirect_start_address = maxwell3d.GetMacroAddress(1); |
|||
params.buffer_size = 4 * sizeof(u32); |
|||
params.max_draw_counts = 1; |
|||
params.stride = 0; |
|||
|
|||
if constexpr (extended) { |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance); |
|||
} |
|||
|
|||
maxwell3d.draw_manager->DrawArrayIndirect(topology); |
|||
|
|||
if constexpr (extended) { |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::None; |
|||
maxwell3d.replace_table.clear(); |
|||
} |
|||
} |
|||
|
|||
private: |
|||
void Fallback(const std::vector<u32>& parameters) { |
|||
SCOPE_EXIT { |
|||
if (extended) { |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::None; |
|||
maxwell3d.replace_table.clear(); |
|||
} |
|||
}; |
|||
maxwell3d.RefreshParameters(); |
|||
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); |
|||
|
|||
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); |
|||
const u32 vertex_first = parameters[3]; |
|||
const u32 vertex_count = parameters[1]; |
|||
|
|||
if (!IsTopologySafe(topology) && |
|||
static_cast<size_t>(maxwell3d.GetMaxCurrentVertices()) < |
|||
static_cast<size_t>(vertex_first) + static_cast<size_t>(vertex_count)) { |
|||
ASSERT_MSG(false, "Faulty draw!"); |
|||
return; |
|||
} |
|||
|
|||
const u32 base_instance = parameters[4]; |
|||
if constexpr (extended) { |
|||
maxwell3d.regs.global_base_instance_index = base_instance; |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance); |
|||
} |
|||
|
|||
maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance, |
|||
instance_count); |
|||
|
|||
if constexpr (extended) { |
|||
maxwell3d.regs.global_base_instance_index = 0; |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::None; |
|||
maxwell3d.replace_table.clear(); |
|||
} |
|||
} |
|||
}; |
|||
|
|||
/*
|
|||
* @note: these macros have two versions, a normal and extended version, with the extended version |
|||
* also assigning the base vertex/instance. |
|||
*/ |
|||
template <bool extended> |
|||
class HLE_DrawIndexedIndirect final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_DrawIndexedIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); |
|||
if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { |
|||
Fallback(parameters); |
|||
return; |
|||
} |
|||
|
|||
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); |
|||
const u32 element_base = parameters[4]; |
|||
const u32 base_instance = parameters[5]; |
|||
maxwell3d.regs.vertex_id_base = element_base; |
|||
maxwell3d.regs.global_base_vertex_index = element_base; |
|||
maxwell3d.regs.global_base_instance_index = base_instance; |
|||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
|||
if constexpr (extended) { |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); |
|||
} |
|||
auto& params = maxwell3d.draw_manager->GetIndirectParams(); |
|||
params.is_byte_count = false; |
|||
params.is_indexed = true; |
|||
params.include_count = false; |
|||
params.count_start_address = 0; |
|||
params.indirect_start_address = maxwell3d.GetMacroAddress(1); |
|||
params.buffer_size = 5 * sizeof(u32); |
|||
params.max_draw_counts = 1; |
|||
params.stride = 0; |
|||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
|||
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); |
|||
maxwell3d.regs.vertex_id_base = 0x0; |
|||
maxwell3d.regs.global_base_vertex_index = 0x0; |
|||
maxwell3d.regs.global_base_instance_index = 0x0; |
|||
if constexpr (extended) { |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::None; |
|||
maxwell3d.replace_table.clear(); |
|||
} |
|||
} |
|||
|
|||
private: |
|||
void Fallback(const std::vector<u32>& parameters) { |
|||
maxwell3d.RefreshParameters(); |
|||
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); |
|||
const u32 element_base = parameters[4]; |
|||
const u32 base_instance = parameters[5]; |
|||
maxwell3d.regs.vertex_id_base = element_base; |
|||
maxwell3d.regs.global_base_vertex_index = element_base; |
|||
maxwell3d.regs.global_base_instance_index = base_instance; |
|||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
|||
if constexpr (extended) { |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); |
|||
} |
|||
|
|||
maxwell3d.draw_manager->DrawIndex( |
|||
static_cast<Tegra::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), parameters[3], |
|||
parameters[1], element_base, base_instance, instance_count); |
|||
|
|||
maxwell3d.regs.vertex_id_base = 0x0; |
|||
maxwell3d.regs.global_base_vertex_index = 0x0; |
|||
maxwell3d.regs.global_base_instance_index = 0x0; |
|||
if constexpr (extended) { |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::None; |
|||
maxwell3d.replace_table.clear(); |
|||
} |
|||
} |
|||
}; |
|||
|
|||
class HLE_MultiLayerClear final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_MultiLayerClear(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
maxwell3d.RefreshParameters(); |
|||
ASSERT(parameters.size() == 1); |
|||
|
|||
const Maxwell3D::Regs::ClearSurface clear_params{parameters[0]}; |
|||
const u32 rt_index = clear_params.RT; |
|||
const u32 num_layers = maxwell3d.regs.rt[rt_index].depth; |
|||
ASSERT(clear_params.layer == 0); |
|||
|
|||
maxwell3d.regs.clear_surface.raw = clear_params.raw; |
|||
maxwell3d.draw_manager->Clear(num_layers); |
|||
} |
|||
}; |
|||
|
|||
class HLE_MultiDrawIndexedIndirectCount final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_MultiDrawIndexedIndirectCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
const auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[2]); |
|||
if (!IsTopologySafe(topology)) { |
|||
Fallback(parameters); |
|||
return; |
|||
} |
|||
|
|||
const u32 start_indirect = parameters[0]; |
|||
const u32 end_indirect = parameters[1]; |
|||
if (start_indirect >= end_indirect) { |
|||
// Nothing to do.
|
|||
return; |
|||
} |
|||
|
|||
const u32 padding = parameters[3]; // padding is in words
|
|||
|
|||
// size of each indirect segment
|
|||
const u32 indirect_words = 5 + padding; |
|||
const u32 stride = indirect_words * sizeof(u32); |
|||
const std::size_t draw_count = end_indirect - start_indirect; |
|||
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); |
|||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
|||
auto& params = maxwell3d.draw_manager->GetIndirectParams(); |
|||
params.is_byte_count = false; |
|||
params.is_indexed = true; |
|||
params.include_count = true; |
|||
params.count_start_address = maxwell3d.GetMacroAddress(4); |
|||
params.indirect_start_address = maxwell3d.GetMacroAddress(5); |
|||
params.buffer_size = stride * draw_count; |
|||
params.max_draw_counts = draw_count; |
|||
params.stride = stride; |
|||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); |
|||
maxwell3d.SetHLEReplacementAttributeType(0, 0x648, |
|||
Maxwell3D::HLEReplacementAttributeType::DrawID); |
|||
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::None; |
|||
maxwell3d.replace_table.clear(); |
|||
} |
|||
|
|||
private: |
|||
void Fallback(const std::vector<u32>& parameters) { |
|||
SCOPE_EXIT { |
|||
// Clean everything.
|
|||
maxwell3d.regs.vertex_id_base = 0x0; |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::None; |
|||
maxwell3d.replace_table.clear(); |
|||
}; |
|||
maxwell3d.RefreshParameters(); |
|||
const u32 start_indirect = parameters[0]; |
|||
const u32 end_indirect = parameters[1]; |
|||
if (start_indirect >= end_indirect) { |
|||
// Nothing to do.
|
|||
return; |
|||
} |
|||
const auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[2]); |
|||
const u32 padding = parameters[3]; |
|||
const std::size_t max_draws = parameters[4]; |
|||
|
|||
const u32 indirect_words = 5 + padding; |
|||
const std::size_t first_draw = start_indirect; |
|||
const std::size_t effective_draws = end_indirect - start_indirect; |
|||
const std::size_t last_draw = start_indirect + (std::min)(effective_draws, max_draws); |
|||
|
|||
for (std::size_t index = first_draw; index < last_draw; index++) { |
|||
const std::size_t base = index * indirect_words + 5; |
|||
const u32 base_vertex = parameters[base + 3]; |
|||
const u32 base_instance = parameters[base + 4]; |
|||
maxwell3d.regs.vertex_id_base = base_vertex; |
|||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); |
|||
maxwell3d.SetHLEReplacementAttributeType( |
|||
0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); |
|||
maxwell3d.CallMethod(0x8e3, 0x648, true); |
|||
maxwell3d.CallMethod(0x8e4, static_cast<u32>(index), true); |
|||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
|||
maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], |
|||
base_vertex, base_instance, parameters[base + 1]); |
|||
} |
|||
} |
|||
}; |
|||
|
|||
class HLE_DrawIndirectByteCount final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_DrawIndirectByteCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback(); |
|||
|
|||
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0xFFFFU); |
|||
if (!force && (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology))) { |
|||
Fallback(parameters); |
|||
return; |
|||
} |
|||
auto& params = maxwell3d.draw_manager->GetIndirectParams(); |
|||
params.is_byte_count = true; |
|||
params.is_indexed = false; |
|||
params.include_count = false; |
|||
params.count_start_address = 0; |
|||
params.indirect_start_address = maxwell3d.GetMacroAddress(2); |
|||
params.buffer_size = 4; |
|||
params.max_draw_counts = 1; |
|||
params.stride = parameters[1]; |
|||
maxwell3d.regs.draw.begin = parameters[0]; |
|||
maxwell3d.regs.draw_auto_stride = parameters[1]; |
|||
maxwell3d.regs.draw_auto_byte_count = parameters[2]; |
|||
|
|||
maxwell3d.draw_manager->DrawArrayIndirect(topology); |
|||
} |
|||
|
|||
private: |
|||
void Fallback(const std::vector<u32>& parameters) { |
|||
maxwell3d.RefreshParameters(); |
|||
|
|||
maxwell3d.regs.draw.begin = parameters[0]; |
|||
maxwell3d.regs.draw_auto_stride = parameters[1]; |
|||
maxwell3d.regs.draw_auto_byte_count = parameters[2]; |
|||
|
|||
maxwell3d.draw_manager->DrawArray( |
|||
maxwell3d.regs.draw.topology, 0, |
|||
maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1); |
|||
} |
|||
}; |
|||
|
|||
class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
maxwell3d.RefreshParameters(); |
|||
const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2; |
|||
const u32 address = maxwell3d.regs.shadow_scratch[24]; |
|||
auto& const_buffer = maxwell3d.regs.const_buffer; |
|||
const_buffer.size = 0x7000; |
|||
const_buffer.address_high = (address >> 24) & 0xFF; |
|||
const_buffer.address_low = address << 8; |
|||
const_buffer.offset = offset; |
|||
} |
|||
}; |
|||
|
|||
class HLE_D7333D26E0A93EDE final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_D7333D26E0A93EDE(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
maxwell3d.RefreshParameters(); |
|||
const size_t index = parameters[0]; |
|||
const u32 address = maxwell3d.regs.shadow_scratch[42 + index]; |
|||
const u32 size = maxwell3d.regs.shadow_scratch[47 + index]; |
|||
auto& const_buffer = maxwell3d.regs.const_buffer; |
|||
const_buffer.size = size; |
|||
const_buffer.address_high = (address >> 24) & 0xFF; |
|||
const_buffer.address_low = address << 8; |
|||
} |
|||
}; |
|||
|
|||
class HLE_BindShader final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_BindShader(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
maxwell3d.RefreshParameters(); |
|||
auto& regs = maxwell3d.regs; |
|||
const u32 index = parameters[0]; |
|||
if ((parameters[1] - regs.shadow_scratch[28 + index]) == 0) { |
|||
return; |
|||
} |
|||
|
|||
regs.pipelines[index & 0xF].offset = parameters[2]; |
|||
maxwell3d.dirty.flags[VideoCommon::Dirty::Shaders] = true; |
|||
regs.shadow_scratch[28 + index] = parameters[1]; |
|||
regs.shadow_scratch[34 + index] = parameters[2]; |
|||
|
|||
const u32 address = parameters[4]; |
|||
auto& const_buffer = regs.const_buffer; |
|||
const_buffer.size = 0x10000; |
|||
const_buffer.address_high = (address >> 24) & 0xFF; |
|||
const_buffer.address_low = address << 8; |
|||
|
|||
const size_t bind_group_id = parameters[3] & 0x7F; |
|||
auto& bind_group = regs.bind_groups[bind_group_id]; |
|||
bind_group.raw_config = 0x11; |
|||
maxwell3d.ProcessCBBind(bind_group_id); |
|||
} |
|||
}; |
|||
|
|||
class HLE_SetRasterBoundingBox final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_SetRasterBoundingBox(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
maxwell3d.RefreshParameters(); |
|||
const u32 raster_mode = parameters[0]; |
|||
auto& regs = maxwell3d.regs; |
|||
const u32 raster_enabled = maxwell3d.regs.conservative_raster_enable; |
|||
const u32 scratch_data = maxwell3d.regs.shadow_scratch[52]; |
|||
regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F; |
|||
regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled); |
|||
} |
|||
}; |
|||
|
|||
template <size_t base_size> |
|||
class HLE_ClearConstBuffer final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_ClearConstBuffer(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
maxwell3d.RefreshParameters(); |
|||
static constexpr std::array<u32, base_size> zeroes{}; |
|||
auto& regs = maxwell3d.regs; |
|||
regs.const_buffer.size = static_cast<u32>(base_size); |
|||
regs.const_buffer.address_high = parameters[0]; |
|||
regs.const_buffer.address_low = parameters[1]; |
|||
regs.const_buffer.offset = 0; |
|||
maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4); |
|||
} |
|||
}; |
|||
|
|||
class HLE_ClearMemory final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_ClearMemory(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
maxwell3d.RefreshParameters(); |
|||
|
|||
const u32 needed_memory = parameters[2] / sizeof(u32); |
|||
if (needed_memory > zero_memory.size()) { |
|||
zero_memory.resize(needed_memory, 0); |
|||
} |
|||
auto& regs = maxwell3d.regs; |
|||
regs.upload.line_length_in = parameters[2]; |
|||
regs.upload.line_count = 1; |
|||
regs.upload.dest.address_high = parameters[0]; |
|||
regs.upload.dest.address_low = parameters[1]; |
|||
maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); |
|||
maxwell3d.CallMultiMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(inline_data)), |
|||
zero_memory.data(), needed_memory, needed_memory); |
|||
} |
|||
|
|||
private: |
|||
std::vector<u32> zero_memory; |
|||
}; |
|||
|
|||
class HLE_TransformFeedbackSetup final : public HLEMacroImpl { |
|||
public: |
|||
explicit HLE_TransformFeedbackSetup(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
|||
|
|||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
|||
maxwell3d.RefreshParameters(); |
|||
|
|||
auto& regs = maxwell3d.regs; |
|||
regs.transform_feedback_enabled = 1; |
|||
regs.transform_feedback.buffers[0].start_offset = 0; |
|||
regs.transform_feedback.buffers[1].start_offset = 0; |
|||
regs.transform_feedback.buffers[2].start_offset = 0; |
|||
regs.transform_feedback.buffers[3].start_offset = 0; |
|||
|
|||
regs.upload.line_length_in = 4; |
|||
regs.upload.line_count = 1; |
|||
regs.upload.dest.address_high = parameters[0]; |
|||
regs.upload.dest.address_low = parameters[1]; |
|||
maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); |
|||
maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(inline_data)), |
|||
regs.transform_feedback.controls[0].stride, true); |
|||
|
|||
maxwell3d.Rasterizer().RegisterTransformFeedback(regs.upload.dest.Address()); |
|||
} |
|||
}; |
|||
|
|||
} // Anonymous namespace
|
|||
|
|||
HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { |
|||
builders.emplace(0x0D61FC9FAAC9FCADULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_DrawArraysIndirect<false>>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0x8A4D173EB99A8603ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_DrawArraysIndirect<true>>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0x771BB18C62444DA0ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_DrawIndexedIndirect<false>>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0x0217920100488FF7ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_DrawIndexedIndirect<true>>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0x3F5E74B9C9A50164ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_MultiDrawIndexedIndirectCount>( |
|||
maxwell3d__); |
|||
})); |
|||
builders.emplace(0xEAD26C3E2109B06BULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_MultiLayerClear>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0xC713C83D8F63CCF3ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_C713C83D8F63CCF3>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0xD7333D26E0A93EDEULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_D7333D26E0A93EDE>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0xEB29B2A09AA06D38ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_BindShader>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0xDB1341DBEB4C8AF7ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_SetRasterBoundingBox>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0x6C97861D891EDf7EULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_ClearConstBuffer<0x5F00>>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0xD246FDDF3A6173D7ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_ClearConstBuffer<0x7000>>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0xEE4D0004BEC8ECF4ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_ClearMemory>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0xFC0CF27F5FFAA661ULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_TransformFeedbackSetup>(maxwell3d__); |
|||
})); |
|||
builders.emplace(0xB5F74EDB717278ECULL, |
|||
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
|||
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
|||
return std::make_unique<HLE_DrawIndirectByteCount>(maxwell3d__); |
|||
})); |
|||
} |
|||
|
|||
HLEMacro::~HLEMacro() = default; |
|||
|
|||
/// Looks `hash` up in the builder table and, when present, returns a freshly built macro
/// for this object's Maxwell3D engine. Returns nullptr for unknown hashes.
std::unique_ptr<CachedMacro> HLEMacro::GetHLEProgram(u64 hash) const {
    if (const auto builder = builders.find(hash); builder != builders.end()) {
        return builder->second(maxwell3d);
    }
    return nullptr;
}
|||
|
|||
} // namespace Tegra
|
|||
@ -1,33 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project |
|||
// SPDX-License-Identifier: GPL-2.0-or-later |
|||
|
|||
#pragma once |
|||
|
|||
#include <functional> |
|||
#include <memory> |
|||
#include <unordered_map> |
|||
|
|||
#include "common/common_types.h" |
|||
|
|||
namespace Tegra { |
|||
|
|||
namespace Engines { |
|||
class Maxwell3D; |
|||
} |
|||
|
|||
class HLEMacro { |
|||
public: |
|||
explicit HLEMacro(Engines::Maxwell3D& maxwell3d_); |
|||
~HLEMacro(); |
|||
|
|||
// Allocates and returns a cached macro if the hash matches a known function. |
|||
// Returns nullptr otherwise. |
|||
[[nodiscard]] std::unique_ptr<CachedMacro> GetHLEProgram(u64 hash) const; |
|||
|
|||
private: |
|||
Engines::Maxwell3D& maxwell3d; |
|||
std::unordered_map<u64, std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>> |
|||
builders; |
|||
}; |
|||
|
|||
} // namespace Tegra |
|||
@ -1,362 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|||
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
|||
|
|||
#include <array>
|
|||
#include <optional>
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/logging/log.h"
|
|||
#include "video_core/engines/maxwell_3d.h"
|
|||
#include "video_core/macro/macro_interpreter.h"
|
|||
|
|||
namespace Tegra { |
|||
namespace { |
|||
class MacroInterpreterImpl final : public CachedMacro { |
|||
public: |
|||
explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) |
|||
: maxwell3d{maxwell3d_}, code{code_} {} |
|||
|
|||
void Execute(const std::vector<u32>& params, u32 method) override; |
|||
|
|||
private: |
|||
/// Resets the execution engine state, zeroing registers, etc.
|
|||
void Reset(); |
|||
|
|||
/**
|
|||
* Executes a single macro instruction located at the current program counter. Returns whether |
|||
* the interpreter should keep running. |
|||
* |
|||
* @param is_delay_slot Whether the current step is being executed due to a delay slot in a |
|||
* previous instruction. |
|||
*/ |
|||
bool Step(bool is_delay_slot); |
|||
|
|||
/// Calculates the result of an ALU operation. src_a OP src_b;
|
|||
u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); |
|||
|
|||
/// Performs the result operation on the input result and stores it in the specified register
|
|||
/// (if necessary).
|
|||
void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); |
|||
|
|||
/// Evaluates the branch condition and returns whether the branch should be taken or not.
|
|||
bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; |
|||
|
|||
/// Reads an opcode at the current program counter location.
|
|||
Macro::Opcode GetOpcode() const; |
|||
|
|||
/// Returns the specified register's value. Register 0 is hardcoded to always return 0.
|
|||
u32 GetRegister(u32 register_id) const; |
|||
|
|||
/// Sets the register to the input value.
|
|||
void SetRegister(u32 register_id, u32 value); |
|||
|
|||
/// Sets the method address to use for the next Send instruction.
|
|||
void SetMethodAddress(u32 address); |
|||
|
|||
/// Calls a GPU Engine method with the input parameter.
|
|||
void Send(u32 value); |
|||
|
|||
/// Reads a GPU register located at the method address.
|
|||
u32 Read(u32 method) const; |
|||
|
|||
/// Returns the next parameter in the parameter queue.
|
|||
u32 FetchParameter(); |
|||
|
|||
Engines::Maxwell3D& maxwell3d; |
|||
|
|||
/// Current program counter
|
|||
u32 pc{}; |
|||
/// Program counter to execute at after the delay slot is executed.
|
|||
std::optional<u32> delayed_pc; |
|||
|
|||
/// General purpose macro registers.
|
|||
std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {}; |
|||
|
|||
/// Method address to use for the next Send instruction.
|
|||
Macro::MethodAddress method_address = {}; |
|||
|
|||
/// Input parameters of the current macro.
|
|||
std::unique_ptr<u32[]> parameters; |
|||
std::size_t num_parameters = 0; |
|||
std::size_t parameters_capacity = 0; |
|||
/// Index of the next parameter that will be fetched by the 'parm' instruction.
|
|||
u32 next_parameter_index = 0; |
|||
|
|||
bool carry_flag = false; |
|||
const std::vector<u32>& code; |
|||
}; |
|||
|
|||
void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) { |
|||
Reset(); |
|||
|
|||
registers[1] = params[0]; |
|||
num_parameters = params.size(); |
|||
|
|||
if (num_parameters > parameters_capacity) { |
|||
parameters_capacity = num_parameters; |
|||
parameters = std::make_unique<u32[]>(num_parameters); |
|||
} |
|||
std::memcpy(parameters.get(), params.data(), num_parameters * sizeof(u32)); |
|||
|
|||
// Execute the code until we hit an exit condition.
|
|||
bool keep_executing = true; |
|||
while (keep_executing) { |
|||
keep_executing = Step(false); |
|||
} |
|||
|
|||
// Assert the the macro used all the input parameters
|
|||
ASSERT(next_parameter_index == num_parameters); |
|||
} |
|||
|
|||
/// Returns the interpreter to its initial state before a macro is executed.
void MacroInterpreterImpl::Reset() {
    pc = 0;
    delayed_pc.reset();
    carry_flag = false;

    registers.fill(0);
    method_address.raw = 0;

    num_parameters = 0;
    // Fetching starts at index 1 because the first parameter is already placed in $r1.
    next_parameter_index = 1;
}
|||
|
|||
bool MacroInterpreterImpl::Step(bool is_delay_slot) { |
|||
u32 base_address = pc; |
|||
|
|||
Macro::Opcode opcode = GetOpcode(); |
|||
pc += 4; |
|||
|
|||
// Update the program counter if we were delayed
|
|||
if (delayed_pc) { |
|||
ASSERT(is_delay_slot); |
|||
pc = *delayed_pc; |
|||
delayed_pc = {}; |
|||
} |
|||
|
|||
switch (opcode.operation) { |
|||
case Macro::Operation::ALU: { |
|||
u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), |
|||
GetRegister(opcode.src_b)); |
|||
ProcessResult(opcode.result_operation, opcode.dst, result); |
|||
break; |
|||
} |
|||
case Macro::Operation::AddImmediate: { |
|||
ProcessResult(opcode.result_operation, opcode.dst, |
|||
GetRegister(opcode.src_a) + opcode.immediate); |
|||
break; |
|||
} |
|||
case Macro::Operation::ExtractInsert: { |
|||
u32 dst = GetRegister(opcode.src_a); |
|||
u32 src = GetRegister(opcode.src_b); |
|||
|
|||
src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask(); |
|||
dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); |
|||
dst |= src << opcode.bf_dst_bit; |
|||
ProcessResult(opcode.result_operation, opcode.dst, dst); |
|||
break; |
|||
} |
|||
case Macro::Operation::ExtractShiftLeftImmediate: { |
|||
u32 dst = GetRegister(opcode.src_a); |
|||
u32 src = GetRegister(opcode.src_b); |
|||
|
|||
u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit; |
|||
|
|||
ProcessResult(opcode.result_operation, opcode.dst, result); |
|||
break; |
|||
} |
|||
case Macro::Operation::ExtractShiftLeftRegister: { |
|||
u32 dst = GetRegister(opcode.src_a); |
|||
u32 src = GetRegister(opcode.src_b); |
|||
|
|||
u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst; |
|||
|
|||
ProcessResult(opcode.result_operation, opcode.dst, result); |
|||
break; |
|||
} |
|||
case Macro::Operation::Read: { |
|||
u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate); |
|||
ProcessResult(opcode.result_operation, opcode.dst, result); |
|||
break; |
|||
} |
|||
case Macro::Operation::Branch: { |
|||
ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); |
|||
u32 value = GetRegister(opcode.src_a); |
|||
bool taken = EvaluateBranchCondition(opcode.branch_condition, value); |
|||
if (taken) { |
|||
// Ignore the delay slot if the branch has the annul bit.
|
|||
if (opcode.branch_annul) { |
|||
pc = base_address + opcode.GetBranchTarget(); |
|||
return true; |
|||
} |
|||
|
|||
delayed_pc = base_address + opcode.GetBranchTarget(); |
|||
// Execute one more instruction due to the delay slot.
|
|||
return Step(true); |
|||
} |
|||
break; |
|||
} |
|||
default: |
|||
UNIMPLEMENTED_MSG("Unimplemented macro operation {}", opcode.operation.Value()); |
|||
break; |
|||
} |
|||
|
|||
// An instruction with the Exit flag will not actually
|
|||
// cause an exit if it's executed inside a delay slot.
|
|||
if (opcode.is_exit && !is_delay_slot) { |
|||
// Exit has a delay slot, execute the next instruction
|
|||
Step(true); |
|||
return false; |
|||
} |
|||
|
|||
return true; |
|||
} |
|||
|
|||
/// Evaluates `src_a OP src_b`. The four arithmetic operations are computed in 64 bits and
/// update `carry_flag` from the upper half of that result; the bitwise operations leave the
/// flag untouched.
u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b) {
    const u64 lhs = static_cast<u64>(src_a);

    switch (operation) {
    case Macro::ALUOperation::Add: {
        const u64 wide = lhs + src_b;
        // Carry is set when the sum does not fit in 32 bits.
        carry_flag = (wide >> 32) != 0;
        return static_cast<u32>(wide);
    }
    case Macro::ALUOperation::AddWithCarry: {
        const u64 carry_in = carry_flag ? 1ULL : 0ULL;
        const u64 wide = lhs + src_b + carry_in;
        carry_flag = (wide >> 32) != 0;
        return static_cast<u32>(wide);
    }
    case Macro::ALUOperation::Subtract: {
        const u64 wide = lhs - src_b;
        // Carry is set when the subtraction did not wrap below zero.
        carry_flag = (wide >> 32) == 0;
        return static_cast<u32>(wide);
    }
    case Macro::ALUOperation::SubtractWithBorrow: {
        const u64 borrow_in = carry_flag ? 0ULL : 1ULL;
        const u64 wide = lhs - src_b - borrow_in;
        carry_flag = (wide >> 32) == 0;
        return static_cast<u32>(wide);
    }
    case Macro::ALUOperation::Xor:
        return src_a ^ src_b;
    case Macro::ALUOperation::Or:
        return src_a | src_b;
    case Macro::ALUOperation::And:
        return src_a & src_b;
    case Macro::ALUOperation::AndNot:
        return src_a & ~src_b;
    case Macro::ALUOperation::Nand:
        return ~(src_a & src_b);

    default:
        UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", operation);
        return 0;
    }
}
|||
|
|||
/// Carries out the result operation of an instruction: depending on `operation`, register
/// `reg` receives either `result` or the next parameter, and `result` may additionally be
/// used as the new method address and/or sent to the engine.
void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result) {
    // The two ways the destination register can be written.
    const auto store_parameter = [&] { SetRegister(reg, FetchParameter()); };
    const auto store_result = [&] { SetRegister(reg, result); };

    switch (operation) {
    case Macro::ResultOperation::IgnoreAndFetch:
        // The result is discarded; the register gets the next parameter.
        store_parameter();
        break;
    case Macro::ResultOperation::Move:
        store_result();
        break;
    case Macro::ResultOperation::MoveAndSetMethod:
        // The result goes to the register and becomes the method address.
        store_result();
        SetMethodAddress(result);
        break;
    case Macro::ResultOperation::FetchAndSend:
        // The register gets the next parameter, the result is sent.
        store_parameter();
        Send(result);
        break;
    case Macro::ResultOperation::MoveAndSend:
        store_result();
        Send(result);
        break;
    case Macro::ResultOperation::FetchAndSetMethod:
        // The register gets the next parameter, the result becomes the method address.
        store_parameter();
        SetMethodAddress(result);
        break;
    case Macro::ResultOperation::MoveAndSetMethodFetchAndSend:
        // Result -> register and method address, then the next parameter is sent.
        store_result();
        SetMethodAddress(result);
        Send(FetchParameter());
        break;
    case Macro::ResultOperation::MoveAndSetMethodSend:
        // Result -> register and method address, then bits 12:17 of the result are sent.
        store_result();
        SetMethodAddress(result);
        Send((result >> 12) & 0b111111);
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation);
        break;
    }
}
|||
|
|||
/// Returns whether a branch with condition `cond` is taken for the tested `value`.
bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const {
    if (cond == Macro::BranchCondition::Zero) {
        return value == 0;
    }
    if (cond == Macro::BranchCondition::NotZero) {
        return value != 0;
    }
    // No other branch condition is handled.
    UNREACHABLE();
}
|||
|
|||
/// Decodes the instruction word the program counter points at. `pc` is a byte offset and must
/// be word aligned and inside the program.
Macro::Opcode MacroInterpreterImpl::GetOpcode() const {
    ASSERT((pc % sizeof(u32)) == 0);
    ASSERT(pc < code.size() * sizeof(u32));

    const std::size_t word_index = pc / sizeof(u32);
    return {code[word_index]};
}
|||
|
|||
/// Returns the value of macro register `register_id` (bounds-checked access).
u32 MacroInterpreterImpl::GetRegister(u32 register_id) const {
    const u32 value = registers.at(register_id);
    return value;
}
|||
|
|||
/// Stores `value` into macro register `register_id` (bounds-checked access).
/// Register 0 is the hardwired zero register, so writes to it are silently dropped.
void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) {
    if (register_id != 0) {
        registers.at(register_id) = value;
    }
}
|||
|
|||
/// Replaces the whole raw method-address word that the next Send will use.
void MacroInterpreterImpl::SetMethodAddress(u32 address) {
    method_address.raw = address;
}
|||
|
|||
/// Calls the engine method at the current method address with `value`, then advances the
/// method address by its increment field.
void MacroInterpreterImpl::Send(u32 value) {
    maxwell3d.CallMethod(method_address.address, value, true);

    // Step the address by the increment stored alongside it.
    const auto current_address = method_address.address.Value();
    const auto step = method_address.increment.Value();
    method_address.address.Assign(current_address + step);
}
|||
|
|||
/// Returns the engine's register value for `method`.
u32 MacroInterpreterImpl::Read(u32 method) const {
    const u32 register_value = maxwell3d.GetRegisterValue(method);
    return register_value;
}
|||
|
|||
/// Returns the parameter at `next_parameter_index` and advances the index by one.
u32 MacroInterpreterImpl::FetchParameter() {
    ASSERT(next_parameter_index < num_parameters);

    const u32 parameter = parameters[next_parameter_index];
    ++next_parameter_index;
    return parameter;
}
|||
} // Anonymous namespace
|
|||
|
|||
/// Forwards the engine to the MacroEngine base and also keeps a reference of its own, which
/// Compile() hands to every interpreter instance.
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_)
    : MacroEngine{maxwell3d_},
      maxwell3d{maxwell3d_} {
}
|||
|
|||
std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { |
|||
return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); |
|||
} |
|||
|
|||
} // namespace Tegra
|
|||
@ -1,27 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project |
|||
// SPDX-License-Identifier: GPL-2.0-or-later |
|||
|
|||
#pragma once |
|||
|
|||
#include <vector> |
|||
|
|||
#include "common/common_types.h" |
|||
#include "video_core/macro/macro.h" |
|||
|
|||
namespace Tegra { |
|||
namespace Engines { |
|||
class Maxwell3D; |
|||
} |
|||
|
|||
class MacroInterpreter final : public MacroEngine { |
|||
public: |
|||
explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d_); |
|||
|
|||
protected: |
|||
std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override; |
|||
|
|||
private: |
|||
Engines::Maxwell3D& maxwell3d; |
|||
}; |
|||
|
|||
} // namespace Tegra |
|||
@ -1,678 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|||
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
|||
|
|||
#include <array>
|
|||
#include <bitset>
|
|||
#include <optional>
|
|||
|
|||
#include <xbyak/xbyak.h>
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/bit_field.h"
|
|||
#include "common/logging/log.h"
|
|||
#include "common/x64/xbyak_abi.h"
|
|||
#include "common/x64/xbyak_util.h"
|
|||
#include "video_core/engines/maxwell_3d.h"
|
|||
#include "video_core/macro/macro_interpreter.h"
|
|||
#include "video_core/macro/macro_jit_x64.h"
|
|||
|
|||
namespace Tegra { |
|||
namespace { |
|||
constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; |
|||
constexpr Xbyak::Reg32 RESULT = Xbyak::util::r10d; |
|||
constexpr Xbyak::Reg64 MAX_PARAMETER = Xbyak::util::r11; |
|||
constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; |
|||
constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; |
|||
constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; |
|||
|
|||
constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ |
|||
STATE, |
|||
RESULT, |
|||
MAX_PARAMETER, |
|||
PARAMETERS, |
|||
METHOD_ADDRESS, |
|||
BRANCH_HOLDER, |
|||
}); |
|||
|
|||
// Arbitrarily chosen based on current booting games.
|
|||
constexpr size_t MAX_CODE_SIZE = 0x10000; |
|||
|
|||
std::bitset<32> PersistentCallerSavedRegs() { |
|||
return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; |
|||
} |
|||
|
|||
/// @brief Second constructor argument passed to Xbyak::CodeGenerator.
/// W^X constraints must be enforced here, as there is not yet a global "NO_EXECUTE" support
/// flag. The speed loss is minimal, and may in fact be negligible; the list below simply
/// covers the OSes known to have had W^X issues.
#if defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
static const auto default_cg_mode = Xbyak::DontSetProtectRWE;
#else
static const auto default_cg_mode = nullptr; // Allow RWE
#endif
|
|||
|
|||
/// CachedMacro that translates a macro program to x86-64 code with Xbyak when it is
/// constructed and calls the generated function on every Execute().
class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro {
public:
    /// Stores references to the engine and the macro code, then compiles immediately.
    explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
        : Xbyak::CodeGenerator(MAX_CODE_SIZE, default_cg_mode), code{code_},
          maxwell3d{maxwell3d_} {
        Compile();
    }

    /// Runs the compiled program with the given parameter list.
    void Execute(const std::vector<u32>& parameters, u32 method) override;

    // One emitter per macro operation.
    void Compile_ALU(Macro::Opcode opcode);
    void Compile_AddImmediate(Macro::Opcode opcode);
    void Compile_ExtractInsert(Macro::Opcode opcode);
    void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
    void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
    void Compile_Read(Macro::Opcode opcode);
    void Compile_Branch(Macro::Opcode opcode);

private:
    /// Scans the whole program to decide which optimizations are applicable.
    void Optimizer_ScanFlags();

    /// Emits the complete program and stores its entry point in `program`.
    void Compile();
    /// Emits the instruction at `pc`.
    bool Compile_NextInstruction();

    Xbyak::Reg32 Compile_FetchParameter();
    Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);

    void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
    void Compile_Send(Xbyak::Reg32 value);

    Macro::Opcode GetOpCode() const;

    /// Run-time state the generated code works on; its address is the first program argument.
    struct JITState {
        Engines::Maxwell3D* maxwell3d{};
        std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
        u32 carry_flag{};
    };
    // Generated code loads the engine pointer from offset 0 of the state.
    static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
    /// Signature of the generated function: state, first parameter, one past the last one.
    using ProgramType = void (*)(JITState*, const u32*, const u32*);

    /// Switches that control which shortcuts the emitters take.
    struct OptimizerState {
        bool can_skip_carry{};
        bool has_delayed_pc{};
        bool zero_reg_skip{};
        bool skip_dummy_addimmediate{};
        bool optimize_for_method_move{};
        bool enable_asserts{};
    };
    OptimizerState optimizer{};

    /// Opcode following the one being compiled, when there is one.
    std::optional<Macro::Opcode> next_opcode{};
    /// Entry point of the generated code.
    ProgramType program{nullptr};

    /// Per-instruction labels, indexed by instruction number.
    std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
    std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
    /// Label bound at the epilogue of the generated function.
    Xbyak::Label end_of_code{};

    bool is_delay_slot{};
    /// Index of the instruction being compiled.
    u32 pc{};

    /// Macro program being compiled (not owned).
    const std::vector<u32>& code;
    Engines::Maxwell3D& maxwell3d;
};
|||
|
|||
void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { |
|||
ASSERT_OR_EXECUTE(program != nullptr, { return; }); |
|||
JITState state{}; |
|||
state.maxwell3d = &maxwell3d; |
|||
state.registers = {}; |
|||
program(&state, parameters.data(), parameters.data() + parameters.size()); |
|||
} |
|||
|
|||
/// Emits code for an ALU instruction. Operand A is loaded through RESULT and operand B
/// through eax; every operation writes to operand A's register. With `zero_reg_skip` enabled,
/// loads of register 0 and operations involving it are left out as described per case below.
void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
    const bool a_is_zero_reg = opcode.src_a == 0;
    const bool b_is_zero_reg = opcode.src_b == 0;
    // The carry-consuming operations never take the zero-register shortcut.
    const bool consumes_carry = opcode.alu_operation == Macro::ALUOperation::AddWithCarry ||
                                opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow;

    const bool skipping = optimizer.zero_reg_skip;
    const bool load_everything = !skipping || consumes_carry;
    // Emission predicates: "neither operand is register 0" and "operand A is not register 0".
    const bool emit_if_both_nonzero = !skipping || (!a_is_zero_reg && !b_is_zero_reg);
    const bool emit_if_a_nonzero = !skipping || !a_is_zero_reg;

    Xbyak::Reg32 src_a;
    Xbyak::Reg32 src_b;
    if (load_everything || !a_is_zero_reg) {
        src_a = Compile_GetRegister(opcode.src_a, RESULT);
    }
    if (load_everything || !b_is_zero_reg) {
        src_b = Compile_GetRegister(opcode.src_b, eax);
    }

    switch (opcode.alu_operation) {
    case Macro::ALUOperation::Add:
        if (emit_if_both_nonzero) {
            add(src_a, src_b);
        }
        // The carry store is emitted whether or not the add itself was.
        if (!optimizer.can_skip_carry) {
            setc(byte[STATE + offsetof(JITState, carry_flag)]);
        }
        break;
    case Macro::ALUOperation::AddWithCarry:
        // Load the saved carry into CF, add with it, and save the new carry.
        bt(dword[STATE + offsetof(JITState, carry_flag)], 0);
        adc(src_a, src_b);
        setc(byte[STATE + offsetof(JITState, carry_flag)]);
        break;
    case Macro::ALUOperation::Subtract:
        if (emit_if_both_nonzero) {
            sub(src_a, src_b);
            // Here the carry store is only emitted together with the sub.
            if (!optimizer.can_skip_carry) {
                setc(byte[STATE + offsetof(JITState, carry_flag)]);
            }
        }
        break;
    case Macro::ALUOperation::SubtractWithBorrow:
        bt(dword[STATE + offsetof(JITState, carry_flag)], 0);
        sbb(src_a, src_b);
        setc(byte[STATE + offsetof(JITState, carry_flag)]);
        break;
    case Macro::ALUOperation::Xor:
        if (emit_if_both_nonzero) {
            xor_(src_a, src_b);
        }
        break;
    case Macro::ALUOperation::Or:
        if (emit_if_both_nonzero) {
            or_(src_a, src_b);
        }
        break;
    case Macro::ALUOperation::And:
        if (emit_if_both_nonzero) {
            and_(src_a, src_b);
        }
        break;
    case Macro::ALUOperation::AndNot:
        if (emit_if_a_nonzero) {
            not_(src_b);
            and_(src_a, src_b);
        }
        break;
    case Macro::ALUOperation::Nand:
        if (emit_if_a_nonzero) {
            and_(src_a, src_b);
            not_(src_a);
        }
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", opcode.alu_operation.Value());
        break;
    }

    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|||
|
|||
/// Emits code for `dst = src_a + immediate` followed by the instruction's result operation.
///
/// Nothing at all is emitted in two cases: the instruction is a dummy (`Move` into register
/// 0, with `skip_dummy_addimmediate` enabled), or it is a `MoveAndSetMethod` whose successor
/// is another `MoveAndSetMethod` to the same destination (with `optimize_for_method_move`
/// enabled).
void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) {
    if (optimizer.skip_dummy_addimmediate) {
        // Games tend to use this as an exit instruction placeholder. It's to encode an
        // instruction without doing anything. In our case we can just not emit anything.
        if (opcode.result_operation == Macro::ResultOperation::Move && opcode.dst == 0) {
            return;
        }
    }
    // Check for redundant moves
    if (optimizer.optimize_for_method_move &&
        opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) {
        if (next_opcode.has_value()) {
            const auto next = *next_opcode;
            if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod &&
                opcode.dst == next.dst) {
                return;
            }
        }
    }

    if (optimizer.zero_reg_skip && opcode.src_a == 0) {
        // Register 0 reads as zero, so the result is just the immediate.
        if (opcode.immediate == 0) {
            xor_(RESULT, RESULT);
        } else {
            mov(RESULT, opcode.immediate);
        }
    } else {
        auto result = Compile_GetRegister(opcode.src_a, RESULT);
        const s32 immediate = opcode.immediate;
        // An immediate of zero needs no code. Every other value must be applied; the previous
        // `> 2` test silently dropped an immediate of exactly 2.
        if (immediate == 1) {
            inc(result);
        } else if (immediate > 1) {
            add(result, immediate);
        } else if (immediate < 0) {
            sub(result, -immediate);
        }
    }
    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|||
|
|||
/// Emits code that extracts a bitfield from src_b (starting at bf_src_bit) and inserts it
/// into src_a at bf_dst_bit. The combined value ends up in RESULT.
void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) {
    auto target = Compile_GetRegister(opcode.src_a, RESULT);
    auto field = Compile_GetRegister(opcode.src_b, eax);

    // Clear the destination field in the target.
    const u32 keep_mask = ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit);
    and_(target, keep_mask);

    // Isolate the source field and move it to the destination position.
    shr(field, opcode.bf_src_bit);
    and_(field, opcode.GetBitfieldMask());
    shl(field, opcode.bf_dst_bit);

    or_(target, field);

    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|||
|
|||
/// Emits `RESULT = ((src_b >> src_a) & bitfield_mask) << bf_dst_bit`. The variable shift
/// count (src_a) is loaded into ecx so that its low byte can be used as the count.
void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
    const auto shift_count = Compile_GetRegister(opcode.src_a, ecx);
    const auto value = Compile_GetRegister(opcode.src_b, RESULT);

    shr(value, shift_count.cvt8());
    and_(value, opcode.GetBitfieldMask());
    shl(value, opcode.bf_dst_bit);

    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|||
|
|||
/// Emits `RESULT = ((src_b >> bf_src_bit) & bitfield_mask) << src_a`. The variable shift
/// count (src_a) is loaded into ecx so that its low byte can be used as the count.
void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
    const auto shift_count = Compile_GetRegister(opcode.src_a, ecx);
    const auto value = Compile_GetRegister(opcode.src_b, RESULT);

    shr(value, opcode.bf_src_bit);
    and_(value, opcode.GetBitfieldMask());
    shl(value, shift_count.cvt8());

    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|||
|
|||
/// Emits code that computes the register index `src_a + immediate`, loads that entry of the
/// engine's register array into RESULT and then applies the instruction's result operation.
void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
    if (optimizer.zero_reg_skip && opcode.src_a == 0) {
        // Register 0 reads as zero, so the index is just the immediate.
        if (opcode.immediate == 0) {
            xor_(RESULT, RESULT);
        } else {
            mov(RESULT, opcode.immediate);
        }
    } else {
        auto result = Compile_GetRegister(opcode.src_a, RESULT);
        const s32 immediate = opcode.immediate;
        // An immediate of zero needs no code. Every other value must be applied; the previous
        // `> 2` test silently dropped an immediate of exactly 2.
        if (immediate == 1) {
            inc(result);
        } else if (immediate > 1) {
            add(result, immediate);
        } else if (immediate < 0) {
            sub(result, -immediate);
        }
    }

    // Equivalent to Engines::Maxwell3D::GetRegisterValue:
    if (optimizer.enable_asserts) {
        // Trap when the index is outside the register array.
        Xbyak::Label pass_range_check;
        cmp(RESULT, static_cast<u32>(Engines::Maxwell3D::Regs::NUM_REGS));
        jb(pass_range_check);
        int3();
        L(pass_range_check);
    }
    // The engine pointer is the first member of the JIT state.
    mov(rax, qword[STATE]);
    mov(RESULT,
        dword[rax + offsetof(Engines::Maxwell3D, regs) +
              offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]);

    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|||
|
|||
/// Host-side helper called from generated code: forwards `value` to the engine method
/// selected by the address field of `method_address`.
void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
    Engines::Maxwell3D& engine = *maxwell3d;
    engine.CallMethod(method_address.address, value, true);
}
|||
|
|||
/// Emits a call to the host Send() helper with `value`, followed by code that advances the
/// address part of METHOD_ADDRESS by its increment part.
void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
    const auto saved_regs = PersistentCallerSavedRegs();

    // Call Send(engine, method_address, value), preserving the persistent caller-saved regs.
    Common::X64::ABI_PushRegistersAndAdjustStack(*this, saved_regs, 0);
    mov(Common::X64::ABI_PARAM1, qword[STATE]);
    mov(Common::X64::ABI_PARAM2.cvt32(), METHOD_ADDRESS);
    mov(Common::X64::ABI_PARAM3.cvt32(), value);
    Common::X64::CallFarFunction(*this, &Send);
    Common::X64::ABI_PopRegistersAndAdjustStack(*this, saved_regs, 0);

    Xbyak::Label address_unchanged{};
    // Bits 12..17 hold the increment; when they are zero the method address stays as it is.
    test(METHOD_ADDRESS, 0x3f000);
    je(address_unchanged);

    // ecx = increment, METHOD_ADDRESS = address (low 12 bits).
    mov(ecx, METHOD_ADDRESS);
    and_(METHOD_ADDRESS, 0xfff);
    shr(ecx, 12);
    and_(ecx, 0x3f);
    // eax = address + increment, then the increment is put back into bits 12..17.
    lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]);
    sal(ecx, 12);
    or_(eax, ecx);

    mov(METHOD_ADDRESS, eax);

    L(address_unchanged);
}
|||
|
|||
/// Emits code for a conditional branch on register src_a.
///
/// When the program contains no delayed branch, a single conditional jump to the target label
/// is emitted. Otherwise the not-taken case jumps over the branch code, an annulled branch
/// jumps straight to the target, and a delayed branch stores the address of a small stub in
/// BRANCH_HOLDER and jumps to this instruction's `delay_skip` label.
void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
    ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
    // Index of the target instruction: current index plus the branch offset in words.
    const s32 target_index =
        static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32));

    Xbyak::Label not_taken;
    auto condition_value = Compile_GetRegister(opcode.src_a, eax);
    cmp(condition_value, 0); // test(value, value);

    if (!optimizer.has_delayed_pc) {
        // Simple case: jump directly to the target when the condition holds.
        switch (opcode.branch_condition) {
        case Macro::BranchCondition::Zero:
            je(labels[target_index], T_NEAR);
            break;
        case Macro::BranchCondition::NotZero:
            jne(labels[target_index], T_NEAR);
            break;
        }
    } else {
        // Leave through `not_taken` when the condition does not hold.
        switch (opcode.branch_condition) {
        case Macro::BranchCondition::Zero:
            jne(not_taken, T_NEAR);
            break;
        case Macro::BranchCondition::NotZero:
            je(not_taken, T_NEAR);
            break;
        }

        if (opcode.branch_annul) {
            // Annulled branch: clear any pending target and jump immediately.
            xor_(BRANCH_HOLDER, BRANCH_HOLDER);
            jmp(labels[target_index], T_NEAR);
        } else {
            Xbyak::Label jump_stub{};
            Xbyak::Label arm_branch{};
            jmp(arm_branch, T_NEAR);

            // Stub whose address is stored in BRANCH_HOLDER: clears it and performs the jump.
            L(jump_stub);
            xor_(BRANCH_HOLDER, BRANCH_HOLDER);
            jmp(labels[target_index], T_NEAR);

            // Store the stub address as the pending target and continue at delay_skip[pc].
            L(arm_branch);
            mov(BRANCH_HOLDER, jump_stub);
            jmp(delay_skip[pc], T_NEAR);
        }
    }

    L(not_taken);
}
|||
|
|||
void MacroJITx64Impl::Optimizer_ScanFlags() { |
|||
optimizer.can_skip_carry = true; |
|||
optimizer.has_delayed_pc = false; |
|||
for (auto raw_op : code) { |
|||
Macro::Opcode op{}; |
|||
op.raw = raw_op; |
|||
|
|||
if (op.operation == Macro::Operation::ALU) { |
|||
// Scan for any ALU operations which actually use the carry flag, if they don't exist in
|
|||
// our current code we can skip emitting the carry flag handling operations
|
|||
if (op.alu_operation == Macro::ALUOperation::AddWithCarry || |
|||
op.alu_operation == Macro::ALUOperation::SubtractWithBorrow) { |
|||
optimizer.can_skip_carry = false; |
|||
} |
|||
} |
|||
|
|||
if (op.operation == Macro::Operation::Branch) { |
|||
if (!op.branch_annul) { |
|||
optimizer.has_delayed_pc = true; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
void MacroJITx64Impl::Compile() { |
|||
labels.fill(Xbyak::Label()); |
|||
|
|||
Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); |
|||
// JIT state
|
|||
mov(STATE, Common::X64::ABI_PARAM1); |
|||
mov(PARAMETERS, Common::X64::ABI_PARAM2); |
|||
mov(MAX_PARAMETER, Common::X64::ABI_PARAM3); |
|||
xor_(RESULT, RESULT); |
|||
xor_(METHOD_ADDRESS, METHOD_ADDRESS); |
|||
xor_(BRANCH_HOLDER, BRANCH_HOLDER); |
|||
|
|||
mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); |
|||
|
|||
// Track get register for zero registers and mark it as no-op
|
|||
optimizer.zero_reg_skip = true; |
|||
|
|||
// AddImmediate tends to be used as a NOP instruction, if we detect this we can
|
|||
// completely skip the entire code path and no emit anything
|
|||
optimizer.skip_dummy_addimmediate = true; |
|||
|
|||
// SMO tends to emit a lot of unnecessary method moves, we can mitigate this by only emitting
|
|||
// one if our register isn't "dirty"
|
|||
optimizer.optimize_for_method_move = true; |
|||
|
|||
// Enable run-time assertions in JITted code
|
|||
optimizer.enable_asserts = false; |
|||
|
|||
// Check to see if we can skip emitting certain instructions
|
|||
Optimizer_ScanFlags(); |
|||
|
|||
const u32 op_count = static_cast<u32>(code.size()); |
|||
for (u32 i = 0; i < op_count; i++) { |
|||
if (i < op_count - 1) { |
|||
pc = i + 1; |
|||
next_opcode = GetOpCode(); |
|||
} else { |
|||
next_opcode = {}; |
|||
} |
|||
pc = i; |
|||
Compile_NextInstruction(); |
|||
} |
|||
|
|||
L(end_of_code); |
|||
|
|||
Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); |
|||
ret(); |
|||
ready(); |
|||
program = getCode<ProgramType>(); |
|||
} |
|||
|
|||
bool MacroJITx64Impl::Compile_NextInstruction() { |
|||
const auto opcode = GetOpCode(); |
|||
if (labels[pc].getAddress()) { |
|||
return false; |
|||
} |
|||
|
|||
L(labels[pc]); |
|||
|
|||
switch (opcode.operation) { |
|||
case Macro::Operation::ALU: |
|||
Compile_ALU(opcode); |
|||
break; |
|||
case Macro::Operation::AddImmediate: |
|||
Compile_AddImmediate(opcode); |
|||
break; |
|||
case Macro::Operation::ExtractInsert: |
|||
Compile_ExtractInsert(opcode); |
|||
break; |
|||
case Macro::Operation::ExtractShiftLeftImmediate: |
|||
Compile_ExtractShiftLeftImmediate(opcode); |
|||
break; |
|||
case Macro::Operation::ExtractShiftLeftRegister: |
|||
Compile_ExtractShiftLeftRegister(opcode); |
|||
break; |
|||
case Macro::Operation::Read: |
|||
Compile_Read(opcode); |
|||
break; |
|||
case Macro::Operation::Branch: |
|||
Compile_Branch(opcode); |
|||
break; |
|||
default: |
|||
UNIMPLEMENTED_MSG("Unimplemented opcode {}", opcode.operation.Value()); |
|||
break; |
|||
} |
|||
|
|||
if (optimizer.has_delayed_pc) { |
|||
if (opcode.is_exit) { |
|||
mov(rax, end_of_code); |
|||
test(BRANCH_HOLDER, BRANCH_HOLDER); |
|||
cmove(BRANCH_HOLDER, rax); |
|||
// Jump to next instruction to skip delay slot check
|
|||
je(labels[pc + 1], T_NEAR); |
|||
} else { |
|||
// TODO(ogniK): Optimize delay slot branching
|
|||
Xbyak::Label no_delay_slot{}; |
|||
test(BRANCH_HOLDER, BRANCH_HOLDER); |
|||
je(no_delay_slot, T_NEAR); |
|||
mov(rax, BRANCH_HOLDER); |
|||
xor_(BRANCH_HOLDER, BRANCH_HOLDER); |
|||
jmp(rax); |
|||
L(no_delay_slot); |
|||
} |
|||
L(delay_skip[pc]); |
|||
if (opcode.is_exit) { |
|||
return false; |
|||
} |
|||
} else { |
|||
test(BRANCH_HOLDER, BRANCH_HOLDER); |
|||
jne(end_of_code, T_NEAR); |
|||
if (opcode.is_exit) { |
|||
inc(BRANCH_HOLDER); |
|||
return false; |
|||
} |
|||
} |
|||
return true; |
|||
} |
|||
|
|||
/// Logs a critical message about a parameter access at or beyond the parameter limit.
/// Called from JIT-emitted code (see Compile_FetchParameter).
///
/// @param parameter     Value of the parameter pointer that was about to be read.
/// @param max_parameter Limit the pointer was compared against; the last parameter is reported
///                      as one u32 below it.
static void WarnInvalidParameter(uintptr_t parameter, uintptr_t max_parameter) {
    const auto last_parameter = max_parameter - sizeof(u32);
    LOG_CRITICAL(HW_GPU,
                 "Macro JIT: invalid parameter access 0x{:x} (0x{:x} is the last parameter)",
                 parameter, last_parameter);
}
|||
|
|||
/// Emits code that loads the next macro parameter into eax and advances PARAMETERS by one u32.
/// When PARAMETERS is not below MAX_PARAMETER, the emitted code first calls WarnInvalidParameter
/// and then still performs the load.
///
/// @return eax, the register holding the fetched parameter.
Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() {
    Xbyak::Label in_bounds{};

    // Fast path: pointer is still below the limit.
    cmp(PARAMETERS, MAX_PARAMETER);
    jb(in_bounds, T_NEAR);

    // Slow path: report the out-of-range access, preserving the persistent caller-saved registers
    // around the call.
    Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
    mov(Common::X64::ABI_PARAM1, PARAMETERS);
    mov(Common::X64::ABI_PARAM2, MAX_PARAMETER);
    Common::X64::CallFarFunction(*this, &WarnInvalidParameter);
    Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);

    L(in_bounds);

    // Read the parameter and step to the next one.
    mov(eax, dword[PARAMETERS]);
    add(PARAMETERS, sizeof(u32));
    return eax;
}
|||
|
|||
/// Emits code that places the value of macro register `index` into `dst`.
///
/// @param index Macro register number; 0 is the always-zero register.
/// @param dst   Host register that receives the value.
/// @return `dst`.
Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
    if (index != 0) {
        // Ordinary register: load it from the register file inside the JIT state.
        mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]);
        return dst;
    }

    // Register 0 always reads as zero, so no memory access is needed.
    xor_(dst, dst);
    return dst;
}
|||
|
|||
/// Emits the code that disposes of an instruction's result according to `operation`: storing it
/// into macro register `reg`, using it as the method address, sending it, and/or fetching the
/// next parameter.
///
/// @param operation What to do with the value in RESULT.
/// @param reg       Destination macro register; stores to register 0 are dropped.
void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
    // Writes `value` into the register file. Register 0 must always read as 0 (a NOP is encoded
    // as a store to it), so stores to index 0 emit nothing.
    const auto store_register = [this](u32 reg_index, const Xbyak::Reg32& value) {
        if (reg_index != 0) {
            mov(dword[STATE + offsetof(JITState, registers) + reg_index * sizeof(u32)], value);
        }
    };
    const auto store_method_address = [this](const Xbyak::Reg32& value) {
        mov(METHOD_ADDRESS, value);
    };

    switch (operation) {
    case Macro::ResultOperation::IgnoreAndFetch: {
        // Discard the result; the register receives the next parameter instead.
        const Xbyak::Reg32 fetched = Compile_FetchParameter();
        store_register(reg, fetched);
        break;
    }
    case Macro::ResultOperation::Move:
        store_register(reg, RESULT);
        break;
    case Macro::ResultOperation::MoveAndSetMethod:
        store_register(reg, RESULT);
        store_method_address(RESULT);
        break;
    case Macro::ResultOperation::FetchAndSend: {
        // The register receives the next parameter; the result is sent.
        const Xbyak::Reg32 fetched = Compile_FetchParameter();
        store_register(reg, fetched);
        Compile_Send(RESULT);
        break;
    }
    case Macro::ResultOperation::MoveAndSend:
        // The register receives the result, which is also sent.
        store_register(reg, RESULT);
        Compile_Send(RESULT);
        break;
    case Macro::ResultOperation::FetchAndSetMethod: {
        // The register receives the next parameter; the result becomes the method address.
        const Xbyak::Reg32 fetched = Compile_FetchParameter();
        store_register(reg, fetched);
        store_method_address(RESULT);
        break;
    }
    case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: {
        // The result goes to the register and the method address; the next parameter is then
        // fetched and sent.
        store_register(reg, RESULT);
        store_method_address(RESULT);
        const Xbyak::Reg32 fetched = Compile_FetchParameter();
        Compile_Send(fetched);
        break;
    }
    case Macro::ResultOperation::MoveAndSetMethodSend:
        // The result goes to the register and the method address; bits 12:17 of it are sent.
        store_register(reg, RESULT);
        store_method_address(RESULT);
        shr(RESULT, 12);
        and_(RESULT, 0b111111);
        Compile_Send(RESULT);
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented macro operation {}", operation);
        break;
    }
}
|||
|
|||
/// Returns the macro opcode built from the code word at the current `pc`.
/// Asserts that `pc` lies inside `code`.
Macro::Opcode MacroJITx64Impl::GetOpCode() const {
    ASSERT(pc < code.size());
    const Macro::Opcode current{code[pc]};
    return current;
}
|||
} // Anonymous namespace
|
|||
|
|||
/// Constructs the JIT macro engine; `maxwell3d_` is forwarded to the MacroEngine base and also
/// kept as a reference for the macros compiled later.
MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_)
    : MacroEngine(maxwell3d_), maxwell3d(maxwell3d_) {}
|||
|
|||
std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { |
|||
return std::make_unique<MacroJITx64Impl>(maxwell3d, code); |
|||
} |
|||
} // namespace Tegra
|
|||
@ -1,26 +0,0 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project |
|||
// SPDX-License-Identifier: GPL-2.0-or-later |
|||
|
|||
#pragma once |
|||
|
|||
#include "common/common_types.h" |
|||
#include "video_core/macro/macro.h" |
|||
|
|||
namespace Tegra { |
|||
|
|||
namespace Engines { |
|||
class Maxwell3D; |
|||
} |
|||
|
|||
class MacroJITx64 final : public MacroEngine { |
|||
public: |
|||
explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); |
|||
|
|||
protected: |
|||
std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override; |
|||
|
|||
private: |
|||
Engines::Maxwell3D& maxwell3d; |
|||
}; |
|||
|
|||
} // namespace Tegra |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue