Browse Source

macro HLE inline

lizzie/macro-hle-inline
lizzie 1 week ago
parent
commit
df742f106d
  1. 21
      src/video_core/engines/maxwell_3d.cpp
  2. 2
      src/video_core/engines/maxwell_3d.h
  3. 1078
      src/video_core/macro.cpp
  4. 165
      src/video_core/macro.h

21
src/video_core/engines/maxwell_3d.cpp

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
@ -26,8 +26,15 @@ namespace Tegra::Engines {
constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
: draw_manager{std::make_unique<DrawManager>(this)}, system{system_},
memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} {
: draw_manager{std::make_unique<DrawManager>(this)}, system{system_}
, memory_manager{memory_manager_}
#ifdef ARCHITECTURE_x86_64
, macro_engine(bool(Settings::values.disable_macro_jit))
#else
, macro_engine(maxwell3d, true);
#endif
, upload_state{memory_manager, regs.upload}
{
dirty.flags.flip();
InitializeRegisterDefaults();
execution_mask.reset();
@ -328,9 +335,9 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(nonshadow_argument);
return;
case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr):
return macro_engine->ClearCode(regs.load_mme.instruction_ptr);
return macro_engine.ClearCode(regs.load_mme.instruction_ptr);
case MAXWELL3D_REG_INDEX(load_mme.instruction):
return macro_engine->AddCode(regs.load_mme.instruction_ptr, argument);
return macro_engine.AddCode(regs.load_mme.instruction_ptr, argument);
case MAXWELL3D_REG_INDEX(load_mme.start_address):
return ProcessMacroBind(argument);
case MAXWELL3D_REG_INDEX(falcon[4]):
@ -398,7 +405,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
// Execute the current macro.
macro_engine->Execute(macro_positions[entry], parameters);
macro_engine.Execute(*this, macro_positions[entry], parameters);
draw_manager->DrawDeferred();
}
@ -464,7 +471,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
}
void Maxwell3D::ProcessMacroUpload(u32 data) {
macro_engine->AddCode(regs.load_mme.instruction_ptr++, data);
macro_engine.AddCode(regs.load_mme.instruction_ptr++, data);
}
void Maxwell3D::ProcessMacroBind(u32 data) {

2
src/video_core/engines/maxwell_3d.h

@ -3203,7 +3203,7 @@ private:
std::vector<u32> macro_params;
/// Interpreter for the macro codes uploaded to the GPU.
std::optional<MacroEngine> macro_engine;
MacroEngine macro_engine;
Upload::State upload_state;

1078
src/video_core/macro.cpp
File diff suppressed because it is too large
View File

165
src/video_core/macro.h

@ -8,6 +8,7 @@
#include <memory>
#include <ankerl/unordered_dense.h>
#include <span>
#include <vector>
#include "common/bit_field.h"
#include "common/common_types.h"
@ -98,62 +99,142 @@ union MethodAddress {
} // namespace Macro
class CachedMacro {
public:
CachedMacro(Engines::Maxwell3D& maxwell3d_)
: maxwell3d{maxwell3d_}
{}
virtual ~CachedMacro() = default;
struct HLEMacro {
};
/// @note: these macros have two versions, a normal and extended version, with the extended version
/// also assigning the base vertex/instance.
struct HLE_DrawArraysIndirect final {
HLE_DrawArraysIndirect(bool extended) noexcept : extended{extended} {}
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
bool extended;
};
/// @note: these macros have two versions, a normal and extended version, with the extended version
/// also assigning the base vertex/instance.
struct HLE_DrawIndexedIndirect final {
explicit HLE_DrawIndexedIndirect(bool extended) noexcept : extended{extended} {}
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
bool extended;
};
struct HLE_MultiLayerClear final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
};
struct HLE_MultiDrawIndexedIndirectCount final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
};
struct HLE_DrawIndirectByteCount final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
};
struct HLE_C713C83D8F63CCF3 final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
};
struct HLE_D7333D26E0A93EDE final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
};
struct HLE_BindShader final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
};
struct HLE_SetRasterBoundingBox final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
};
struct HLE_ClearConstBuffer final {
HLE_ClearConstBuffer(size_t base_size) noexcept : base_size{base_size} {}
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
size_t base_size;
};
struct HLE_ClearMemory final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
std::vector<u32> zero_memory;
};
struct HLE_TransformFeedbackSetup final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
};
struct MacroInterpreterImpl final {
MacroInterpreterImpl() {}
MacroInterpreterImpl(std::span<const u32> code_) : code{code_} {}
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> params, u32 method);
void Reset();
bool Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slot);
u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);
void ProcessResult(Engines::Maxwell3D& maxwell3d, Macro::ResultOperation operation, u32 reg, u32 result);
bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;
Macro::Opcode GetOpcode() const;
u32 GetRegister(u32 register_id) const;
void SetRegister(u32 register_id, u32 value);
/// Sets the method address to use for the next Send instruction.
[[nodiscard]] inline void SetMethodAddress(u32 address) noexcept {
method_address.raw = address;
}
void Send(Engines::Maxwell3D& maxwell3d, u32 value);
u32 Read(Engines::Maxwell3D& maxwell3d, u32 method) const;
u32 FetchParameter();
/// General purpose macro registers.
std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {};
/// Input parameters of the current macro.
std::vector<u32> parameters;
std::span<const u32> code;
/// Program counter to execute at after the delay slot is executed.
std::optional<u32> delayed_pc;
/// Method address to use for the next Send instruction.
Macro::MethodAddress method_address = {};
/// Current program counter
u32 pc{};
/// Index of the next parameter that will be fetched by the 'parm' instruction.
u32 next_parameter_index = 0;
bool carry_flag = false;
};
struct DynamicCachedMacro {
virtual ~DynamicCachedMacro() = default;
/// Executes the macro code with the specified input parameters.
/// @param parameters The parameters of the macro
/// @param method The method to execute
virtual void Execute(const std::vector<u32>& parameters, u32 method) = 0;
Engines::Maxwell3D& maxwell3d;
};
class HLEMacro {
public:
explicit HLEMacro(Engines::Maxwell3D& maxwell3d_);
~HLEMacro();
// Allocates and returns a cached macro if the hash matches a known function.
// Returns nullptr otherwise.
[[nodiscard]] std::unique_ptr<CachedMacro> GetHLEProgram(u64 hash) const;
private:
Engines::Maxwell3D& maxwell3d;
virtual void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, u32 method) = 0;
};
class MacroEngine {
public:
explicit MacroEngine(Engines::Maxwell3D& maxwell3d, bool is_interpreted);
~MacroEngine();
using AnyCachedMacro = std::variant<
std::monostate,
HLEMacro,
HLE_DrawArraysIndirect,
HLE_DrawIndexedIndirect,
HLE_MultiDrawIndexedIndirectCount,
HLE_MultiLayerClear,
HLE_C713C83D8F63CCF3,
HLE_D7333D26E0A93EDE,
HLE_BindShader,
HLE_SetRasterBoundingBox,
HLE_ClearConstBuffer,
HLE_ClearMemory,
HLE_TransformFeedbackSetup,
HLE_DrawIndirectByteCount,
MacroInterpreterImpl,
// Used for JIT x86 macro
std::unique_ptr<DynamicCachedMacro>
>;
struct MacroEngine {
MacroEngine(bool is_interpreted) noexcept : is_interpreted{is_interpreted} {}
// Store the uploaded macro code to compile them when they're called.
void AddCode(u32 method, u32 data);
inline void AddCode(u32 method, u32 data) noexcept {
uploaded_macro_code[method].push_back(data);
}
// Clear the code associated with a method.
void ClearCode(u32 method);
inline void ClearCode(u32 method) noexcept {
macro_cache.erase(method);
uploaded_macro_code.erase(method);
}
// Compiles the macro if its not in the cache, and executes the compiled macro
void Execute(u32 method, const std::vector<u32>& parameters);
protected:
std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code);
private:
void Execute(Engines::Maxwell3D& maxwell3d, u32 method, std::span<const u32> parameters);
AnyCachedMacro Compile(Engines::Maxwell3D& maxwell3d, std::span<const u32> code);
struct CacheInfo {
std::unique_ptr<CachedMacro> lle_program{};
std::unique_ptr<CachedMacro> hle_program{};
AnyCachedMacro program;
u64 hash{};
bool has_hle_program{};
};
ankerl::unordered_dense::map<u32, CacheInfo> macro_cache;
ankerl::unordered_dense::map<u32, std::vector<u32>> uploaded_macro_code;
std::optional<HLEMacro> hle_macros;
Engines::Maxwell3D& maxwell3d;
bool is_interpreted;
};
std::optional<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
} // namespace Tegra
Loading…
Cancel
Save