Browse Source

maxwell macros

lizzie/inline-gpu-works1
lizzie 1 month ago
parent
commit
ed48efec20
  1. 10
      src/video_core/control/channel_state.cpp
  2. 6
      src/video_core/dma_pusher.cpp
  3. 6
      src/video_core/engines/maxwell_3d.cpp
  4. 2
      src/video_core/engines/maxwell_3d.h
  5. 338
      src/video_core/macro.cpp
  6. 50
      src/video_core/macro.h

10
src/video_core/control/channel_state.cpp

@ -18,12 +18,12 @@
namespace Tegra::Control { namespace Tegra::Control {
ChannelState::Payload::Payload(Core::System& system, MemoryManager& memory_manager, ChannelState& channel_state) ChannelState::Payload::Payload(Core::System& system, MemoryManager& memory_manager, ChannelState& channel_state)
: maxwell_3d(system, memory_manager)
: maxwell_3d(memory_manager)
, fermi_2d(memory_manager) , fermi_2d(memory_manager)
, kepler_compute(system, memory_manager)
, maxwell_dma(system, memory_manager)
, kepler_memory(system, memory_manager)
, nv01_timer(system, memory_manager)
, kepler_compute(memory_manager)
, maxwell_dma(memory_manager)
, kepler_memory(memory_manager)
, nv01_timer(memory_manager)
, dma_pusher(system, memory_manager, channel_state) , dma_pusher(system, memory_manager, channel_state)
{} {}

6
src/video_core/dma_pusher.cpp

@ -181,9 +181,7 @@ void DmaPusher::CallMethod(u32 argument) {
} else { } else {
subchannel->ConsumeSink(system); subchannel->ConsumeSink(system);
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call);
} else {
subchannel->method_sink.emplace_back(dma_state.method, argument);
subchannel->CallMethod(system, dma_state.method, argument, dma_state.is_last_call);
} }
} }
} }
@ -195,7 +193,7 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) {
auto subchannel = subchannels[dma_state.subchannel]; auto subchannel = subchannels[dma_state.subchannel];
subchannel->ConsumeSink(system); subchannel->ConsumeSink(system);
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
subchannel->CallMultiMethod(dma_state.method, base_start, num_methods, dma_state.method_count);
subchannel->CallMultiMethod(system, dma_state.method, base_start, num_methods, dma_state.method_count);
} }
} }

6
src/video_core/engines/maxwell_3d.cpp

@ -218,7 +218,7 @@ void Maxwell3D::ProcessMacro(Core::System& system, u32 method, const u32* base_s
// Call the macro when there are no more parameters in the command buffer // Call the macro when there are no more parameters in the command buffer
if (is_last_call) { if (is_last_call) {
ConsumeSink(system); ConsumeSink(system);
CallMacroMethod(executing_macro, macro_params);
CallMacroMethod(system, executing_macro, macro_params);
macro_params.clear(); macro_params.clear();
macro_addresses.clear(); macro_addresses.clear();
macro_segments.clear(); macro_segments.clear();
@ -376,7 +376,7 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
} }
} }
void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) {
void Maxwell3D::CallMacroMethod(Core::System& system, u32 method, const std::vector<u32>& parameters) {
// Reset the current macro. // Reset the current macro.
executing_macro = 0; executing_macro = 0;
@ -385,7 +385,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
// Execute the current macro. // Execute the current macro.
macro_engine.Execute(*this, macro_positions[entry], parameters);
macro_engine.Execute(system, *this, macro_positions[entry], parameters);
draw_manager.DrawDeferred(*this); draw_manager.DrawDeferred(*this);
} }

2
src/video_core/engines/maxwell_3d.h

@ -3211,7 +3211,7 @@ private:
* @param method Method to call * @param method Method to call
* @param parameters Arguments to the method call * @param parameters Arguments to the method call
*/ */
void CallMacroMethod(u32 method, const std::vector<u32>& parameters);
void CallMacroMethod(Core::System& system, u32 method, const std::vector<u32>& parameters);
/// Handles writes to the macro uploading register. /// Handles writes to the macro uploading register.
void ProcessMacroUpload(u32 data); void ProcessMacroUpload(u32 data);

338
src/video_core/macro.cpp

@ -64,10 +64,10 @@ bool IsTopologySafe(Maxwell3D::Regs::PrimitiveTopology topology) {
} // Anonymous namespace } // Anonymous namespace
void HLE_DrawArraysIndirect::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
void HLE_DrawArraysIndirect::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
Fallback(maxwell3d, parameters);
Fallback(system, maxwell3d, parameters);
return; return;
} }
@ -93,7 +93,7 @@ void HLE_DrawArraysIndirect::Execute(Engines::Maxwell3D& maxwell3d, std::span<co
maxwell3d.replace_table.clear(); maxwell3d.replace_table.clear();
} }
} }
void HLE_DrawArraysIndirect::Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters) {
void HLE_DrawArraysIndirect::Fallback(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters) {
SCOPE_EXIT { SCOPE_EXIT {
if (extended) { if (extended) {
maxwell3d.engine_state = Maxwell3D::EngineHint::None; maxwell3d.engine_state = Maxwell3D::EngineHint::None;
@ -123,10 +123,10 @@ void HLE_DrawArraysIndirect::Fallback(Engines::Maxwell3D& maxwell3d, std::span<c
} }
} }
void HLE_DrawIndexedIndirect::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
void HLE_DrawIndexedIndirect::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
Fallback(maxwell3d, parameters);
Fallback(system, maxwell3d, parameters);
return; return;
} }
@ -161,7 +161,7 @@ void HLE_DrawIndexedIndirect::Execute(Engines::Maxwell3D& maxwell3d, std::span<c
maxwell3d.replace_table.clear(); maxwell3d.replace_table.clear();
} }
} }
void HLE_DrawIndexedIndirect::Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters) {
void HLE_DrawIndexedIndirect::Fallback(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
const u32 element_base = parameters[4]; const u32 element_base = parameters[4];
@ -184,7 +184,7 @@ void HLE_DrawIndexedIndirect::Fallback(Engines::Maxwell3D& maxwell3d, std::span<
maxwell3d.replace_table.clear(); maxwell3d.replace_table.clear();
} }
} }
void HLE_MultiLayerClear::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
void HLE_MultiLayerClear::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
ASSERT(parameters.size() == 1); ASSERT(parameters.size() == 1);
@ -196,47 +196,44 @@ void HLE_MultiLayerClear::Execute(Engines::Maxwell3D& maxwell3d, std::span<const
maxwell3d.regs.clear_surface.raw = clear_params.raw; maxwell3d.regs.clear_surface.raw = clear_params.raw;
maxwell3d.draw_manager.Clear(maxwell3d, num_layers); maxwell3d.draw_manager.Clear(maxwell3d, num_layers);
} }
void HLE_MultiDrawIndexedIndirectCount::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
void HLE_MultiDrawIndexedIndirectCount::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
const auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[2]); const auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[2]);
if (!IsTopologySafe(topology)) {
Fallback(maxwell3d, parameters);
return;
}
const u32 start_indirect = parameters[0];
const u32 end_indirect = parameters[1];
if (start_indirect >= end_indirect) {
// Nothing to do.
return;
if (IsTopologySafe(topology)) {
const u32 start_indirect = parameters[0];
const u32 end_indirect = parameters[1];
if (start_indirect >= end_indirect) {
// Nothing to do.
return;
}
const u32 padding = parameters[3]; // padding is in words
// size of each indirect segment
const u32 indirect_words = 5 + padding;
const u32 stride = indirect_words * sizeof(u32);
const std::size_t draw_count = end_indirect - start_indirect;
const u32 estimate = u32(maxwell3d.EstimateIndexBufferSize());
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
auto& params = maxwell3d.draw_manager.indirect_state;
params.is_byte_count = false;
params.is_indexed = true;
params.include_count = true;
params.count_start_address = maxwell3d.GetMacroAddress(4);
params.indirect_start_address = maxwell3d.GetMacroAddress(5);
params.buffer_size = stride * draw_count;
params.max_draw_counts = draw_count;
params.stride = stride;
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
maxwell3d.SetHLEReplacementAttributeType(0, 0x648, Maxwell3D::HLEReplacementAttributeType::DrawID);
maxwell3d.draw_manager.DrawIndexedIndirect(maxwell3d, topology, 0, estimate);
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
maxwell3d.replace_table.clear();
} else {
Fallback(system, maxwell3d, parameters);
} }
const u32 padding = parameters[3]; // padding is in words
// size of each indirect segment
const u32 indirect_words = 5 + padding;
const u32 stride = indirect_words * sizeof(u32);
const std::size_t draw_count = end_indirect - start_indirect;
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
auto& params = maxwell3d.draw_manager.indirect_state;
params.is_byte_count = false;
params.is_indexed = true;
params.include_count = true;
params.count_start_address = maxwell3d.GetMacroAddress(4);
params.indirect_start_address = maxwell3d.GetMacroAddress(5);
params.buffer_size = stride * draw_count;
params.max_draw_counts = draw_count;
params.stride = stride;
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
maxwell3d.SetHLEReplacementAttributeType(0, 0x648, Maxwell3D::HLEReplacementAttributeType::DrawID);
maxwell3d.draw_manager.DrawIndexedIndirect(maxwell3d, topology, 0, estimate);
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
maxwell3d.replace_table.clear();
} }
void HLE_MultiDrawIndexedIndirectCount::Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters) {
void HLE_MultiDrawIndexedIndirectCount::Fallback(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters) {
SCOPE_EXIT { SCOPE_EXIT {
// Clean everything. // Clean everything.
maxwell3d.regs.vertex_id_base = 0x0; maxwell3d.regs.vertex_id_base = 0x0;
@ -250,7 +247,7 @@ void HLE_MultiDrawIndexedIndirectCount::Fallback(Engines::Maxwell3D& maxwell3d,
// Nothing to do. // Nothing to do.
return; return;
} }
const auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
const auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[2]);
const u32 padding = parameters[3]; const u32 padding = parameters[3];
const std::size_t max_draws = parameters[4]; const std::size_t max_draws = parameters[4];
const u32 indirect_words = 5 + padding; const u32 indirect_words = 5 + padding;
@ -265,41 +262,41 @@ void HLE_MultiDrawIndexedIndirectCount::Fallback(Engines::Maxwell3D& maxwell3d,
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
maxwell3d.CallMethod(0x8e3, 0x648, true);
maxwell3d.CallMethod(0x8e4, static_cast<u32>(index), true);
maxwell3d.CallMethod(system, 0x8e3, 0x648, true);
maxwell3d.CallMethod(system, 0x8e4, u32(index), true);
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.draw_manager.DrawIndex(maxwell3d, topology, parameters[base + 2], parameters[base], base_vertex, base_instance, parameters[base + 1]); maxwell3d.draw_manager.DrawIndex(maxwell3d, topology, parameters[base + 2], parameters[base], base_vertex, base_instance, parameters[base + 1]);
} }
} }
void HLE_DrawIndirectByteCount::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
void HLE_DrawIndirectByteCount::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback(); const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback();
if (!force) {
Fallback(maxwell3d, parameters);
return;
if (force) {
auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[0] & 0xFFFFU);
auto& params = maxwell3d.draw_manager.indirect_state;
params.is_byte_count = true;
params.is_indexed = false;
params.include_count = false;
params.count_start_address = 0;
params.indirect_start_address = maxwell3d.GetMacroAddress(2);
params.buffer_size = 4;
params.max_draw_counts = 1;
params.stride = parameters[1];
maxwell3d.regs.draw.begin = parameters[0];
maxwell3d.regs.draw_auto_stride = parameters[1];
maxwell3d.regs.draw_auto_byte_count = parameters[2];
maxwell3d.draw_manager.DrawArrayIndirect(maxwell3d, topology);
} else {
Fallback(system, maxwell3d, parameters);
} }
auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[0] & 0xFFFFU);
auto& params = maxwell3d.draw_manager.indirect_state;
params.is_byte_count = true;
params.is_indexed = false;
params.include_count = false;
params.count_start_address = 0;
params.indirect_start_address = maxwell3d.GetMacroAddress(2);
params.buffer_size = 4;
params.max_draw_counts = 1;
params.stride = parameters[1];
maxwell3d.regs.draw.begin = parameters[0];
maxwell3d.regs.draw_auto_stride = parameters[1];
maxwell3d.regs.draw_auto_byte_count = parameters[2];
maxwell3d.draw_manager.DrawArrayIndirect(maxwell3d, topology);
} }
void HLE_DrawIndirectByteCount::Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters) {
void HLE_DrawIndirectByteCount::Fallback(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
maxwell3d.regs.draw.begin = parameters[0]; maxwell3d.regs.draw.begin = parameters[0];
maxwell3d.regs.draw_auto_stride = parameters[1]; maxwell3d.regs.draw_auto_stride = parameters[1];
maxwell3d.regs.draw_auto_byte_count = parameters[2]; maxwell3d.regs.draw_auto_byte_count = parameters[2];
maxwell3d.draw_manager.DrawArray(maxwell3d, maxwell3d.regs.draw.topology, 0, maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1); maxwell3d.draw_manager.DrawArray(maxwell3d, maxwell3d.regs.draw.topology, 0, maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1);
} }
void HLE_C713C83D8F63CCF3::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
void HLE_C713C83D8F63CCF3::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2; const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2;
const u32 address = maxwell3d.regs.shadow_scratch[24]; const u32 address = maxwell3d.regs.shadow_scratch[24];
@ -309,7 +306,7 @@ void HLE_C713C83D8F63CCF3::Execute(Engines::Maxwell3D& maxwell3d, std::span<cons
const_buffer.address_low = address << 8; const_buffer.address_low = address << 8;
const_buffer.offset = offset; const_buffer.offset = offset;
} }
void HLE_D7333D26E0A93EDE::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
void HLE_D7333D26E0A93EDE::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
const size_t index = parameters[0]; const size_t index = parameters[0];
const u32 address = maxwell3d.regs.shadow_scratch[42 + index]; const u32 address = maxwell3d.regs.shadow_scratch[42 + index];
@ -319,7 +316,7 @@ void HLE_D7333D26E0A93EDE::Execute(Engines::Maxwell3D& maxwell3d, std::span<cons
const_buffer.address_high = (address >> 24) & 0xFF; const_buffer.address_high = (address >> 24) & 0xFF;
const_buffer.address_low = address << 8; const_buffer.address_low = address << 8;
} }
void HLE_BindShader::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
void HLE_BindShader::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
auto& regs = maxwell3d.regs; auto& regs = maxwell3d.regs;
const u32 index = parameters[0]; const u32 index = parameters[0];
@ -343,7 +340,7 @@ void HLE_BindShader::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32>
bind_group.raw_config = 0x11; bind_group.raw_config = 0x11;
maxwell3d.ProcessCBBind(bind_group_id); maxwell3d.ProcessCBBind(bind_group_id);
} }
void HLE_SetRasterBoundingBox::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
void HLE_SetRasterBoundingBox::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
const u32 raster_mode = parameters[0]; const u32 raster_mode = parameters[0];
auto& regs = maxwell3d.regs; auto& regs = maxwell3d.regs;
@ -352,7 +349,7 @@ void HLE_SetRasterBoundingBox::Execute(Engines::Maxwell3D& maxwell3d, std::span<
regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F; regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F;
regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled); regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled);
} }
void HLE_ClearConstBuffer::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
void HLE_ClearConstBuffer::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
static constexpr std::array<u32, 0x7000> zeroes{}; //must be bigger than either 7000 or 5F00 static constexpr std::array<u32, 0x7000> zeroes{}; //must be bigger than either 7000 or 5F00
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
auto& regs = maxwell3d.regs; auto& regs = maxwell3d.regs;
@ -362,7 +359,7 @@ void HLE_ClearConstBuffer::Execute(Engines::Maxwell3D& maxwell3d, std::span<cons
regs.const_buffer.offset = 0; regs.const_buffer.offset = 0;
maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4); maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4);
} }
void HLE_ClearMemory::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
void HLE_ClearMemory::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
const u32 needed_memory = parameters[2] / sizeof(u32); const u32 needed_memory = parameters[2] / sizeof(u32);
if (needed_memory > zero_memory.size()) { if (needed_memory > zero_memory.size()) {
@ -373,10 +370,10 @@ void HLE_ClearMemory::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32
regs.upload.line_count = 1; regs.upload.line_count = 1;
regs.upload.dest.address_high = parameters[0]; regs.upload.dest.address_high = parameters[0];
regs.upload.dest.address_low = parameters[1]; regs.upload.dest.address_low = parameters[1];
maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true);
maxwell3d.CallMultiMethod(size_t(MAXWELL3D_REG_INDEX(inline_data)), zero_memory.data(), needed_memory, needed_memory);
maxwell3d.CallMethod(system, size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true);
maxwell3d.CallMultiMethod(system, size_t(MAXWELL3D_REG_INDEX(inline_data)), zero_memory.data(), needed_memory, needed_memory);
} }
void HLE_TransformFeedbackSetup::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
void HLE_TransformFeedbackSetup::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
auto& regs = maxwell3d.regs; auto& regs = maxwell3d.regs;
regs.transform_feedback_enabled = 1; regs.transform_feedback_enabled = 1;
@ -388,8 +385,8 @@ void HLE_TransformFeedbackSetup::Execute(Engines::Maxwell3D& maxwell3d, std::spa
regs.upload.line_count = 1; regs.upload.line_count = 1;
regs.upload.dest.address_high = parameters[0]; regs.upload.dest.address_high = parameters[0];
regs.upload.dest.address_low = parameters[1]; regs.upload.dest.address_low = parameters[1];
maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true);
maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(inline_data)), regs.transform_feedback.controls[0].stride, true);
maxwell3d.CallMethod(system, size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true);
maxwell3d.CallMethod(system, size_t(MAXWELL3D_REG_INDEX(inline_data)), regs.transform_feedback.controls[0].stride, true);
maxwell3d.Rasterizer().RegisterTransformFeedback(regs.upload.dest.Address()); maxwell3d.Rasterizer().RegisterTransformFeedback(regs.upload.dest.Address());
} }
@ -429,7 +426,7 @@ void HLE_TransformFeedbackSetup::Execute(Engines::Maxwell3D& maxwell3d, std::spa
} }
} }
void MacroInterpreterImpl::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> params, u32 method) {
void MacroInterpreterImpl::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> params, u32 method) {
Reset(); Reset();
registers[1] = params[0]; registers[1] = params[0];
@ -439,7 +436,7 @@ void MacroInterpreterImpl::Execute(Engines::Maxwell3D& maxwell3d, std::span<cons
// Execute the code until we hit an exit condition. // Execute the code until we hit an exit condition.
bool keep_executing = true; bool keep_executing = true;
while (keep_executing) { while (keep_executing) {
keep_executing = Step(maxwell3d, false);
keep_executing = Step(system, maxwell3d, false);
} }
// Assert that the macro used all the input parameters // Assert that the macro used all the input parameters
@ -462,7 +459,7 @@ void MacroInterpreterImpl::Reset() {
/// @brief Executes a single macro instruction located at the current program counter. Returns whether /// @brief Executes a single macro instruction located at the current program counter. Returns whether
/// the interpreter should keep running. /// the interpreter should keep running.
/// @param is_delay_slot Whether the current step is being executed due to a delay slot in a previous instruction. /// @param is_delay_slot Whether the current step is being executed due to a delay slot in a previous instruction.
bool MacroInterpreterImpl::Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slot) {
bool MacroInterpreterImpl::Step(Core::System& system, Engines::Maxwell3D& maxwell3d, bool is_delay_slot) {
u32 base_address = pc; u32 base_address = pc;
Macro::Opcode opcode = GetOpcode(); Macro::Opcode opcode = GetOpcode();
@ -478,11 +475,11 @@ bool MacroInterpreterImpl::Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slo
switch (opcode.operation) { switch (opcode.operation) {
case Macro::Operation::ALU: { case Macro::Operation::ALU: {
u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), GetRegister(opcode.src_b)); u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), GetRegister(opcode.src_b));
ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, result);
ProcessResult(system, maxwell3d, opcode.result_operation, opcode.dst, result);
break; break;
} }
case Macro::Operation::AddImmediate: { case Macro::Operation::AddImmediate: {
ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, GetRegister(opcode.src_a) + opcode.immediate);
ProcessResult(system, maxwell3d, opcode.result_operation, opcode.dst, GetRegister(opcode.src_a) + opcode.immediate);
break; break;
} }
case Macro::Operation::ExtractInsert: { case Macro::Operation::ExtractInsert: {
@ -492,7 +489,7 @@ bool MacroInterpreterImpl::Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slo
src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask(); src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask();
dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit);
dst |= src << opcode.bf_dst_bit; dst |= src << opcode.bf_dst_bit;
ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, dst);
ProcessResult(system, maxwell3d, opcode.result_operation, opcode.dst, dst);
break; break;
} }
case Macro::Operation::ExtractShiftLeftImmediate: { case Macro::Operation::ExtractShiftLeftImmediate: {
@ -501,7 +498,7 @@ bool MacroInterpreterImpl::Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slo
u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit; u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit;
ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, result);
ProcessResult(system, maxwell3d, opcode.result_operation, opcode.dst, result);
break; break;
} }
case Macro::Operation::ExtractShiftLeftRegister: { case Macro::Operation::ExtractShiftLeftRegister: {
@ -510,12 +507,12 @@ bool MacroInterpreterImpl::Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slo
u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst; u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst;
ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, result);
ProcessResult(system, maxwell3d, opcode.result_operation, opcode.dst, result);
break; break;
} }
case Macro::Operation::Read: { case Macro::Operation::Read: {
u32 result = Read(maxwell3d, GetRegister(opcode.src_a) + opcode.immediate); u32 result = Read(maxwell3d, GetRegister(opcode.src_a) + opcode.immediate);
ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, result);
ProcessResult(system, maxwell3d, opcode.result_operation, opcode.dst, result);
break; break;
} }
case Macro::Operation::Branch: { case Macro::Operation::Branch: {
@ -531,7 +528,7 @@ bool MacroInterpreterImpl::Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slo
delayed_pc = base_address + opcode.GetBranchTarget(); delayed_pc = base_address + opcode.GetBranchTarget();
// Execute one more instruction due to the delay slot. // Execute one more instruction due to the delay slot.
return Step(maxwell3d, true);
return Step(system, maxwell3d, true);
} }
break; break;
} }
@ -544,7 +541,7 @@ bool MacroInterpreterImpl::Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slo
// cause an exit if it's executed inside a delay slot. // cause an exit if it's executed inside a delay slot.
if (opcode.is_exit && !is_delay_slot) { if (opcode.is_exit && !is_delay_slot) {
// Exit has a delay slot, execute the next instruction // Exit has a delay slot, execute the next instruction
Step(maxwell3d, true);
Step(system, maxwell3d, true);
return false; return false;
} }
return true; return true;
@ -591,7 +588,7 @@ u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a,
} }
/// Performs the result operation on the input result and stores it in the specified register (if necessary). /// Performs the result operation on the input result and stores it in the specified register (if necessary).
void MacroInterpreterImpl::ProcessResult(Engines::Maxwell3D& maxwell3d, Macro::ResultOperation operation, u32 reg, u32 result) {
void MacroInterpreterImpl::ProcessResult(Core::System& system, Engines::Maxwell3D& maxwell3d, Macro::ResultOperation operation, u32 reg, u32 result) {
switch (operation) { switch (operation) {
case Macro::ResultOperation::IgnoreAndFetch: case Macro::ResultOperation::IgnoreAndFetch:
// Fetch parameter and ignore result. // Fetch parameter and ignore result.
@ -609,12 +606,12 @@ void MacroInterpreterImpl::ProcessResult(Engines::Maxwell3D& maxwell3d, Macro::R
case Macro::ResultOperation::FetchAndSend: case Macro::ResultOperation::FetchAndSend:
// Fetch parameter and send result. // Fetch parameter and send result.
SetRegister(reg, FetchParameter()); SetRegister(reg, FetchParameter());
Send(maxwell3d, result);
Send(system, maxwell3d, result);
break; break;
case Macro::ResultOperation::MoveAndSend: case Macro::ResultOperation::MoveAndSend:
// Move and send result. // Move and send result.
SetRegister(reg, result); SetRegister(reg, result);
Send(maxwell3d, result);
Send(system, maxwell3d, result);
break; break;
case Macro::ResultOperation::FetchAndSetMethod: case Macro::ResultOperation::FetchAndSetMethod:
// Fetch parameter and use result as Method Address. // Fetch parameter and use result as Method Address.
@ -625,13 +622,13 @@ void MacroInterpreterImpl::ProcessResult(Engines::Maxwell3D& maxwell3d, Macro::R
// Move result and use as Method Address, then fetch and send parameter. // Move result and use as Method Address, then fetch and send parameter.
SetRegister(reg, result); SetRegister(reg, result);
SetMethodAddress(result); SetMethodAddress(result);
Send(maxwell3d, FetchParameter());
Send(system, maxwell3d, FetchParameter());
break; break;
case Macro::ResultOperation::MoveAndSetMethodSend: case Macro::ResultOperation::MoveAndSetMethodSend:
// Move result and use as Method Address, then send bits 12:17 of result. // Move result and use as Method Address, then send bits 12:17 of result.
SetRegister(reg, result); SetRegister(reg, result);
SetMethodAddress(result); SetMethodAddress(result);
Send(maxwell3d, (result >> 12) & 0b111111);
Send(system, maxwell3d, (result >> 12) & 0b111111);
break; break;
default: default:
UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation); UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation);
@ -672,8 +669,8 @@ void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) {
} }
/// Calls a GPU Engine method with the input parameter. /// Calls a GPU Engine method with the input parameter.
void MacroInterpreterImpl::Send(Engines::Maxwell3D& maxwell3d, u32 value) {
maxwell3d.CallMethod(method_address.address, value, true);
void MacroInterpreterImpl::Send(Core::System& system, Engines::Maxwell3D& maxwell3d, u32 value) {
maxwell3d.CallMethod(system, method_address.address, value, true);
// Increment the method address by the method increment. // Increment the method address by the method increment.
method_address.address.Assign(method_address.address.Value() + method_address.increment.Value()); method_address.address.Assign(method_address.address.Value() + method_address.increment.Value());
} }
@ -724,34 +721,35 @@ static const auto default_cg_mode = nullptr; //Allow RWE
#endif #endif
struct MacroJITx64Impl final : public Xbyak::CodeGenerator, public DynamicCachedMacro { struct MacroJITx64Impl final : public Xbyak::CodeGenerator, public DynamicCachedMacro {
explicit MacroJITx64Impl(std::span<const u32> code_)
explicit MacroJITx64Impl(Core::System& system, std::span<const u32> code_)
: Xbyak::CodeGenerator(MAX_CODE_SIZE, default_cg_mode) : Xbyak::CodeGenerator(MAX_CODE_SIZE, default_cg_mode)
, code{code_} , code{code_}
{ {
Compile();
Compile(system);
} }
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, u32 method) override;
void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, u32 method) override;
void Compile_ALU(Macro::Opcode opcode);
void Compile_AddImmediate(Macro::Opcode opcode);
void Compile_ExtractInsert(Macro::Opcode opcode);
void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
void Compile_Read(Macro::Opcode opcode);
void Compile_ALU(Core::System& system, Macro::Opcode opcode);
void Compile_AddImmediate(Core::System& system, Macro::Opcode opcode);
void Compile_ExtractInsert(Core::System& system, Macro::Opcode opcode);
void Compile_ExtractShiftLeftImmediate(Core::System& system, Macro::Opcode opcode);
void Compile_ExtractShiftLeftRegister(Core::System& system, Macro::Opcode opcode);
void Compile_Read(Core::System& system, Macro::Opcode opcode);
void Compile_Branch(Macro::Opcode opcode); void Compile_Branch(Macro::Opcode opcode);
void Optimizer_ScanFlags(); void Optimizer_ScanFlags();
void Compile();
bool Compile_NextInstruction();
void Compile(Core::System& system);
bool Compile_NextInstruction(Core::System& system);
Xbyak::Reg32 Compile_FetchParameter(); Xbyak::Reg32 Compile_FetchParameter();
Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
void Compile_Send(Xbyak::Reg32 value);
void Compile_ProcessResult(Core::System& system, Macro::ResultOperation operation, u32 reg);
void Compile_Send(Core::System& system, Xbyak::Reg32 value);
Macro::Opcode GetOpCode() const; Macro::Opcode GetOpCode() const;
struct JITState { struct JITState {
Engines::Maxwell3D* maxwell3d{};
Engines::Maxwell3D* maxwell3d = nullptr;
Core::System* system = nullptr;
std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
u32 carry_flag{}; u32 carry_flag{};
}; };
@ -777,15 +775,16 @@ struct MacroJITx64Impl final : public Xbyak::CodeGenerator, public DynamicCached
std::span<const u32> code; std::span<const u32> code;
}; };
void MacroJITx64Impl::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, u32 method) {
void MacroJITx64Impl::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, u32 method) {
ASSERT_OR_EXECUTE(program != nullptr, { return; }); ASSERT_OR_EXECUTE(program != nullptr, { return; });
JITState state{}; JITState state{};
state.maxwell3d = &maxwell3d; state.maxwell3d = &maxwell3d;
state.system = &system;
state.registers = {}; state.registers = {};
program(&state, parameters.data(), parameters.data() + parameters.size()); program(&state, parameters.data(), parameters.data() + parameters.size());
} }
void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
void MacroJITx64Impl::Compile_ALU(Core::System& system, Macro::Opcode opcode) {
const bool is_a_zero = opcode.src_a == 0; const bool is_a_zero = opcode.src_a == 0;
const bool is_b_zero = opcode.src_b == 0; const bool is_b_zero = opcode.src_b == 0;
const bool valid_operation = !is_a_zero && !is_b_zero; const bool valid_operation = !is_a_zero && !is_b_zero;
@ -902,10 +901,10 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", opcode.alu_operation.Value()); UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", opcode.alu_operation.Value());
break; break;
} }
Compile_ProcessResult(opcode.result_operation, opcode.dst);
Compile_ProcessResult(system, opcode.result_operation, opcode.dst);
} }
void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) {
void MacroJITx64Impl::Compile_AddImmediate(Core::System& system, Macro::Opcode opcode) {
if (optimizer.skip_dummy_addimmediate) { if (optimizer.skip_dummy_addimmediate) {
// Games tend to use this as an exit instruction placeholder. It's to encode an instruction // Games tend to use this as an exit instruction placeholder. It's to encode an instruction
// without doing anything. In our case we can just not emit anything. // without doing anything. In our case we can just not emit anything.
@ -940,10 +939,10 @@ void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) {
sub(result, opcode.immediate * -1); sub(result, opcode.immediate * -1);
} }
} }
Compile_ProcessResult(opcode.result_operation, opcode.dst);
Compile_ProcessResult(system, opcode.result_operation, opcode.dst);
} }
void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) {
void MacroJITx64Impl::Compile_ExtractInsert(Core::System& system, Macro::Opcode opcode) {
auto dst = Compile_GetRegister(opcode.src_a, RESULT); auto dst = Compile_GetRegister(opcode.src_a, RESULT);
auto src = Compile_GetRegister(opcode.src_b, eax); auto src = Compile_GetRegister(opcode.src_b, eax);
@ -954,10 +953,10 @@ void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) {
shl(src, opcode.bf_dst_bit); shl(src, opcode.bf_dst_bit);
or_(dst, src); or_(dst, src);
Compile_ProcessResult(opcode.result_operation, opcode.dst);
Compile_ProcessResult(system, opcode.result_operation, opcode.dst);
} }
void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Core::System& system, Macro::Opcode opcode) {
const auto dst = Compile_GetRegister(opcode.src_a, ecx); const auto dst = Compile_GetRegister(opcode.src_a, ecx);
const auto src = Compile_GetRegister(opcode.src_b, RESULT); const auto src = Compile_GetRegister(opcode.src_b, RESULT);
@ -965,10 +964,10 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
and_(src, opcode.GetBitfieldMask()); and_(src, opcode.GetBitfieldMask());
shl(src, opcode.bf_dst_bit); shl(src, opcode.bf_dst_bit);
Compile_ProcessResult(opcode.result_operation, opcode.dst);
Compile_ProcessResult(system, opcode.result_operation, opcode.dst);
} }
void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Core::System& system, Macro::Opcode opcode) {
const auto dst = Compile_GetRegister(opcode.src_a, ecx); const auto dst = Compile_GetRegister(opcode.src_a, ecx);
const auto src = Compile_GetRegister(opcode.src_b, RESULT); const auto src = Compile_GetRegister(opcode.src_b, RESULT);
@ -976,10 +975,10 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
and_(src, opcode.GetBitfieldMask()); and_(src, opcode.GetBitfieldMask());
shl(src, dst.cvt8()); shl(src, dst.cvt8());
Compile_ProcessResult(opcode.result_operation, opcode.dst);
Compile_ProcessResult(system, opcode.result_operation, opcode.dst);
} }
void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
void MacroJITx64Impl::Compile_Read(Core::System& system, Macro::Opcode opcode) {
if (optimizer.zero_reg_skip && opcode.src_a == 0) { if (optimizer.zero_reg_skip && opcode.src_a == 0) {
if (opcode.immediate == 0) { if (opcode.immediate == 0) {
xor_(RESULT, RESULT); xor_(RESULT, RESULT);
@ -1005,23 +1004,21 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
int3(); int3();
L(pass_range_check); L(pass_range_check);
} }
mov(rax, qword[STATE]);
mov(RESULT,
dword[rax + offsetof(Engines::Maxwell3D, regs) +
offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]);
Compile_ProcessResult(opcode.result_operation, opcode.dst);
mov(rax, qword[STATE + offsetof(JITState, maxwell3d)]);
mov(RESULT, dword[rax + offsetof(Engines::Maxwell3D, regs) + offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]);
Compile_ProcessResult(system, opcode.result_operation, opcode.dst);
} }
static void MacroJIT_SendThunk(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
maxwell3d->CallMethod(method_address.address, value, true);
static void MacroJIT_SendThunk(Core::System* system, Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
maxwell3d->CallMethod(*system, method_address.address, value, true);
} }
void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
void MacroJITx64Impl::Compile_Send(Core::System& system, Xbyak::Reg32 value) {
Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
mov(Common::X64::ABI_PARAM1, qword[STATE]);
mov(Common::X64::ABI_PARAM2.cvt32(), METHOD_ADDRESS);
mov(Common::X64::ABI_PARAM3.cvt32(), value);
mov(Common::X64::ABI_PARAM1, qword[STATE + offsetof(JITState, system)]);
mov(Common::X64::ABI_PARAM2, qword[STATE + offsetof(JITState, maxwell3d)]);
mov(Common::X64::ABI_PARAM3.cvt32(), METHOD_ADDRESS);
mov(Common::X64::ABI_PARAM4.cvt32(), value);
Common::X64::CallFarFunction(*this, &MacroJIT_SendThunk); Common::X64::CallFarFunction(*this, &MacroJIT_SendThunk);
Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
@ -1045,9 +1042,8 @@ void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
} }
void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
const s32 jump_address =
static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32));
ASSERT(!is_delay_slot && "Executing a branch in a delay slot is not valid");
const s32 jump_address = s32(pc) + s32(opcode.GetBranchTarget() / sizeof(s32));
Xbyak::Label end; Xbyak::Label end;
auto value = Compile_GetRegister(opcode.src_a, eax); auto value = Compile_GetRegister(opcode.src_a, eax);
@ -1116,7 +1112,7 @@ void MacroJITx64Impl::Optimizer_ScanFlags() {
} }
} }
void MacroJITx64Impl::Compile() {
void MacroJITx64Impl::Compile(Core::System& system) {
labels.fill(Xbyak::Label()); labels.fill(Xbyak::Label());
Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
@ -1156,7 +1152,7 @@ void MacroJITx64Impl::Compile() {
next_opcode = {}; next_opcode = {};
} }
pc = i; pc = i;
Compile_NextInstruction();
Compile_NextInstruction(system);
} }
L(end_of_code); L(end_of_code);
@ -1167,7 +1163,7 @@ void MacroJITx64Impl::Compile() {
program = getCode<ProgramType>(); program = getCode<ProgramType>();
} }
bool MacroJITx64Impl::Compile_NextInstruction() {
bool MacroJITx64Impl::Compile_NextInstruction(Core::System& system) {
const auto opcode = GetOpCode(); const auto opcode = GetOpCode();
if (labels[pc].getAddress()) { if (labels[pc].getAddress()) {
return false; return false;
@ -1177,22 +1173,22 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
switch (opcode.operation) { switch (opcode.operation) {
case Macro::Operation::ALU: case Macro::Operation::ALU:
Compile_ALU(opcode);
Compile_ALU(system, opcode);
break; break;
case Macro::Operation::AddImmediate: case Macro::Operation::AddImmediate:
Compile_AddImmediate(opcode);
Compile_AddImmediate(system, opcode);
break; break;
case Macro::Operation::ExtractInsert: case Macro::Operation::ExtractInsert:
Compile_ExtractInsert(opcode);
Compile_ExtractInsert(system, opcode);
break; break;
case Macro::Operation::ExtractShiftLeftImmediate: case Macro::Operation::ExtractShiftLeftImmediate:
Compile_ExtractShiftLeftImmediate(opcode);
Compile_ExtractShiftLeftImmediate(system, opcode);
break; break;
case Macro::Operation::ExtractShiftLeftRegister: case Macro::Operation::ExtractShiftLeftRegister:
Compile_ExtractShiftLeftRegister(opcode);
Compile_ExtractShiftLeftRegister(system, opcode);
break; break;
case Macro::Operation::Read: case Macro::Operation::Read:
Compile_Read(opcode);
Compile_Read(system, opcode);
break; break;
case Macro::Operation::Branch: case Macro::Operation::Branch:
Compile_Branch(opcode); Compile_Branch(opcode);
@ -1264,7 +1260,7 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
return dst; return dst;
} }
void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
void MacroJITx64Impl::Compile_ProcessResult(Core::System& system, Macro::ResultOperation operation, u32 reg) {
const auto SetRegister = [this](u32 reg_index, const Xbyak::Reg32& result) { const auto SetRegister = [this](u32 reg_index, const Xbyak::Reg32& result) {
// Register 0 is supposed to always return 0. NOP is implemented as a store to the zero // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
// register. // register.
@ -1289,12 +1285,12 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3
case Macro::ResultOperation::FetchAndSend: case Macro::ResultOperation::FetchAndSend:
// Fetch parameter and send result. // Fetch parameter and send result.
SetRegister(reg, Compile_FetchParameter()); SetRegister(reg, Compile_FetchParameter());
Compile_Send(RESULT);
Compile_Send(system, RESULT);
break; break;
case Macro::ResultOperation::MoveAndSend: case Macro::ResultOperation::MoveAndSend:
// Move and send result. // Move and send result.
SetRegister(reg, RESULT); SetRegister(reg, RESULT);
Compile_Send(RESULT);
Compile_Send(system, RESULT);
break; break;
case Macro::ResultOperation::FetchAndSetMethod: case Macro::ResultOperation::FetchAndSetMethod:
// Fetch parameter and use result as Method Address. // Fetch parameter and use result as Method Address.
@ -1305,7 +1301,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3
// Move result and use as Method Address, then fetch and send parameter. // Move result and use as Method Address, then fetch and send parameter.
SetRegister(reg, RESULT); SetRegister(reg, RESULT);
SetMethodAddress(RESULT); SetMethodAddress(RESULT);
Compile_Send(Compile_FetchParameter());
Compile_Send(system, Compile_FetchParameter());
break; break;
case Macro::ResultOperation::MoveAndSetMethodSend: case Macro::ResultOperation::MoveAndSetMethodSend:
// Move result and use as Method Address, then send bits 12:17 of result. // Move result and use as Method Address, then send bits 12:17 of result.
@ -1313,7 +1309,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3
SetMethodAddress(RESULT); SetMethodAddress(RESULT);
shr(RESULT, 12); shr(RESULT, 12);
and_(RESULT, 0b111111); and_(RESULT, 0b111111);
Compile_Send(RESULT);
Compile_Send(system, RESULT);
break; break;
default: default:
UNIMPLEMENTED_MSG("Unimplemented macro operation {}", operation); UNIMPLEMENTED_MSG("Unimplemented macro operation {}", operation);
@ -1354,36 +1350,36 @@ static void Dump(u64 hash, std::span<const u32> code, bool decompiled = false) {
macro_file.write(reinterpret_cast<const char*>(code.data()), code.size_bytes()); macro_file.write(reinterpret_cast<const char*>(code.data()), code.size_bytes());
} }
void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, std::span<const u32> parameters) {
auto const execute_variant = [&maxwell3d, &parameters, method](AnyCachedMacro& acm) {
void MacroEngine::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, u32 method, std::span<const u32> parameters) {
auto const execute_variant = [&system, &maxwell3d, &parameters, method](AnyCachedMacro& acm) {
if (auto a = std::get_if<HLE_DrawArraysIndirect>(&acm)) if (auto a = std::get_if<HLE_DrawArraysIndirect>(&acm))
a->Execute(maxwell3d, parameters, method);
a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_DrawIndexedIndirect>(&acm)) if (auto a = std::get_if<HLE_DrawIndexedIndirect>(&acm))
a->Execute(maxwell3d, parameters, method);
a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_MultiDrawIndexedIndirectCount>(&acm)) if (auto a = std::get_if<HLE_MultiDrawIndexedIndirectCount>(&acm))
a->Execute(maxwell3d, parameters, method);
a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_MultiLayerClear>(&acm)) if (auto a = std::get_if<HLE_MultiLayerClear>(&acm))
a->Execute(maxwell3d, parameters, method);
a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_C713C83D8F63CCF3>(&acm)) if (auto a = std::get_if<HLE_C713C83D8F63CCF3>(&acm))
a->Execute(maxwell3d, parameters, method);
a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_D7333D26E0A93EDE>(&acm)) if (auto a = std::get_if<HLE_D7333D26E0A93EDE>(&acm))
a->Execute(maxwell3d, parameters, method);
a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_BindShader>(&acm)) if (auto a = std::get_if<HLE_BindShader>(&acm))
a->Execute(maxwell3d, parameters, method);
a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_SetRasterBoundingBox>(&acm)) if (auto a = std::get_if<HLE_SetRasterBoundingBox>(&acm))
a->Execute(maxwell3d, parameters, method);
a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_ClearConstBuffer>(&acm)) if (auto a = std::get_if<HLE_ClearConstBuffer>(&acm))
a->Execute(maxwell3d, parameters, method);
a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_ClearMemory>(&acm)) if (auto a = std::get_if<HLE_ClearMemory>(&acm))
a->Execute(maxwell3d, parameters, method);
a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_TransformFeedbackSetup>(&acm)) if (auto a = std::get_if<HLE_TransformFeedbackSetup>(&acm))
a->Execute(maxwell3d, parameters, method);
a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_DrawIndirectByteCount>(&acm)) if (auto a = std::get_if<HLE_DrawIndirectByteCount>(&acm))
a->Execute(maxwell3d, parameters, method);
a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<MacroInterpreterImpl>(&acm)) if (auto a = std::get_if<MacroInterpreterImpl>(&acm))
a->Execute(maxwell3d, parameters, method);
a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<std::unique_ptr<DynamicCachedMacro>>(&acm)) if (auto a = std::get_if<std::unique_ptr<DynamicCachedMacro>>(&acm))
a->get()->Execute(maxwell3d, parameters, method);
a->get()->Execute(system, maxwell3d, parameters, method);
}; };
if (auto const it = macro_cache.find(method); it != macro_cache.end()) { if (auto const it = macro_cache.find(method); it != macro_cache.end()) {
auto& ci = it->second; auto& ci = it->second;
@ -1414,9 +1410,9 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, std::span<c
code.resize(macro_cached.size() - rebased_method); code.resize(macro_cached.size() - rebased_method);
std::memcpy(code.data(), macro_cached.data() + rebased_method, code.size() * sizeof(u32)); std::memcpy(code.data(), macro_cached.data() + rebased_method, code.size() * sizeof(u32));
ci.hash = Common::HashValue(code); ci.hash = Common::HashValue(code);
ci.program = Compile(maxwell3d, code);
ci.program = Compile(system, maxwell3d, code);
} else { } else {
ci.program = Compile(maxwell3d, macro_code->second);
ci.program = Compile(system, maxwell3d, macro_code->second);
ci.hash = Common::HashValue(macro_code->second); ci.hash = Common::HashValue(macro_code->second);
} }
if (CanBeHLEProgram(ci.hash) && !Settings::values.disable_macro_hle) { if (CanBeHLEProgram(ci.hash) && !Settings::values.disable_macro_hle) {
@ -1431,10 +1427,10 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, std::span<c
} }
} }
AnyCachedMacro MacroEngine::Compile(Engines::Maxwell3D& maxwell3d, std::span<const u32> code) {
AnyCachedMacro MacroEngine::Compile(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> code) {
#ifdef ARCHITECTURE_x86_64 #ifdef ARCHITECTURE_x86_64
if (!is_interpreted) if (!is_interpreted)
return std::make_unique<MacroJITx64Impl>(code);
return std::make_unique<MacroJITx64Impl>(system, code);
#endif #endif
return MacroInterpreterImpl(code); return MacroInterpreterImpl(code);
} }

50
src/video_core/macro.h

@ -14,6 +14,10 @@
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/common_types.h" #include "common/common_types.h"
namespace Core {
class System;
}
namespace Tegra { namespace Tegra {
namespace Engines { namespace Engines {
@ -106,61 +110,61 @@ struct HLEMacro {
/// also assigning the base vertex/instance. /// also assigning the base vertex/instance.
struct HLE_DrawArraysIndirect final { struct HLE_DrawArraysIndirect final {
HLE_DrawArraysIndirect(bool extended_) noexcept : extended{extended_} {} HLE_DrawArraysIndirect(bool extended_) noexcept : extended{extended_} {}
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
bool extended; bool extended;
}; };
/// @note: these macros have two versions, a normal and extended version, with the extended version /// @note: these macros have two versions, a normal and extended version, with the extended version
/// also assigning the base vertex/instance. /// also assigning the base vertex/instance.
struct HLE_DrawIndexedIndirect final { struct HLE_DrawIndexedIndirect final {
explicit HLE_DrawIndexedIndirect(bool extended_) noexcept : extended{extended_} {} explicit HLE_DrawIndexedIndirect(bool extended_) noexcept : extended{extended_} {}
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
bool extended; bool extended;
}; };
struct HLE_MultiLayerClear final { struct HLE_MultiLayerClear final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
}; };
struct HLE_MultiDrawIndexedIndirectCount final { struct HLE_MultiDrawIndexedIndirectCount final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
}; };
struct HLE_DrawIndirectByteCount final { struct HLE_DrawIndirectByteCount final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
}; };
struct HLE_C713C83D8F63CCF3 final { struct HLE_C713C83D8F63CCF3 final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
}; };
struct HLE_D7333D26E0A93EDE final { struct HLE_D7333D26E0A93EDE final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
}; };
struct HLE_BindShader final { struct HLE_BindShader final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
}; };
struct HLE_SetRasterBoundingBox final { struct HLE_SetRasterBoundingBox final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
}; };
struct HLE_ClearConstBuffer final { struct HLE_ClearConstBuffer final {
HLE_ClearConstBuffer(size_t base_size_) noexcept : base_size{base_size_} {} HLE_ClearConstBuffer(size_t base_size_) noexcept : base_size{base_size_} {}
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
size_t base_size; size_t base_size;
}; };
struct HLE_ClearMemory final { struct HLE_ClearMemory final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
std::vector<u32> zero_memory; std::vector<u32> zero_memory;
}; };
struct HLE_TransformFeedbackSetup final { struct HLE_TransformFeedbackSetup final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
}; };
struct MacroInterpreterImpl final { struct MacroInterpreterImpl final {
MacroInterpreterImpl() {} MacroInterpreterImpl() {}
MacroInterpreterImpl(std::span<const u32> code_) : code{code_} {} MacroInterpreterImpl(std::span<const u32> code_) : code{code_} {}
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> params, u32 method);
void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> params, u32 method);
void Reset(); void Reset();
bool Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slot);
bool Step(Core::System& system, Engines::Maxwell3D& maxwell3d, bool is_delay_slot);
u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);
void ProcessResult(Engines::Maxwell3D& maxwell3d, Macro::ResultOperation operation, u32 reg, u32 result);
void ProcessResult(Core::System& system, Engines::Maxwell3D& maxwell3d, Macro::ResultOperation operation, u32 reg, u32 result);
bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;
Macro::Opcode GetOpcode() const; Macro::Opcode GetOpcode() const;
u32 GetRegister(u32 register_id) const; u32 GetRegister(u32 register_id) const;
@ -169,7 +173,7 @@ struct MacroInterpreterImpl final {
[[nodiscard]] inline void SetMethodAddress(u32 address) noexcept { [[nodiscard]] inline void SetMethodAddress(u32 address) noexcept {
method_address.raw = address; method_address.raw = address;
} }
void Send(Engines::Maxwell3D& maxwell3d, u32 value);
void Send(Core::System& system, Engines::Maxwell3D& maxwell3d, u32 value);
u32 Read(Engines::Maxwell3D& maxwell3d, u32 method) const; u32 Read(Engines::Maxwell3D& maxwell3d, u32 method) const;
u32 FetchParameter(); u32 FetchParameter();
/// General purpose macro registers. /// General purpose macro registers.
@ -192,7 +196,7 @@ struct DynamicCachedMacro {
/// Executes the macro code with the specified input parameters. /// Executes the macro code with the specified input parameters.
/// @param parameters The parameters of the macro /// @param parameters The parameters of the macro
/// @param method The method to execute /// @param method The method to execute
virtual void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, u32 method) = 0;
virtual void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, u32 method) = 0;
}; };
using AnyCachedMacro = std::variant< using AnyCachedMacro = std::variant<
@ -227,8 +231,8 @@ struct MacroEngine {
uploaded_macro_code.erase(method); uploaded_macro_code.erase(method);
} }
// Compiles the macro if its not in the cache, and executes the compiled macro // Compiles the macro if its not in the cache, and executes the compiled macro
void Execute(Engines::Maxwell3D& maxwell3d, u32 method, std::span<const u32> parameters);
AnyCachedMacro Compile(Engines::Maxwell3D& maxwell3d, std::span<const u32> code);
void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, u32 method, std::span<const u32> parameters);
AnyCachedMacro Compile(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> code);
struct CacheInfo { struct CacheInfo {
AnyCachedMacro program; AnyCachedMacro program;
u64 hash{}; u64 hash{};

Loading…
Cancel
Save