From 5c1a672921e641077e4b1f2fe301dbb9bf64d78a Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 14 Jan 2026 00:01:28 +0000 Subject: [PATCH] dma pusher fixups --- src/common/fiber.cpp | 4 +- src/video_core/dma_pusher.cpp | 78 ++++++++++----------------- src/video_core/engines/maxwell_3d.cpp | 6 +-- 3 files changed, 31 insertions(+), 57 deletions(-) diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index ced7cd1a34..ea3da3d053 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -47,11 +47,11 @@ Fiber::Fiber(std::function&& entry_point_func) : impl{std::make_uniqueentry_point = std::move(entry_point_func); impl->stack_limit = impl->stack.data(); impl->rewind_stack_limit = impl->rewind_stack.data(); - u8* stack_base = impl->stack_limit + default_stack_size; + u8* stack_base = impl->stack_limit + DEFAULT_STACK_SIZE; impl->context = boost::context::detail::make_fcontext(stack_base, impl->stack.size(), [](boost::context::detail::transfer_t transfer) -> void { auto* fiber = static_cast(transfer.data); ASSERT(fiber && fiber->impl && fiber->impl->previous_fiber && fiber->impl->previous_fiber->impl); - ASSERT(fiber->canary == CANARY_VALUE); + ASSERT(fiber->impl->canary == CANARY_VALUE); fiber->impl->previous_fiber->impl->context = transfer.fctx; fiber->impl->previous_fiber->impl->guard.unlock(); fiber->impl->previous_fiber.reset(); diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 19f8d32ebb..521701a226 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -111,46 +111,25 @@ bool DmaPusher::Step() { } void DmaPusher::ProcessCommands(std::span commands) { - constexpr std::size_t BATCH_SIZE = 16; - const std::size_t total_commands = commands.size(); - - for (std::size_t index = 0; index < total_commands;) { - const std::size_t prefetch_index = index + BATCH_SIZE; - if (prefetch_index < total_commands) { -#ifdef _MSC_VER - _mm_prefetch(reinterpret_cast(&commands[prefetch_index]), _MM_HINT_T0); -#else - 
__builtin_prefetch(&commands[prefetch_index], 0, 1); -#endif - } - - const CommandHeader& command_header = commands[index]; - - if (dma_state.method_count) { + for (size_t index = 0; index < commands.size();) { + const CommandHeader& command_header = commands[index]; // bind by reference: CallMultiMethod reads the following words through &argument + if (dma_state.method_count && dma_state.non_incrementing) { // Data word of methods command - dma_state.dma_word_offset = static_cast(index * sizeof(u32)); - if (dma_state.non_incrementing) { - const u32 max_write = static_cast( - std::min(index + dma_state.method_count, total_commands) - index); - CallMultiMethod(&command_header.argument, max_write); - dma_state.method_count -= max_write; - dma_state.is_last_call = true; - index += max_write; - continue; - } else { - dma_state.is_last_call = dma_state.method_count <= 1; - CallMethod(command_header.argument); - } - - if (!dma_state.non_incrementing) { - dma_state.method++; - } - - if (dma_increment_once) { - dma_state.non_incrementing = true; - } - - dma_state.method_count--; + dma_state.dma_word_offset = u32(index * sizeof(u32)); + const u32 max_write = u32(std::min(index + dma_state.method_count, commands.size()) - index); + CallMultiMethod(&command_header.argument, max_write); + dma_state.method_count -= max_write; + dma_state.is_last_call = true; + index += max_write; + } else if (dma_state.method_count) { + // Data word of methods command + dma_state.dma_word_offset = u32(index * sizeof(u32)); + dma_state.is_last_call = dma_state.method_count <= 1; + CallMethod(command_header.argument); + dma_state.method += size_t(!dma_state.non_incrementing); + dma_state.non_incrementing = dma_increment_once; + --dma_state.method_count; + ++index; } else { // No command active - this is the first word of a new one switch (command_header.mode) { @@ -167,8 +146,7 @@ void DmaPusher::ProcessCommands(std::span commands) { case SubmissionMode::Inline: dma_state.method = command_header.method; dma_state.subchannel = command_header.subchannel; - dma_state.dma_word_offset = 
static_cast( - -static_cast(dma_state.dma_get)); // negate to set address as 0 + dma_state.dma_word_offset = u64(-s64(dma_state.dma_get)); // negate to set address as 0 CallMethod(command_header.arg_count); dma_state.non_incrementing = true; dma_increment_once = false; @@ -181,8 +159,8 @@ void DmaPusher::ProcessCommands(std::span commands) { default: break; } + ++index; } - index++; } } @@ -202,26 +180,24 @@ void DmaPusher::CallMethod(u32 argument) const { }); } else { auto subchannel = subchannels[dma_state.subchannel]; - if (!subchannel->execution_mask[dma_state.method]) [[likely]] { + if (!subchannel->execution_mask[dma_state.method]) { subchannel->method_sink.emplace_back(dma_state.method, argument); - return; + } else { + subchannel->ConsumeSink(); + subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; + subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call); } - subchannel->ConsumeSink(); - subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; - subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call); } } void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { if (dma_state.method < non_puller_methods) { - puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, - dma_state.method_count); + puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, dma_state.method_count); } else { auto subchannel = subchannels[dma_state.subchannel]; subchannel->ConsumeSink(); subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; - subchannel->CallMultiMethod(dma_state.method, base_start, num_methods, - dma_state.method_count); + subchannel->CallMultiMethod(dma_state.method, base_start, num_methods, dma_state.method_count); } } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 98e4d8c8b9..ec5a57f404 100644 --- 
a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -425,9 +425,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { return; } - ASSERT_MSG(method < Regs::NUM_REGS, - "Invalid Maxwell3D register, increase the size of the Regs structure"); - + ASSERT(method < Regs::NUM_REGS && "Invalid Maxwell3D register, increase the size of the Regs structure"); const u32 argument = ProcessShadowRam(method, method_argument); ProcessDirtyRegisters(method, argument); ProcessMethodCall(method, argument, method_argument, is_last_call); @@ -668,7 +666,7 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { } u32 Maxwell3D::GetRegisterValue(u32 method) const { - ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); + ASSERT(method < Regs::NUM_REGS && "Invalid Maxwell3D register"); return regs.reg_array[method]; }