From 0a647f6d85dfef190331dac0fa9820ed35b7d46b Mon Sep 17 00:00:00 2001 From: MaranBr Date: Sun, 17 Aug 2025 19:42:58 -0400 Subject: [PATCH] Add checkbox to enable accurate barrier between compute and memory operations --- src/common/settings.h | 2 +- src/video_core/dma_pusher.cpp | 59 ++++++++++++------- src/video_core/dma_pusher.h | 16 ++++- src/yuzu/configuration/shared_translation.cpp | 2 + 4 files changed, 54 insertions(+), 25 deletions(-) diff --git a/src/common/settings.h b/src/common/settings.h index faf7210f5d..1d44d6e651 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -466,7 +466,7 @@ struct Values { true, true}; #endif - + SwitchableSetting enable_accurate_barrier{linkage, false, "enable_accurate_barrier", Category::RendererAdvanced}; SwitchableSetting async_presentation{linkage, #ifdef ANDROID true, diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index c2c07deb9b..38a79b1304 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -1,10 +1,9 @@ // SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later -// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "common/cityhash.h" #include "common/settings.h" #include "core/core.h" #include "video_core/dma_pusher.h" @@ -12,11 +11,13 @@ #include "video_core/gpu.h" #include "video_core/guest_memory.h" #include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" namespace Tegra { constexpr u32 MacroRegistersStart = 0xE00; -constexpr u32 ComputeInline = 0x6D; +[[maybe_unused]] constexpr u32 ComputeInline = 0x6D; + //start on PR#76 of Eden this is a unused variable in android (need to investigate) // Dummy function that uses ComputeInline @@ -27,11 +28,12 @@ constexpr void UseComputeInline() { DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, Control::ChannelState& channel_state_) : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_, - *this, channel_state_} {} + *this, channel_state_}, signal_sync{false}, synced{false} {} DmaPusher::~DmaPusher() = default; void DmaPusher::DispatchCalls() { + dma_pushbuffer_subindex = 0; dma_state.is_last_call = true; @@ -69,14 +71,16 @@ bool DmaPusher::Step() { } else { const CommandListHeader command_list_header{ command_list.command_lists[dma_pushbuffer_subindex++]}; - dma_state.dma_get = command_list_header.addr; - if (dma_pushbuffer_subindex >= command_list.command_lists.size()) { - // We've gone through the current list, remove it from the queue - dma_pushbuffer.pop(); - dma_pushbuffer_subindex = 0; + if (signal_sync) { + std::unique_lock lk(sync_mutex); + sync_cv.wait(lk, [this]() { return synced; }); + signal_sync = false; + synced = false; } + dma_state.dma_get = command_list_header.addr; + if (command_list_header.size == 0) { return true; } @@ -102,23 +106,33 @@ bool DmaPusher::Step() { &command_headers); ProcessCommands(headers); }; - - // Only use unsafe reads for non-compute macro operations if (Settings::IsGPULevelHigh()) { - const bool is_compute = (subchannel_type[dma_state.subchannel] == - Engines::EngineTypes::KeplerCompute); - - if (dma_state.method >= MacroRegistersStart && !is_compute) { + if (dma_state.method >= MacroRegistersStart) { unsafe_process(); - return true; - } + } else if (subchannel_type[dma_state.subchannel] == Engines::EngineTypes::KeplerCompute && dma_state.method == ComputeInline) { + unsafe_process(); + } else { + safe_process(); + } + } else { + unsafe_process(); + } - // Always use safe reads for compute operations - safe_process(); - return true; + if (dma_pushbuffer_subindex >= command_list.command_lists.size()) { + // We've gone through the current list, remove it from the queue + dma_pushbuffer.pop(); + dma_pushbuffer_subindex = 0; + } else if (command_list.command_lists[dma_pushbuffer_subindex].sync && Settings::values.enable_accurate_barrier.GetValue()) { + signal_sync = true; } - unsafe_process(); + if (signal_sync) { + rasterizer->SignalFence([this]() { + std::scoped_lock lk(sync_mutex); + synced = true; + sync_cv.notify_all(); + }); + } } return true; } @@ -226,7 +240,8 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { } } -void DmaPusher::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) { +void DmaPusher::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { + rasterizer = rasterizer_; puller.BindRasterizer(rasterizer); } diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index e46a8fa5c6..f850513603 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -1,9 +1,13 @@ -// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once #include +#include #include #include #include @@ -74,8 +78,10 @@ struct CommandListHeader { union { u64 raw; BitField<0, 40, GPUVAddr> addr; - BitField<41, 1, u64> is_non_main; + BitField<40, 1, u64> allow_flush; + BitField<41, 1, u64> is_push_buffer; BitField<42, 21, u64> size; + BitField<63, 1, u64> sync; }; }; static_assert(sizeof(CommandListHeader) == sizeof(u64), "CommandListHeader is incorrect size"); @@ -178,6 +184,12 @@ private: Core::System& system; MemoryManager& memory_manager; mutable Engines::Puller puller; + + VideoCore::RasterizerInterface* rasterizer; + bool signal_sync; + bool synced; + std::mutex sync_mutex; + std::condition_variable sync_cv; }; } // namespace Tegra diff --git a/src/yuzu/configuration/shared_translation.cpp b/src/yuzu/configuration/shared_translation.cpp index b9884e9442..561c490c2a 100644 --- a/src/yuzu/configuration/shared_translation.cpp +++ b/src/yuzu/configuration/shared_translation.cpp @@ -269,6 +269,8 @@ std::unique_ptr InitializeTranslations(QWidget* parent) INSERT(Settings, bg_blue, QString(), QString()); // Renderer (Advanced Graphics) + INSERT(Settings, enable_accurate_barrier, tr("Enable accurate barrier"), + tr("Ensures data consistency between compute and memory operations.\nThis option should fix issues in some games, but may reduce performance in some cases.")); INSERT(Settings, async_presentation, tr("Enable asynchronous presentation (Vulkan only)"),