From 435483f7c12f4c08df0bb119ab7980587f5084bd Mon Sep 17 00:00:00 2001 From: Gamer64 <76565986+Gamer64ytb@users.noreply.github.com> Date: Thu, 24 Jul 2025 00:40:58 +0200 Subject: [PATCH] [GPU]: Implement Fast GPU Path --- .../features/settings/model/BooleanSetting.kt | 1 + .../settings/model/view/SettingsItem.kt | 7 ++++ .../settings/ui/SettingsFragmentPresenter.kt | 1 + .../app/src/main/res/values/strings.xml | 2 ++ src/common/settings.h | 7 ++++ src/video_core/gpu.cpp | 33 ++++++++++++++++++- src/yuzu/configuration/shared_translation.cpp | 5 +++ 7 files changed, 55 insertions(+), 1 deletion(-) diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt index 92a49a1de7..ec2984e434 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt @@ -17,6 +17,7 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting { RENDERER_USE_SPEED_LIMIT("use_speed_limit"), USE_FAST_CPU_TIME("use_fast_cpu_time"), USE_CUSTOM_CPU_TICKS("use_custom_cpu_ticks"), + FAST_GPU_PATH("fast_gpu_path"), SKIP_CPU_INNER_INVALIDATION("skip_cpu_inner_invalidation"), USE_DOCKED_MODE("use_docked_mode"), USE_AUTO_STUB("use_auto_stub"), diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt index d4335ddcd8..589efd5c58 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt @@ -652,6 +652,13 @@ abstract class SettingsItem( max = 65535 ) ) + put( + SwitchSetting( + BooleanSetting.FAST_GPU_PATH, + titleId = R.string.fast_gpu_path, + descriptionId = R.string.fast_gpu_path_description + ) + ) put( SwitchSetting( BooleanSetting.SKIP_CPU_INNER_INVALIDATION, diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt index 8555b334ee..adabf67744 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt @@ -464,6 +464,7 @@ class SettingsFragmentPresenter( add(IntSetting.FAST_CPU_TIME.key) add(BooleanSetting.USE_CUSTOM_CPU_TICKS.key) add(IntSetting.CPU_TICKS.key) + add(BooleanSetting.FAST_GPU_PATH.key) add(BooleanSetting.SKIP_CPU_INNER_INVALIDATION.key) add(BooleanSetting.USE_LRU_CACHE.key) add(BooleanSetting.CORE_SYNC_CORE_SPEED.key) diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml index c78487e327..a73f0a1a15 100644 --- a/src/android/app/src/main/res/values/strings.xml +++ b/src/android/app/src/main/res/values/strings.xml @@ -101,6 +101,8 @@ Custom CPU Ticks Set a custom value of CPU ticks. Higher values can increase performance, but may also cause the game to freeze. A range of 77–21000 is recommended. Ticks + Fast GPU Path + Bypasses all CPU–GPU synchronization and fence handling, reducing overhead and improving the performance. This may cause glitches or crashes on some games. Skip CPU Inner Invalidation Skips certain CPU-side cache invalidations during memory updates, reducing CPU usage and improving it\'s performance. This may cause glitches or crashes on some games. CPU Clock diff --git a/src/common/settings.h b/src/common/settings.h index e3c2bd57cc..9ac06e526e 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -450,6 +450,13 @@ struct Values { VramUsageMode::Aggressive, "vram_usage_mode", Category::RendererAdvanced}; + SwitchableSetting fast_gpu_path{linkage, + false, + "fast_gpu_path", + Category::RendererAdvanced, + Specialization::Default, + true, + true}; SwitchableSetting skip_cpu_inner_invalidation{linkage, true, "skip_cpu_inner_invalidation", diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 7c34005a12..e99ead284c 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -40,7 +40,8 @@ struct GPU::Impl { explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_) : gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_}, shader_notify{std::make_unique()}, is_async{is_async_}, - gpu_thread{system_, is_async_}, scheduler{std::make_unique(gpu)} {} + gpu_thread{system_, is_async_}, scheduler{std::make_unique(gpu)}, + fast_path{Settings::values.fast_gpu_path.GetValue()} {} ~Impl() = default; @@ -110,6 +111,11 @@ struct GPU::Impl { /// Request a host GPU memory flush from the CPU. template [[nodiscard]] u64 RequestSyncOperation(Func&& action) { + if (fast_path) { + // Just bump the fence counter, but do NOT enqueue + return ++last_sync_fence; + } + std::unique_lock lck{sync_request_mutex}; const u64 fence = ++last_sync_fence; sync_requests.emplace_back(action); @@ -122,12 +128,25 @@ struct GPU::Impl { } void WaitForSyncOperation(const u64 fence) { + if (fast_path) { + // Never block + return; + } + std::unique_lock lck{sync_request_mutex}; sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; }); } /// Tick pending requests within the GPU. void TickWork() { + if (fast_path) { + // Drop all pending requests in one go + sync_requests.clear(); + current_sync_fence.store(last_sync_fence, std::memory_order_relaxed); + sync_request_cv.notify_all(); + return; + } + std::unique_lock lck{sync_request_mutex}; while (!sync_requests.empty()) { auto request = std::move(sync_requests.front()); @@ -289,6 +308,11 @@ struct GPU::Impl { void RequestComposite(std::vector&& layers, std::vector&& fences) { + if (fast_path) { + renderer->Composite(layers); + return; + } + size_t num_fences{fences.size()}; size_t current_request_counter{}; { @@ -327,6 +351,10 @@ struct GPU::Impl { } std::vector GetAppletCaptureBuffer() { + if (fast_path) { + return renderer->GetAppletCaptureBuffer(); + } + std::vector out; const auto wait_fence = @@ -372,6 +400,9 @@ struct GPU::Impl { std::unique_ptr cpu_context; std::unique_ptr scheduler; + + const bool fast_path; + std::unordered_map> channels; Tegra::Control::ChannelState* current_channel; s32 bound_channel{-1}; diff --git a/src/yuzu/configuration/shared_translation.cpp b/src/yuzu/configuration/shared_translation.cpp index 770a16a481..9af0b71210 100644 --- a/src/yuzu/configuration/shared_translation.cpp +++ b/src/yuzu/configuration/shared_translation.cpp @@ -250,6 +250,11 @@ std::unique_ptr InitializeTranslations(QWidget* parent) "of available video memory for performance. Has no effect on integrated graphics. " "Aggressive mode may severely impact the performance of other applications such as " "recording software.")); + INSERT(Settings, + fast_gpu_path, + tr("Fast GPU Path"), + tr("Bypasses all CPU–GPU synchronization and fence handling, reducing overhead and improving " + "the performance. This may cause glitches or crashes on some games.")); INSERT(Settings, skip_cpu_inner_invalidation, tr("Skip CPU Inner Invalidation"),