From 435483f7c12f4c08df0bb119ab7980587f5084bd Mon Sep 17 00:00:00 2001
From: Gamer64 <76565986+Gamer64ytb@users.noreply.github.com>
Date: Thu, 24 Jul 2025 00:40:58 +0200
Subject: [PATCH] [GPU]: Implement Fast GPU Path
---
.../features/settings/model/BooleanSetting.kt | 1 +
.../settings/model/view/SettingsItem.kt | 7 ++++
.../settings/ui/SettingsFragmentPresenter.kt | 1 +
.../app/src/main/res/values/strings.xml | 2 ++
src/common/settings.h | 7 ++++
src/video_core/gpu.cpp | 33 ++++++++++++++++++-
src/yuzu/configuration/shared_translation.cpp | 5 +++
7 files changed, 55 insertions(+), 1 deletion(-)
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt
index 92a49a1de7..ec2984e434 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt
@@ -17,6 +17,7 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting {
RENDERER_USE_SPEED_LIMIT("use_speed_limit"),
USE_FAST_CPU_TIME("use_fast_cpu_time"),
USE_CUSTOM_CPU_TICKS("use_custom_cpu_ticks"),
+ FAST_GPU_PATH("fast_gpu_path"),
SKIP_CPU_INNER_INVALIDATION("skip_cpu_inner_invalidation"),
USE_DOCKED_MODE("use_docked_mode"),
USE_AUTO_STUB("use_auto_stub"),
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt
index d4335ddcd8..589efd5c58 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt
@@ -652,6 +652,13 @@ abstract class SettingsItem(
max = 65535
)
)
+ put(
+ SwitchSetting(
+ BooleanSetting.FAST_GPU_PATH,
+ titleId = R.string.fast_gpu_path,
+ descriptionId = R.string.fast_gpu_path_description
+ )
+ )
put(
SwitchSetting(
BooleanSetting.SKIP_CPU_INNER_INVALIDATION,
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt
index 8555b334ee..adabf67744 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt
@@ -464,6 +464,7 @@ class SettingsFragmentPresenter(
add(IntSetting.FAST_CPU_TIME.key)
add(BooleanSetting.USE_CUSTOM_CPU_TICKS.key)
add(IntSetting.CPU_TICKS.key)
+ add(BooleanSetting.FAST_GPU_PATH.key)
add(BooleanSetting.SKIP_CPU_INNER_INVALIDATION.key)
add(BooleanSetting.USE_LRU_CACHE.key)
add(BooleanSetting.CORE_SYNC_CORE_SPEED.key)
diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml
index c78487e327..a73f0a1a15 100644
--- a/src/android/app/src/main/res/values/strings.xml
+++ b/src/android/app/src/main/res/values/strings.xml
@@ -101,6 +101,8 @@
Custom CPU Ticks
Set a custom value of CPU ticks. Higher values can increase performance, but may also cause the game to freeze. A range of 77–21000 is recommended.
Ticks
+ Fast GPU Path
+ Bypasses all CPU–GPU synchronization and fence handling, reducing overhead and improving the performance. This may cause glitches or crashes on some games.
Skip CPU Inner Invalidation
Skips certain CPU-side cache invalidations during memory updates, reducing CPU usage and improving it\'s performance. This may cause glitches or crashes on some games.
CPU Clock
diff --git a/src/common/settings.h b/src/common/settings.h
index e3c2bd57cc..9ac06e526e 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -450,6 +450,13 @@ struct Values {
VramUsageMode::Aggressive,
"vram_usage_mode",
Category::RendererAdvanced};
+ SwitchableSetting fast_gpu_path{linkage,
+ false,
+ "fast_gpu_path",
+ Category::RendererAdvanced,
+ Specialization::Default,
+ true,
+ true};
SwitchableSetting skip_cpu_inner_invalidation{linkage,
true,
"skip_cpu_inner_invalidation",
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 7c34005a12..e99ead284c 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -40,7 +40,8 @@ struct GPU::Impl {
explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
: gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_},
shader_notify{std::make_unique()}, is_async{is_async_},
- gpu_thread{system_, is_async_}, scheduler{std::make_unique(gpu)} {}
+ gpu_thread{system_, is_async_}, scheduler{std::make_unique(gpu)},
+ fast_path{Settings::values.fast_gpu_path.GetValue()} {}
~Impl() = default;
@@ -110,6 +111,11 @@ struct GPU::Impl {
/// Request a host GPU memory flush from the CPU.
template
[[nodiscard]] u64 RequestSyncOperation(Func&& action) {
+ if (fast_path) {
+ // Just bump the fence counter, but do NOT enqueue
+ return ++last_sync_fence;
+ }
+
std::unique_lock lck{sync_request_mutex};
const u64 fence = ++last_sync_fence;
sync_requests.emplace_back(action);
@@ -122,12 +128,25 @@ struct GPU::Impl {
}
void WaitForSyncOperation(const u64 fence) {
+ if (fast_path) {
+ // Fast path: RequestSyncOperation() queued nothing for this fence, so never block
+ return;
+ }
+
std::unique_lock lck{sync_request_mutex};
sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; });
}
/// Tick pending requests within the GPU.
void TickWork() {
+ if (fast_path) {
+ // Drop all pending requests in one go
+ sync_requests.clear();
+ current_sync_fence.store(last_sync_fence, std::memory_order_relaxed);
+ sync_request_cv.notify_all();
+ return;
+ }
+
std::unique_lock lck{sync_request_mutex};
while (!sync_requests.empty()) {
auto request = std::move(sync_requests.front());
@@ -289,6 +308,11 @@ struct GPU::Impl {
void RequestComposite(std::vector&& layers,
std::vector&& fences) {
+ if (fast_path) {
+ renderer->Composite(layers);
+ return;
+ }
+
size_t num_fences{fences.size()};
size_t current_request_counter{};
{
@@ -327,6 +351,10 @@ struct GPU::Impl {
}
std::vector GetAppletCaptureBuffer() {
+ if (fast_path) {
+ return renderer->GetAppletCaptureBuffer();
+ }
+
std::vector out;
const auto wait_fence =
@@ -372,6 +400,9 @@ struct GPU::Impl {
std::unique_ptr cpu_context;
std::unique_ptr scheduler;
+
+ const bool fast_path;
+
std::unordered_map> channels;
Tegra::Control::ChannelState* current_channel;
s32 bound_channel{-1};
diff --git a/src/yuzu/configuration/shared_translation.cpp b/src/yuzu/configuration/shared_translation.cpp
index 770a16a481..9af0b71210 100644
--- a/src/yuzu/configuration/shared_translation.cpp
+++ b/src/yuzu/configuration/shared_translation.cpp
@@ -250,6 +250,11 @@ std::unique_ptr InitializeTranslations(QWidget* parent)
"of available video memory for performance. Has no effect on integrated graphics. "
"Aggressive mode may severely impact the performance of other applications such as "
"recording software."));
+ INSERT(Settings,
+ fast_gpu_path,
+ tr("Fast GPU Path"),
+ tr("Bypasses all CPU–GPU synchronization and fence handling, reducing overhead and improving "
+ "the performance. This may cause glitches or crashes on some games."));
INSERT(Settings,
skip_cpu_inner_invalidation,
tr("Skip CPU Inner Invalidation"),