9 changed files with 358 additions and 15 deletions
-
2src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
-
8src/core/memory.cpp
-
2src/video_core/CMakeLists.txt
-
2src/video_core/engines/kepler_memory.cpp
-
4src/video_core/engines/maxwell_dma.cpp
-
44src/video_core/gpu.cpp
-
22src/video_core/gpu.h
-
154src/video_core/gpu_thread.cpp
-
135src/video_core/gpu_thread.h
@ -0,0 +1,154 @@ |
|||
// Copyright 2019 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/microprofile.h"
|
|||
#include "core/frontend/scope_acquire_window_context.h"
|
|||
#include "core/settings.h"
|
|||
#include "video_core/dma_pusher.h"
|
|||
#include "video_core/gpu.h"
|
|||
#include "video_core/gpu_thread.h"
|
|||
#include "video_core/renderer_base.h"
|
|||
|
|||
namespace VideoCommon::GPUThread { |
|||
|
|||
/// Executes a single GPU thread command
|
|||
static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer, |
|||
Tegra::DmaPusher& dma_pusher) { |
|||
if (const auto submit_list = std::get_if<SubmitListCommand>(command)) { |
|||
dma_pusher.Push(std::move(submit_list->entries)); |
|||
dma_pusher.DispatchCalls(); |
|||
} else if (const auto data = std::get_if<SwapBuffersCommand>(command)) { |
|||
renderer.SwapBuffers(data->framebuffer); |
|||
} else if (const auto data = std::get_if<FlushRegionCommand>(command)) { |
|||
renderer.Rasterizer().FlushRegion(data->addr, data->size); |
|||
} else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) { |
|||
renderer.Rasterizer().InvalidateRegion(data->addr, data->size); |
|||
} else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) { |
|||
renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size); |
|||
} else { |
|||
UNREACHABLE(); |
|||
} |
|||
} |
|||
|
|||
/// Runs the GPU thread: drains queued commands, sleeping between batches.
/// Exits when SynchState::is_running is cleared (by ~ThreadManager).
static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
                      SynchState& state) {
    MicroProfileOnThreadCreate("GpuThread");

    // Blocks until at least one command is queued or shutdown is requested
    auto WaitForWakeup = [&]() {
        std::unique_lock<std::mutex> lock{state.signal_mutex};
        state.signal_condition.wait(lock, [&] { return !state.IsIdle() || !state.is_running; });
    };

    // Wait for first GPU command before acquiring the window context
    WaitForWakeup();

    // If emulation was stopped during disk shader loading, abort before trying to acquire context
    if (!state.is_running) {
        return;
    }

    // Makes the render window's graphics context current on this thread for its lifetime (RAII)
    Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};

    while (state.is_running) {
        if (!state.is_running) {
            return;
        }

        {
            // Thread has been woken up, so make the previous write queue the next read queue
            std::lock_guard<std::mutex> lock{state.signal_mutex};
            std::swap(state.push_queue, state.pop_queue);
        }

        // Execute all of the GPU commands
        // NOTE(review): pop_queue is drained without holding signal_mutex; this relies on the
        // producer only touching push_queue while the mutex is held — confirm all call sites.
        while (!state.pop_queue->empty()) {
            ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
            state.pop_queue->pop();
        }

        // Signal that the GPU thread has finished processing commands
        // NOTE(review): notify_one is issued without holding idle_mutex while the queue state
        // changes outside it; a waiter could in principle miss this wakeup — verify.
        if (state.IsIdle()) {
            state.idle_condition.notify_one();
        }

        // Wait for CPU thread to send more GPU commands
        WaitForWakeup();
    }
}
|||
|
|||
/// Spawns the GPU worker thread. `renderer` and `dma_pusher` must outlive this object,
/// since the thread holds references to them for its entire lifetime.
ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
    // Mem-initializers are listed in member declaration order (state is default-constructed
    // first, then thread, thread_id, renderer, dma_pusher) so the textual order matches the
    // actual initialization order and avoids a -Wreorder warning. std::ref binds the
    // constructor *parameters*, which alias caller-owned objects that outlive the thread.
    : thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)},
      thread_id{thread.get_id()}, renderer{renderer}, dma_pusher{dma_pusher} {}
|||
|
|||
/// Requests GPU thread shutdown and blocks until it has exited
ThreadManager::~ThreadManager() {
    {
        // Notify GPU thread that a shutdown is pending
        std::lock_guard<std::mutex> lock{state.signal_mutex};
        state.is_running = false;
    }

    // Wake the GPU thread (it may be blocked in WaitForWakeup) and wait for it to finish
    state.signal_condition.notify_one();
    thread.join();
}
|||
|
|||
/// Enqueues a GPU command list for processing; empty lists are silently ignored
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
    if (!entries.empty()) {
        // Fire-and-forget: does not wait for the GPU thread, never runs on the CPU thread
        PushCommand(SubmitListCommand(std::move(entries)), false, false);
    }
}
|||
|
|||
/// Presents a frame (swap buffers), blocking until the GPU thread has drained all commands
void ThreadManager::SwapBuffers(
    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
    // wait_for_idle=true: presentation must not race pending rendering commands
    PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
}
|||
|
|||
/// Flushes GPU caches for [addr, addr+size) back to memory; only performed when
/// accurate GPU emulation is enabled, otherwise this is a no-op
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
    if (!Settings::values.use_accurate_gpu_emulation) {
        return;
    }
    // Blocks until the flush has been processed
    PushCommand(FlushRegionCommand(addr, size), true, false);
}
|||
|
|||
/// Invalidates GPU caches for [addr, addr+size); blocks until processed, and may
/// execute synchronously on the calling thread when the GPU is idle (allow_on_cpu)
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
    PushCommand(InvalidateRegionCommand(addr, size), true, true);
}
|||
|
|||
/// Flushes and invalidates GPU caches for [addr, addr+size); when accurate GPU
/// emulation is disabled, the flush half is skipped and only an invalidate is issued
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
    if (!Settings::values.use_accurate_gpu_emulation) {
        // Cheap path: skip the flush entirely
        InvalidateRegion(addr, size);
        return;
    }
    PushCommand(FlushAndInvalidateRegionCommand(addr, size), true, false);
}
|||
|
|||
/// Pushes a command to be executed by the GPU thread, or runs it synchronously when allowed.
/// @param command_data   Command to execute; consumed (moved from) by this call.
/// @param wait_for_idle  Block the caller until the GPU thread has drained all commands.
/// @param allow_on_cpu   Permit synchronous execution on the calling thread when the GPU is idle.
void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
    {
        std::lock_guard<std::mutex> lock{state.signal_mutex};

        if ((allow_on_cpu && state.IsIdle()) || IsGpuThread()) {
            // Execute the command synchronously on the current thread
            ExecuteCommand(&command_data, renderer, dma_pusher);
            return;
        }

        // Push the command to the GPU thread. Move instead of copy: the parameter is an
        // rvalue, and a SubmitListCommand owns a whole command list whose duplication
        // here would be pure waste (the caller already relinquished ownership).
        state.push_queue->emplace(std::move(command_data));
    }

    // Signal the GPU thread that commands are pending
    state.signal_condition.notify_one();

    if (wait_for_idle) {
        // Wait for the GPU to be idle (all commands to be executed)
        std::unique_lock<std::mutex> lock{state.idle_mutex};
        state.idle_condition.wait(lock, [this] { return state.IsIdle(); });
    }
}
|||
|
|||
} // namespace VideoCommon::GPUThread
|
|||
@ -0,0 +1,135 @@ |
|||
// Copyright 2019 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once

#include <array>
#include <atomic>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <optional>
#include <queue>
#include <thread>
#include <variant>
|||
|
|||
namespace Tegra { |
|||
struct FramebufferConfig; |
|||
class DmaPusher; |
|||
} // namespace Tegra |
|||
|
|||
namespace VideoCore { |
|||
class RendererBase; |
|||
} // namespace VideoCore |
|||
|
|||
namespace VideoCommon::GPUThread { |
|||
|
|||
/// Command to signal to the GPU thread that a command list is ready for processing
/// NOTE(review): Tegra::CommandList is used here but only FramebufferConfig and DmaPusher
/// are forward-declared above — confirm the alias is visible to includers of this header.
struct SubmitListCommand final {
    explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}

    // Command entries to hand to the DMA pusher; moved into the command on construction
    Tegra::CommandList entries;
};
|||
|
|||
/// Command to signal to the GPU thread that a swap buffers is pending
struct SwapBuffersCommand final {
    explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
        : framebuffer{std::move(framebuffer)} {}

    // Optional framebuffer configuration for the swap.
    // NOTE(review): stored by value, so constructing this command from the caller's
    // reference_wrapper copies the config — confirm that copy is intended.
    std::optional<const Tegra::FramebufferConfig> framebuffer;
};
|||
|
|||
/// Command to signal to the GPU thread to flush a region
struct FlushRegionCommand final {
    explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}

    const VAddr addr; // Start address of the region to flush
    const u64 size;   // Size of the region in bytes
};
|||
|
|||
/// Command to signal to the GPU thread to invalidate a region
struct InvalidateRegionCommand final {
    explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}

    const VAddr addr; // Start address of the region to invalidate
    const u64 size;   // Size of the region in bytes
};
|||
|
|||
/// Command to signal to the GPU thread to flush and invalidate a region
struct FlushAndInvalidateRegionCommand final {
    explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
        : addr{addr}, size{size} {}

    const VAddr addr; // Start address of the region to flush and invalidate
    const u64 size;   // Size of the region in bytes
};
|||
|
|||
/// Tagged union of every command the CPU thread can send to the GPU thread
using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
                                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
|||
|
|||
/// Struct used to synchronize the GPU thread
struct SynchState final {
    // Cleared by ~ThreadManager to request the GPU thread to shut down
    std::atomic<bool> is_running{true};
    // Wakes the GPU thread when commands are pushed or shutdown is requested
    std::condition_variable signal_condition;
    std::mutex signal_mutex;
    // Wakes a waiting CPU thread once the GPU thread has drained all commands
    std::condition_variable idle_condition;
    std::mutex idle_mutex;

    // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
    // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
    // empty. This allows for efficient thread-safe access, as it does not require any copies.

    using CommandQueue = std::queue<CommandData>;
    std::array<CommandQueue, 2> command_queues;
    CommandQueue* push_queue{&command_queues[0]};
    CommandQueue* pop_queue{&command_queues[1]};

    /// Returns true if the GPU thread should be idle, meaning there are no commands to process
    /// NOTE(review): reads both queues without internal locking; callers appear to be expected
    /// to hold signal_mutex for an authoritative answer — confirm at each call site.
    bool IsIdle() const {
        return command_queues[0].empty() && command_queues[1].empty();
    }
};
|||
|
|||
/// Class used to manage the GPU thread
class ThreadManager final {
public:
    /// Spawns the GPU thread immediately; `renderer` and `dma_pusher` must outlive this object
    explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
    /// Requests shutdown and joins the GPU thread
    ~ThreadManager();

    /// Push GPU command entries to be processed
    void SubmitList(Tegra::CommandList&& entries);

    /// Swap buffers (render frame)
    void SwapBuffers(
        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);

    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
    void FlushRegion(VAddr addr, u64 size);

    /// Notify rasterizer that any caches of the specified region should be invalidated
    void InvalidateRegion(VAddr addr, u64 size);

    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
    void FlushAndInvalidateRegion(VAddr addr, u64 size);

    /// Waits the caller until the GPU thread is idle, used for synchronization
    /// NOTE(review): no definition of this member is visible in the accompanying .cpp —
    /// confirm it is implemented elsewhere or remove the declaration.
    void WaitForIdle();

private:
    /// Pushes a command to be executed by the GPU thread
    void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);

    /// Returns true if this is called by the GPU thread
    bool IsGpuThread() const {
        return std::this_thread::get_id() == thread_id;
    }

private:
    SynchState state;                  // Shared CPU/GPU-thread synchronization state
    std::thread thread;                // GPU worker thread, started by the constructor
    std::thread::id thread_id;         // Cached id of `thread`, used by IsGpuThread()
    VideoCore::RendererBase& renderer; // Renderer the commands are executed against (not owned)
    Tegra::DmaPusher& dma_pusher;      // DMA pusher fed by SubmitListCommand (not owned)
};
|||
|
|||
} // namespace VideoCommon::GPUThread |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue