9 changed files with 358 additions and 15 deletions
-
2src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
-
8src/core/memory.cpp
-
2src/video_core/CMakeLists.txt
-
2src/video_core/engines/kepler_memory.cpp
-
4src/video_core/engines/maxwell_dma.cpp
-
38src/video_core/gpu.cpp
-
14src/video_core/gpu.h
-
154src/video_core/gpu_thread.cpp
-
135src/video_core/gpu_thread.h
@ -0,0 +1,154 @@ |
|||||
|
// Copyright 2019 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "common/assert.h"
|
||||
|
#include "common/microprofile.h"
|
||||
|
#include "core/frontend/scope_acquire_window_context.h"
|
||||
|
#include "core/settings.h"
|
||||
|
#include "video_core/dma_pusher.h"
|
||||
|
#include "video_core/gpu.h"
|
||||
|
#include "video_core/gpu_thread.h"
|
||||
|
#include "video_core/renderer_base.h"
|
||||
|
|
||||
|
namespace VideoCommon::GPUThread { |
||||
|
|
||||
|
/// Executes a single GPU thread command
|
||||
|
static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer, |
||||
|
Tegra::DmaPusher& dma_pusher) { |
||||
|
if (const auto submit_list = std::get_if<SubmitListCommand>(command)) { |
||||
|
dma_pusher.Push(std::move(submit_list->entries)); |
||||
|
dma_pusher.DispatchCalls(); |
||||
|
} else if (const auto data = std::get_if<SwapBuffersCommand>(command)) { |
||||
|
renderer.SwapBuffers(data->framebuffer); |
||||
|
} else if (const auto data = std::get_if<FlushRegionCommand>(command)) { |
||||
|
renderer.Rasterizer().FlushRegion(data->addr, data->size); |
||||
|
} else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) { |
||||
|
renderer.Rasterizer().InvalidateRegion(data->addr, data->size); |
||||
|
} else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) { |
||||
|
renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size); |
||||
|
} else { |
||||
|
UNREACHABLE(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/// Runs the GPU thread
|
||||
|
static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher, |
||||
|
SynchState& state) { |
||||
|
|
||||
|
MicroProfileOnThreadCreate("GpuThread"); |
||||
|
|
||||
|
auto WaitForWakeup = [&]() { |
||||
|
std::unique_lock<std::mutex> lock{state.signal_mutex}; |
||||
|
state.signal_condition.wait(lock, [&] { return !state.IsIdle() || !state.is_running; }); |
||||
|
}; |
||||
|
|
||||
|
// Wait for first GPU command before acquiring the window context
|
||||
|
WaitForWakeup(); |
||||
|
|
||||
|
// If emulation was stopped during disk shader loading, abort before trying to acquire context
|
||||
|
if (!state.is_running) { |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()}; |
||||
|
|
||||
|
while (state.is_running) { |
||||
|
if (!state.is_running) { |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
{ |
||||
|
// Thread has been woken up, so make the previous write queue the next read queue
|
||||
|
std::lock_guard<std::mutex> lock{state.signal_mutex}; |
||||
|
std::swap(state.push_queue, state.pop_queue); |
||||
|
} |
||||
|
|
||||
|
// Execute all of the GPU commands
|
||||
|
while (!state.pop_queue->empty()) { |
||||
|
ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher); |
||||
|
state.pop_queue->pop(); |
||||
|
} |
||||
|
|
||||
|
// Signal that the GPU thread has finished processing commands
|
||||
|
if (state.IsIdle()) { |
||||
|
state.idle_condition.notify_one(); |
||||
|
} |
||||
|
|
||||
|
// Wait for CPU thread to send more GPU commands
|
||||
|
WaitForWakeup(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/// Creates the manager and immediately starts the GPU thread.
///
/// Members are initialized in declaration order (state, thread, thread_id, renderer,
/// dma_pusher); the initializer list below is written in that same order. The thread is started
/// with references taken from the constructor parameters, not from the `renderer` /
/// `dma_pusher` members, which are initialized only after the thread has been launched.
ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
    : thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)},
      thread_id{thread.get_id()}, renderer{renderer}, dma_pusher{dma_pusher} {}
||||
|
|
||||
|
/// Requests the GPU thread to stop and blocks until it has exited.
ThreadManager::~ThreadManager() {
    // Flip the running flag while holding the signal mutex so the change cannot race with the
    // GPU thread evaluating its wake-up predicate.
    std::unique_lock<std::mutex> shutdown_lock{state.signal_mutex};
    state.is_running.store(false);
    shutdown_lock.unlock();

    // Wake the GPU thread (if it is sleeping) so it can observe the shutdown request
    state.signal_condition.notify_one();
    thread.join();
}
||||
|
|
||||
|
/// Queues a command list for the GPU thread; empty lists are ignored.
/// The submission neither waits for the GPU to become idle nor may it run on the calling thread.
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
    constexpr bool wait_for_idle = false;
    constexpr bool allow_on_cpu = false;

    if (!entries.empty()) {
        PushCommand(SubmitListCommand(std::move(entries)), wait_for_idle, allow_on_cpu);
    }
}
||||
|
|
||||
|
void ThreadManager::SwapBuffers( |
||||
|
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { |
||||
|
PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false); |
||||
|
} |
||||
|
|
||||
|
/// Queues a flush of the given region and waits for the GPU thread to become idle.
/// Does nothing unless accurate GPU emulation is enabled in the settings.
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
    if (!Settings::values.use_accurate_gpu_emulation) {
        return;
    }

    constexpr bool wait_for_idle = true;
    constexpr bool allow_on_cpu = false;
    PushCommand(FlushRegionCommand(addr, size), wait_for_idle, allow_on_cpu);
}
||||
|
|
||||
|
/// Invalidates the given region. The command may be executed directly on the calling thread
/// when the GPU thread is idle; otherwise it is queued and the caller waits for idle.
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
    constexpr bool wait_for_idle = true;
    constexpr bool allow_on_cpu = true;

    PushCommand(InvalidateRegionCommand(addr, size), wait_for_idle, allow_on_cpu);
}
||||
|
|
||||
|
/// Flushes and invalidates the given region when accurate GPU emulation is enabled;
/// otherwise only invalidates it.
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
    if (!Settings::values.use_accurate_gpu_emulation) {
        // Without accurate emulation the flush is skipped entirely
        InvalidateRegion(addr, size);
        return;
    }

    constexpr bool wait_for_idle = true;
    constexpr bool allow_on_cpu = false;
    PushCommand(FlushAndInvalidateRegionCommand(addr, size), wait_for_idle, allow_on_cpu);
}
||||
|
|
||||
|
/// Pushes a command to be executed by the GPU thread
///
/// @param command_data  Command to execute; it is consumed (moved from) by this call.
/// @param wait_for_idle If true, block the caller until both command queues are empty.
/// @param allow_on_cpu  If true and the GPU thread is currently idle, the command is executed
///                      synchronously on the calling thread instead of being queued.
///
/// When called from the GPU thread itself the command is always executed synchronously.
void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
    {
        std::lock_guard<std::mutex> lock{state.signal_mutex};

        if ((allow_on_cpu && state.IsIdle()) || IsGpuThread()) {
            // Execute the command synchronously on the current thread
            ExecuteCommand(&command_data, renderer, dma_pusher);
            return;
        }

        // Push the command to the GPU thread. `command_data` is a named rvalue reference, i.e.
        // an lvalue expression, so it must be moved explicitly; otherwise the whole variant
        // (including any submitted command list) would be copied into the queue.
        state.push_queue->emplace(std::move(command_data));
    }

    // Signal the GPU thread that commands are pending
    state.signal_condition.notify_one();

    if (wait_for_idle) {
        // Wait for the GPU to be idle (all commands to be executed)
        std::unique_lock<std::mutex> lock{state.idle_mutex};
        state.idle_condition.wait(lock, [this] { return state.IsIdle(); });
    }
}
||||
|
|
||||
|
} // namespace VideoCommon::GPUThread
|
||||
@ -0,0 +1,135 @@ |
|||||
|
// Copyright 2019 yuzu Emulator Project |
||||
|
// Licensed under GPLv2 or any later version |
||||
|
// Refer to the license.txt file included. |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include <array> |
||||
|
#include <atomic> |
||||
|
#include <condition_variable> |
||||
|
#include <memory> |
||||
|
#include <mutex> |
||||
|
#include <optional> |
||||
|
#include <thread> |
||||
|
#include <variant> |
||||
|
|
||||
|
namespace Tegra { |
||||
|
struct FramebufferConfig; |
||||
|
class DmaPusher; |
||||
|
} // namespace Tegra |
||||
|
|
||||
|
namespace VideoCore { |
||||
|
class RendererBase; |
||||
|
} // namespace VideoCore |
||||
|
|
||||
|
namespace VideoCommon::GPUThread { |
||||
|
|
||||
|
/// Command to signal to the GPU thread that a command list is ready for processing |
||||
|
struct SubmitListCommand final { |
||||
|
explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {} |
||||
|
|
||||
|
Tegra::CommandList entries; |
||||
|
}; |
||||
|
|
||||
|
/// Command to signal to the GPU thread that a swap buffers is pending |
||||
|
struct SwapBuffersCommand final { |
||||
|
explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer) |
||||
|
: framebuffer{std::move(framebuffer)} {} |
||||
|
|
||||
|
std::optional<const Tegra::FramebufferConfig> framebuffer; |
||||
|
}; |
||||
|
|
||||
|
/// Command to signal to the GPU thread to flush a region |
||||
|
struct FlushRegionCommand final { |
||||
|
explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} |
||||
|
|
||||
|
const VAddr addr; |
||||
|
const u64 size; |
||||
|
}; |
||||
|
|
||||
|
/// Command to signal to the GPU thread to invalidate a region |
||||
|
struct InvalidateRegionCommand final { |
||||
|
explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} |
||||
|
|
||||
|
const VAddr addr; |
||||
|
const u64 size; |
||||
|
}; |
||||
|
|
||||
|
/// Command to signal to the GPU thread to flush and invalidate a region |
||||
|
struct FlushAndInvalidateRegionCommand final { |
||||
|
explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size) |
||||
|
: addr{addr}, size{size} {} |
||||
|
|
||||
|
const VAddr addr; |
||||
|
const u64 size; |
||||
|
}; |
||||
|
|
||||
|
using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, |
||||
|
InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; |
||||
|
|
||||
|
/// Struct used to synchronize the GPU thread |
||||
|
struct SynchState final { |
||||
|
std::atomic<bool> is_running{true}; |
||||
|
std::condition_variable signal_condition; |
||||
|
std::mutex signal_mutex; |
||||
|
std::condition_variable idle_condition; |
||||
|
std::mutex idle_mutex; |
||||
|
|
||||
|
// We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and |
||||
|
// one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes |
||||
|
// empty. This allows for efficient thread-safe access, as it does not require any copies. |
||||
|
|
||||
|
using CommandQueue = std::queue<CommandData>; |
||||
|
std::array<CommandQueue, 2> command_queues; |
||||
|
CommandQueue* push_queue{&command_queues[0]}; |
||||
|
CommandQueue* pop_queue{&command_queues[1]}; |
||||
|
|
||||
|
/// Returns true if the GPU thread should be idle, meaning there are no commands to process |
||||
|
bool IsIdle() const { |
||||
|
return command_queues[0].empty() && command_queues[1].empty(); |
||||
|
} |
||||
|
}; |
||||
|
|
||||
|
/// Class used to manage the GPU thread |
||||
|
class ThreadManager final { |
||||
|
public: |
||||
|
explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher); |
||||
|
~ThreadManager(); |
||||
|
|
||||
|
/// Push GPU command entries to be processed |
||||
|
void SubmitList(Tegra::CommandList&& entries); |
||||
|
|
||||
|
/// Swap buffers (render frame) |
||||
|
void SwapBuffers( |
||||
|
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer); |
||||
|
|
||||
|
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
||||
|
void FlushRegion(VAddr addr, u64 size); |
||||
|
|
||||
|
/// Notify rasterizer that any caches of the specified region should be invalidated |
||||
|
void InvalidateRegion(VAddr addr, u64 size); |
||||
|
|
||||
|
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
||||
|
void FlushAndInvalidateRegion(VAddr addr, u64 size); |
||||
|
|
||||
|
/// Waits the caller until the GPU thread is idle, used for synchronization |
||||
|
void WaitForIdle(); |
||||
|
|
||||
|
private: |
||||
|
/// Pushes a command to be executed by the GPU thread |
||||
|
void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu); |
||||
|
|
||||
|
/// Returns true if this is called by the GPU thread |
||||
|
bool IsGpuThread() const { |
||||
|
return std::this_thread::get_id() == thread_id; |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
SynchState state; |
||||
|
std::thread thread; |
||||
|
std::thread::id thread_id; |
||||
|
VideoCore::RendererBase& renderer; |
||||
|
Tegra::DmaPusher& dma_pusher; |
||||
|
}; |
||||
|
|
||||
|
} // namespace VideoCommon::GPUThread |
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue