@@ -4,28 +4,12 @@

 #pragma once

-#include <array>
-#include <atomic>
-#include <condition_variable>
-#include <list>
 #include <memory>
-#include <mutex>

 #include "common/bit_field.h"
 #include "common/common_types.h"
 #include "core/hle/service/nvdrv/nvdata.h"
-#include "core/hle/service/nvflinger/buffer_queue.h"
 #include "video_core/cdma_pusher.h"
-#include "video_core/dma_pusher.h"
 #include "video_core/framebuffer_config.h"
-#include "video_core/gpu_thread.h"
-
-using CacheAddr = std::uintptr_t;
-[[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) {
-    return reinterpret_cast<CacheAddr>(host_ptr);
-}
-
-[[nodiscard]] inline u8* FromCacheAddr(CacheAddr cache_addr) {
-    return reinterpret_cast<u8*>(cache_addr);
-}

 namespace Core {
 namespace Frontend {
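The CacheAddr helpers removed above existed to turn host pointers into integral keys for rasterizer cache lookups. A minimal sketch of that pattern, with a hypothetical map and lookup function (not yuzu's actual cache API):

#include <cstdint>
#include <unordered_map>

using CacheAddr = std::uintptr_t;

struct CachedSurface {};

std::unordered_map<CacheAddr, CachedSurface> surface_cache;

CachedSurface* TryGetSurface(const void* host_ptr) {
    // The reinterpret_cast yields a stable integral key for a host allocation,
    // which is what ToCacheAddr provided before its removal.
    const auto key = reinterpret_cast<CacheAddr>(host_ptr);
    const auto it = surface_cache.find(key);
    return it != surface_cache.end() ? &it->second : nullptr;
}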
@@ -40,6 +24,9 @@ class ShaderNotify;
 } // namespace VideoCore

 namespace Tegra {
+class DmaPusher;
+class CDmaPusher;
+struct CommandList;

 enum class RenderTargetFormat : u32 {
     NONE = 0x0,
@@ -138,7 +125,18 @@ public:
         }
     };

-    explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_);
+    enum class FenceOperation : u32 {
+        Acquire = 0,
+        Increment = 1,
+    };
+
+    union FenceAction {
+        u32 raw;
+        BitField<0, 1, FenceOperation> op;
+        BitField<8, 24, u32> syncpoint_id;
+    };
+
+    explicit GPU(Core::System& system, bool is_async, bool use_nvdec);
     ~GPU();

     /// Binds a renderer to the GPU.
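FenceAction, now public, describes the GPFIFO fence word: bit 0 selects the operation and bits 8..31 carry the syncpoint id, per the BitField declarations above. A plain-shift sketch of the same packing (illustrative; the real code wraps this word in a CommandHeader):

#include <cstdint>

enum class FenceOperation : std::uint32_t { Acquire = 0, Increment = 1 };

// Equivalent of packing through BitField<0, 1, ...> and BitField<8, 24, ...>.
constexpr std::uint32_t PackFenceAction(FenceOperation op, std::uint32_t syncpoint_id) {
    return (static_cast<std::uint32_t>(op) & 0x1u) | ((syncpoint_id & 0xFFFFFFu) << 8);
}

static_assert(PackFenceAction(FenceOperation::Increment, 5) == ((5u << 8) | 1u));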
@@ -162,9 +160,7 @@ public:
     [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);

     /// Obtains current flush request fence id.
-    [[nodiscard]] u64 CurrentFlushRequestFence() const {
-        return current_flush_fence.load(std::memory_order_relaxed);
-    }
+    [[nodiscard]] u64 CurrentFlushRequestFence() const;

     /// Tick pending requests within the GPU.
     void TickWork();
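RequestFlush hands back a fence id and CurrentFlushRequestFence publishes how far the GPU has gotten, the usual monotonic-ticket handshake. A minimal sketch under assumed names (the real counters are class members, with a mutex around the request list):

#include <atomic>
#include <cstdint>

std::atomic<std::uint64_t> current_flush_fence{0}; // advanced as requests complete
std::uint64_t last_flush_fence = 0;                // last ticket handed out

std::uint64_t RequestFlushExample() {
    return ++last_flush_fence; // caller keeps this ticket
}

bool HasFlushCompleted(std::uint64_t fence) {
    // Relaxed ordering suffices for a monotonic progress counter.
    return current_flush_fence.load(std::memory_order_relaxed) >= fence;
}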
@@ -200,24 +196,16 @@ public:
     [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const;

     /// Returns a reference to the underlying renderer.
-    [[nodiscard]] VideoCore::RendererBase& Renderer() {
-        return *renderer;
-    }
+    [[nodiscard]] VideoCore::RendererBase& Renderer();

     /// Returns a const reference to the underlying renderer.
-    [[nodiscard]] const VideoCore::RendererBase& Renderer() const {
-        return *renderer;
-    }
+    [[nodiscard]] const VideoCore::RendererBase& Renderer() const;

     /// Returns a reference to the shader notifier.
-    [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() {
-        return *shader_notify;
-    }
+    [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify();

     /// Returns a const reference to the shader notifier.
-    [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const {
-        return *shader_notify;
-    }
+    [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const;

     /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
     void WaitFence(u32 syncpoint_id, u32 value);
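WaitFence blocks the caller until the named syncpoint reaches the requested value. A hypothetical condition-variable sketch of that wait, reduced to a single syncpoint (the class keeps an array of them, plus shutdown handling):

#include <condition_variable>
#include <cstdint>
#include <mutex>

std::mutex sync_mutex;
std::condition_variable sync_cv;
std::uint32_t syncpoint = 0; // incremented by the GPU side, which then notifies

void WaitFenceExample(std::uint32_t value) {
    std::unique_lock lock{sync_mutex};
    sync_cv.wait(lock, [&] { return syncpoint >= value; });
}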
@@ -232,80 +220,12 @@ public:

     [[nodiscard]] u64 GetTicks() const;

-    [[nodiscard]] std::unique_lock<std::mutex> LockSync() {
-        return std::unique_lock{sync_mutex};
-    }
-
-    [[nodiscard]] bool IsAsync() const {
-        return is_async;
-    }
+    [[nodiscard]] bool IsAsync() const;

-    [[nodiscard]] bool UseNvdec() const {
-        return use_nvdec;
-    }
+    [[nodiscard]] bool UseNvdec() const;

     void RendererFrameEndNotify();

-    enum class FenceOperation : u32 {
-        Acquire = 0,
-        Increment = 1,
-    };
-
-    union FenceAction {
-        u32 raw;
-        BitField<0, 1, FenceOperation> op;
-        BitField<8, 24, u32> syncpoint_id;
-
-        [[nodiscard]] static CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
-            FenceAction result{};
-            result.op.Assign(op);
-            result.syncpoint_id.Assign(syncpoint_id);
-            return {result.raw};
-        }
-    };
-
-    struct Regs {
-        static constexpr size_t NUM_REGS = 0x40;
-
-        union {
-            struct {
-                INSERT_PADDING_WORDS_NOINIT(0x4);
-                struct {
-                    u32 address_high;
-                    u32 address_low;
-
-                    [[nodiscard]] GPUVAddr SemaphoreAddress() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                                     address_low);
-                    }
-                } semaphore_address;
-
-                u32 semaphore_sequence;
-                u32 semaphore_trigger;
-                INSERT_PADDING_WORDS_NOINIT(0xC);
-
-                // The pusher and the puller share the reference counter, the pusher only has read
-                // access
-                u32 reference_count;
-                INSERT_PADDING_WORDS_NOINIT(0x5);
-
-                u32 semaphore_acquire;
-                u32 semaphore_release;
-                u32 fence_value;
-                FenceAction fence_action;
-                INSERT_PADDING_WORDS_NOINIT(0xE2);
-
-                // Puller state
-                u32 acquire_mode;
-                u32 acquire_source;
-                u32 acquire_active;
-                u32 acquire_timeout;
-                u32 acquire_value;
-            };
-            std::array<u32, NUM_REGS> reg_array;
-        };
-    } regs{};
-
     /// Performs any additional setup necessary in order to begin GPU emulation.
     /// This can be used to launch any necessary threads and register any necessary
     /// core timing events.
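The Regs block deleted above (it moves out of the header in this change) uses a standard MMIO idiom: a union overlays named registers onto a flat word array, so the puller can store by method offset while handlers read named fields. Reduced sketch, relying on the same anonymous-struct extension the original uses:

#include <array>
#include <cstdint>

struct MiniRegs {
    static constexpr std::size_t NUM_REGS = 4;
    union {
        struct { // anonymous struct in a union: a widely supported extension
            std::uint32_t semaphore_acquire; // reg_array[0]
            std::uint32_t semaphore_release; // reg_array[1]
            std::uint32_t fence_value;       // reg_array[2]
            std::uint32_t fence_action;      // reg_array[3]
        };
        std::array<std::uint32_t, NUM_REGS> reg_array;
    };
};

static_assert(sizeof(MiniRegs) == MiniRegs::NUM_REGS * sizeof(std::uint32_t));

This layout is also what the ASSERT_REG_POSITION checks removed in the final hunk below verify: each named field must land at its exact word offset.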
@@ -338,104 +258,9 @@ public:
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
     void FlushAndInvalidateRegion(VAddr addr, u64 size);

-protected:
-    void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const;
-
 private:
-    void ProcessBindMethod(const MethodCall& method_call);
-    void ProcessFenceActionMethod();
-    void ProcessWaitForInterruptMethod();
-    void ProcessSemaphoreTriggerMethod();
-    void ProcessSemaphoreRelease();
-    void ProcessSemaphoreAcquire();
-
-    /// Calls a GPU puller method.
-    void CallPullerMethod(const MethodCall& method_call);
-
-    /// Calls a GPU engine method.
-    void CallEngineMethod(const MethodCall& method_call);
-
-    /// Calls a GPU engine multivalue method.
-    void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                               u32 methods_pending);
-
-    /// Determines where the method should be executed.
-    [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
-
-protected:
-    Core::System& system;
-    std::unique_ptr<Tegra::MemoryManager> memory_manager;
-    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
-    std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
-    std::unique_ptr<VideoCore::RendererBase> renderer;
-    VideoCore::RasterizerInterface* rasterizer = nullptr;
-    const bool use_nvdec;
-
-private:
-    /// Mapping of command subchannels to their bound engine ids
-    std::array<EngineID, 8> bound_engines = {};
-    /// 3D engine
-    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
-    /// 2D engine
-    std::unique_ptr<Engines::Fermi2D> fermi_2d;
-    /// Compute engine
-    std::unique_ptr<Engines::KeplerCompute> kepler_compute;
-    /// DMA engine
-    std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
-    /// Inline memory engine
-    std::unique_ptr<Engines::KeplerMemory> kepler_memory;
-    /// Shader build notifier
-    std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
-    /// When true, we are about to shut down emulation session, so terminate outstanding tasks
-    std::atomic_bool shutting_down{};
-
-    std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
-
-    std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
-
-    std::mutex sync_mutex;
-    std::mutex device_mutex;
-
-    std::condition_variable sync_cv;
-
-    struct FlushRequest {
-        explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
-            : fence{fence_}, addr{addr_}, size{size_} {}
-        u64 fence;
-        VAddr addr;
-        std::size_t size;
-    };
-
-    std::list<FlushRequest> flush_requests;
-    std::atomic<u64> current_flush_fence{};
-    u64 last_flush_fence{};
-    std::mutex flush_request_mutex;
-
-    const bool is_async;
-
-    VideoCommon::GPUThread::ThreadManager gpu_thread;
-    std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
+    struct Impl;
+    std::unique_ptr<Impl> impl;
 };

-#define ASSERT_REG_POSITION(field_name, position)                                                 \
-    static_assert(offsetof(GPU::Regs, field_name) == position * 4,                                \
-                  "Field " #field_name " has invalid position")
-
-ASSERT_REG_POSITION(semaphore_address, 0x4);
-ASSERT_REG_POSITION(semaphore_sequence, 0x6);
-ASSERT_REG_POSITION(semaphore_trigger, 0x7);
-ASSERT_REG_POSITION(reference_count, 0x14);
-ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
-ASSERT_REG_POSITION(semaphore_release, 0x1B);
-ASSERT_REG_POSITION(fence_value, 0x1C);
-ASSERT_REG_POSITION(fence_action, 0x1D);
-
-ASSERT_REG_POSITION(acquire_mode, 0x100);
-ASSERT_REG_POSITION(acquire_source, 0x101);
-ASSERT_REG_POSITION(acquire_active, 0x102);
-ASSERT_REG_POSITION(acquire_timeout, 0x103);
-ASSERT_REG_POSITION(acquire_value, 0x104);
-
-#undef ASSERT_REG_POSITION
-
 } // namespace Tegra
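Taken together, the final hunk is the pimpl move: every member deleted from the header now lives behind struct Impl, which is what lets the first hunk drop gpu_thread.h, dma_pusher.h, and the atomic/list/mutex includes. A plausible shape for the matching .cpp side, illustrative rather than the PR's exact code:

#include <memory>

class GPU {
public:
    GPU();
    ~GPU(); // defined below, where Impl is a complete type
    bool IsAsync() const;

private:
    struct Impl;
    std::unique_ptr<Impl> impl;
};

// --- what would live in the .cpp ---

struct GPU::Impl {
    bool is_async = false; // stand-in for the members moved out of the header
};

GPU::GPU() : impl{std::make_unique<Impl>()} {}
GPU::~GPU() = default; // unique_ptr<Impl> destroyed here, not in the header

bool GPU::IsAsync() const {
    return impl->is_async; // public methods forward to the hidden state
}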