Browse Source
gpu: Rewrite GPU command list processing with DmaPusher class.
gpu: Rewrite GPU command list processing with DmaPusher class.
- More accurate implementation; fixes Undertale (among other games).
nce_cpp
18 changed files with 353 additions and 108 deletions
-
13 src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
-
4 src/video_core/CMakeLists.txt
-
53 src/video_core/command_processor.h
-
110 src/video_core/dma_pusher.cpp
-
95 src/video_core/dma_pusher.h
-
8 src/video_core/engines/fermi_2d.cpp
-
2 src/video_core/engines/fermi_2d.h
-
10 src/video_core/engines/kepler_memory.cpp
-
3 src/video_core/engines/kepler_memory.h
-
53 src/video_core/engines/maxwell_3d.cpp
-
2 src/video_core/engines/maxwell_3d.h
-
8 src/video_core/engines/maxwell_compute.cpp
-
3 src/video_core/engines/maxwell_compute.h
-
8 src/video_core/engines/maxwell_dma.cpp
-
2 src/video_core/engines/maxwell_dma.h
-
58 src/video_core/gpu.cpp
-
27 src/video_core/gpu.h
-
2 src/video_core/macro_interpreter.cpp
@ -1,53 +0,0 @@ |
|||
// Copyright 2018 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <type_traits> |
|||
#include "common/bit_field.h" |
|||
#include "common/common_types.h" |
|||
#include "video_core/memory_manager.h" |
|||
|
|||
namespace Tegra { |
|||
|
|||
/// Submission mode of a command word; stored in bits 29..31 of a CommandHeader.
enum class SubmissionMode : u32 {
    IncreasingOld = 0,
    Increasing = 1,
    NonIncreasingOld = 2,
    NonIncreasing = 3,
    Inline = 4,
    IncreaseOnce = 5,
};
|||
|
|||
struct CommandListHeader { |
|||
u32 entry0; // gpu_va_lo |
|||
union { |
|||
u32 entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F) |
|||
BitField<0, 8, u32> gpu_va_hi; |
|||
BitField<8, 2, u32> unk1; |
|||
BitField<10, 21, u32> sz; |
|||
BitField<31, 1, u32> unk2; |
|||
}; |
|||
|
|||
GPUVAddr Address() const { |
|||
return (static_cast<GPUVAddr>(gpu_va_hi) << 32) | entry0; |
|||
} |
|||
}; |
|||
static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size"); |
|||
|
|||
union CommandHeader { |
|||
u32 hex; |
|||
|
|||
BitField<0, 13, u32> method; |
|||
BitField<13, 3, u32> subchannel; |
|||
|
|||
BitField<16, 13, u32> arg_count; |
|||
BitField<16, 13, u32> inline_data; |
|||
|
|||
BitField<29, 3, SubmissionMode> mode; |
|||
}; |
|||
static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); |
|||
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); |
|||
|
|||
} // namespace Tegra |
|||
@ -0,0 +1,110 @@ |
|||
// Copyright 2018 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "core/core.h"
|
|||
#include "core/memory.h"
|
|||
#include "video_core/dma_pusher.h"
|
|||
#include "video_core/engines/maxwell_3d.h"
|
|||
#include "video_core/gpu.h"
|
|||
|
|||
namespace Tegra { |
|||
|
|||
DmaPusher::DmaPusher(GPU& gpu) : gpu(gpu) {} |
|||
|
|||
DmaPusher::~DmaPusher() = default; |
|||
|
|||
void DmaPusher::DispatchCalls() { |
|||
// On entering GPU code, assume all memory may be touched by the ARM core.
|
|||
gpu.Maxwell3D().dirty_flags.OnMemoryWrite(); |
|||
|
|||
while (Core::System::GetInstance().IsPoweredOn()) { |
|||
if (!Step()) { |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
|
|||
bool DmaPusher::Step() { |
|||
if (dma_get != dma_put) { |
|||
// Push buffer non-empty, read a word
|
|||
const CommandHeader command_header{ |
|||
Memory::Read32(*gpu.MemoryManager().GpuToCpuAddress(dma_get))}; |
|||
|
|||
dma_get += sizeof(u32); |
|||
|
|||
if (!non_main) { |
|||
dma_mget = dma_get; |
|||
} |
|||
|
|||
// now, see if we're in the middle of a command
|
|||
if (dma_state.length_pending) { |
|||
// Second word of long non-inc methods command - method count
|
|||
dma_state.length_pending = 0; |
|||
dma_state.method_count = command_header.method_count_; |
|||
} else if (dma_state.method_count) { |
|||
// Data word of methods command
|
|||
CallMethod(command_header.argument); |
|||
|
|||
if (!dma_state.non_incrementing) { |
|||
dma_state.method++; |
|||
} |
|||
|
|||
if (dma_increment_once) { |
|||
dma_state.non_incrementing = true; |
|||
} |
|||
|
|||
dma_state.method_count--; |
|||
} else { |
|||
// No command active - this is the first word of a new one
|
|||
switch (command_header.mode) { |
|||
case SubmissionMode::Increasing: |
|||
SetState(command_header); |
|||
dma_state.non_incrementing = false; |
|||
dma_increment_once = false; |
|||
break; |
|||
case SubmissionMode::NonIncreasing: |
|||
SetState(command_header); |
|||
dma_state.non_incrementing = true; |
|||
dma_increment_once = false; |
|||
break; |
|||
case SubmissionMode::Inline: |
|||
dma_state.method = command_header.method; |
|||
dma_state.subchannel = command_header.subchannel; |
|||
CallMethod(command_header.arg_count); |
|||
dma_state.non_incrementing = true; |
|||
dma_increment_once = false; |
|||
break; |
|||
case SubmissionMode::IncreaseOnce: |
|||
SetState(command_header); |
|||
dma_state.non_incrementing = false; |
|||
dma_increment_once = true; |
|||
break; |
|||
} |
|||
} |
|||
} else if (ib_enable && !dma_pushbuffer.empty()) { |
|||
// Current pushbuffer empty, but we have more IB entries to read
|
|||
const CommandListHeader& command_list_header{dma_pushbuffer.front()}; |
|||
dma_get = command_list_header.addr; |
|||
dma_put = dma_get + command_list_header.size * sizeof(u32); |
|||
non_main = command_list_header.is_non_main; |
|||
dma_pushbuffer.pop(); |
|||
} else { |
|||
// Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
|
|||
return {}; |
|||
} |
|||
|
|||
return true; |
|||
} |
|||
|
|||
void DmaPusher::SetState(const CommandHeader& command_header) { |
|||
dma_state.method = command_header.method; |
|||
dma_state.subchannel = command_header.subchannel; |
|||
dma_state.method_count = command_header.method_count; |
|||
} |
|||
|
|||
// Forwards one method call to the GPU: the current method, the given argument, the current
// subchannel and the current method count, in that order.
void DmaPusher::CallMethod(u32 argument) const {
    const DmaState& state = dma_state;
    gpu.CallMethod({state.method, argument, state.subchannel, state.method_count});
}
|||
|
|||
} // namespace Tegra
|
|||
@ -0,0 +1,95 @@ |
|||
// Copyright 2018 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <queue>
#include <type_traits>

#include "common/bit_field.h"
#include "common/common_types.h"
#include "video_core/memory_manager.h"
|||
|
|||
namespace Tegra { |
|||
|
|||
/// Submission mode of a command's first word; stored in bits 29..31 of a CommandHeader.
enum class SubmissionMode : u32 {
    IncreasingOld = 0,    ///< Not handled by DmaPusher::Step
    Increasing = 1,       ///< Method number advances after every data word
    NonIncreasingOld = 2, ///< Not handled by DmaPusher::Step
    NonIncreasing = 3,    ///< Method number stays fixed for every data word
    Inline = 4,           ///< Argument is carried in the header word itself
    IncreaseOnce = 5,     ///< Method number advances after the first data word only
};
|||
|
|||
struct CommandListHeader { |
|||
union { |
|||
u64 raw; |
|||
BitField<0, 40, GPUVAddr> addr; |
|||
BitField<41, 1, u64> is_non_main; |
|||
BitField<42, 21, u64> size; |
|||
}; |
|||
}; |
|||
static_assert(sizeof(CommandListHeader) == sizeof(u64), "CommandListHeader is incorrect size"); |
|||
|
|||
union CommandHeader { |
|||
u32 argument; |
|||
BitField<0, 13, u32> method; |
|||
BitField<0, 24, u32> method_count_; |
|||
BitField<13, 3, u32> subchannel; |
|||
BitField<16, 13, u32> arg_count; |
|||
BitField<16, 13, u32> method_count; |
|||
BitField<29, 3, SubmissionMode> mode; |
|||
}; |
|||
static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); |
|||
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); |
|||
|
|||
class GPU; |
|||
|
|||
/** |
|||
* The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the |
|||
* emulated app fills with commands and tells PFIFO to process. The pushbuffers are then assembled |
|||
* into a "command stream" consisting of 32-bit words that make up "commands". |
|||
* See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for |
|||
* details on this implementation. |
|||
*/ |
|||
class DmaPusher { |
|||
public: |
|||
explicit DmaPusher(GPU& gpu); |
|||
~DmaPusher(); |
|||
|
|||
void Push(const CommandListHeader& command_list_header) { |
|||
dma_pushbuffer.push(command_list_header); |
|||
} |
|||
|
|||
void DispatchCalls(); |
|||
|
|||
private: |
|||
bool Step(); |
|||
|
|||
void SetState(const CommandHeader& command_header); |
|||
|
|||
void CallMethod(u32 argument) const; |
|||
|
|||
GPU& gpu; |
|||
|
|||
std::queue<CommandListHeader> dma_pushbuffer; |
|||
|
|||
struct DmaState { |
|||
u32 method; ///< Current method |
|||
u32 subchannel; ///< Current subchannel |
|||
u32 method_count; ///< Current method count |
|||
u32 length_pending; ///< Large NI command length pending |
|||
bool non_incrementing; ///< Current command’s NI flag |
|||
}; |
|||
|
|||
DmaState dma_state{}; |
|||
bool dma_increment_once{}; |
|||
|
|||
GPUVAddr dma_put{}; ///< pushbuffer current end address |
|||
GPUVAddr dma_get{}; ///< pushbuffer current read address |
|||
GPUVAddr dma_mget{}; ///< main pushbuffer last read address |
|||
bool ib_enable{true}; ///< IB mode enabled |
|||
bool non_main{}; ///< non-main pushbuffer active |
|||
}; |
|||
|
|||
} // namespace Tegra |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue