Browse Source
gpu: Rewrite GPU command list processing with DmaPusher class.
gpu: Rewrite GPU command list processing with DmaPusher class.
- More accurate implementation; fixes Undertale (among other games).
18 changed files with 353 additions and 108 deletions
-
13src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
-
4src/video_core/CMakeLists.txt
-
53src/video_core/command_processor.h
-
110src/video_core/dma_pusher.cpp
-
95src/video_core/dma_pusher.h
-
8src/video_core/engines/fermi_2d.cpp
-
2src/video_core/engines/fermi_2d.h
-
10src/video_core/engines/kepler_memory.cpp
-
3src/video_core/engines/kepler_memory.h
-
53src/video_core/engines/maxwell_3d.cpp
-
2src/video_core/engines/maxwell_3d.h
-
8src/video_core/engines/maxwell_compute.cpp
-
3src/video_core/engines/maxwell_compute.h
-
8src/video_core/engines/maxwell_dma.cpp
-
2src/video_core/engines/maxwell_dma.h
-
58src/video_core/gpu.cpp
-
27src/video_core/gpu.h
-
2src/video_core/macro_interpreter.cpp
@ -1,53 +0,0 @@ |
|||||
// Copyright 2018 yuzu Emulator Project |
|
||||
// Licensed under GPLv2 or any later version |
|
||||
// Refer to the license.txt file included. |
|
||||
|
|
||||
#pragma once |
|
||||
|
|
||||
#include <type_traits> |
|
||||
#include "common/bit_field.h" |
|
||||
#include "common/common_types.h" |
|
||||
#include "video_core/memory_manager.h" |
|
||||
|
|
||||
namespace Tegra { |
|
||||
|
|
||||
/// Values of the 3-bit `mode` field of a command header word.
enum class SubmissionMode : u32 {
    IncreasingOld = 0,
    Increasing = 1,
    NonIncreasingOld = 2,
    NonIncreasing = 3,
    Inline = 4,
    IncreaseOnce = 5,
};
|
||||
|
|
||||
struct CommandListHeader { |
|
||||
u32 entry0; // gpu_va_lo |
|
||||
union { |
|
||||
u32 entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F) |
|
||||
BitField<0, 8, u32> gpu_va_hi; |
|
||||
BitField<8, 2, u32> unk1; |
|
||||
BitField<10, 21, u32> sz; |
|
||||
BitField<31, 1, u32> unk2; |
|
||||
}; |
|
||||
|
|
||||
GPUVAddr Address() const { |
|
||||
return (static_cast<GPUVAddr>(gpu_va_hi) << 32) | entry0; |
|
||||
} |
|
||||
}; |
|
||||
static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size"); |
|
||||
|
|
||||
union CommandHeader { |
|
||||
u32 hex; |
|
||||
|
|
||||
BitField<0, 13, u32> method; |
|
||||
BitField<13, 3, u32> subchannel; |
|
||||
|
|
||||
BitField<16, 13, u32> arg_count; |
|
||||
BitField<16, 13, u32> inline_data; |
|
||||
|
|
||||
BitField<29, 3, SubmissionMode> mode; |
|
||||
}; |
|
||||
static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); |
|
||||
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); |
|
||||
|
|
||||
} // namespace Tegra |
|
||||
@ -0,0 +1,110 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project
|
||||
|
// Licensed under GPLv2 or any later version
|
||||
|
// Refer to the license.txt file included.
|
||||
|
|
||||
|
#include "core/core.h"
|
||||
|
#include "core/memory.h"
|
||||
|
#include "video_core/dma_pusher.h"
|
||||
|
#include "video_core/engines/maxwell_3d.h"
|
||||
|
#include "video_core/gpu.h"
|
||||
|
|
||||
|
namespace Tegra { |
||||
|
|
||||
|
DmaPusher::DmaPusher(GPU& gpu) : gpu(gpu) {} |
||||
|
|
||||
|
DmaPusher::~DmaPusher() = default; |
||||
|
|
||||
|
void DmaPusher::DispatchCalls() { |
||||
|
// On entering GPU code, assume all memory may be touched by the ARM core.
|
||||
|
gpu.Maxwell3D().dirty_flags.OnMemoryWrite(); |
||||
|
|
||||
|
while (Core::System::GetInstance().IsPoweredOn()) { |
||||
|
if (!Step()) { |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
bool DmaPusher::Step() { |
||||
|
if (dma_get != dma_put) { |
||||
|
// Push buffer non-empty, read a word
|
||||
|
const CommandHeader command_header{ |
||||
|
Memory::Read32(*gpu.MemoryManager().GpuToCpuAddress(dma_get))}; |
||||
|
|
||||
|
dma_get += sizeof(u32); |
||||
|
|
||||
|
if (!non_main) { |
||||
|
dma_mget = dma_get; |
||||
|
} |
||||
|
|
||||
|
// now, see if we're in the middle of a command
|
||||
|
if (dma_state.length_pending) { |
||||
|
// Second word of long non-inc methods command - method count
|
||||
|
dma_state.length_pending = 0; |
||||
|
dma_state.method_count = command_header.method_count_; |
||||
|
} else if (dma_state.method_count) { |
||||
|
// Data word of methods command
|
||||
|
CallMethod(command_header.argument); |
||||
|
|
||||
|
if (!dma_state.non_incrementing) { |
||||
|
dma_state.method++; |
||||
|
} |
||||
|
|
||||
|
if (dma_increment_once) { |
||||
|
dma_state.non_incrementing = true; |
||||
|
} |
||||
|
|
||||
|
dma_state.method_count--; |
||||
|
} else { |
||||
|
// No command active - this is the first word of a new one
|
||||
|
switch (command_header.mode) { |
||||
|
case SubmissionMode::Increasing: |
||||
|
SetState(command_header); |
||||
|
dma_state.non_incrementing = false; |
||||
|
dma_increment_once = false; |
||||
|
break; |
||||
|
case SubmissionMode::NonIncreasing: |
||||
|
SetState(command_header); |
||||
|
dma_state.non_incrementing = true; |
||||
|
dma_increment_once = false; |
||||
|
break; |
||||
|
case SubmissionMode::Inline: |
||||
|
dma_state.method = command_header.method; |
||||
|
dma_state.subchannel = command_header.subchannel; |
||||
|
CallMethod(command_header.arg_count); |
||||
|
dma_state.non_incrementing = true; |
||||
|
dma_increment_once = false; |
||||
|
break; |
||||
|
case SubmissionMode::IncreaseOnce: |
||||
|
SetState(command_header); |
||||
|
dma_state.non_incrementing = false; |
||||
|
dma_increment_once = true; |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
} else if (ib_enable && !dma_pushbuffer.empty()) { |
||||
|
// Current pushbuffer empty, but we have more IB entries to read
|
||||
|
const CommandListHeader& command_list_header{dma_pushbuffer.front()}; |
||||
|
dma_get = command_list_header.addr; |
||||
|
dma_put = dma_get + command_list_header.size * sizeof(u32); |
||||
|
non_main = command_list_header.is_non_main; |
||||
|
dma_pushbuffer.pop(); |
||||
|
} else { |
||||
|
// Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
|
||||
|
return {}; |
||||
|
} |
||||
|
|
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
void DmaPusher::SetState(const CommandHeader& command_header) { |
||||
|
dma_state.method = command_header.method; |
||||
|
dma_state.subchannel = command_header.subchannel; |
||||
|
dma_state.method_count = command_header.method_count; |
||||
|
} |
||||
|
|
||||
|
// Forwards one argument to the GPU together with the current method, subchannel and
// method count taken from the DMA state.
void DmaPusher::CallMethod(u32 argument) const {
    const DmaState& state = dma_state;
    gpu.CallMethod({state.method, argument, state.subchannel, state.method_count});
}
||||
|
|
||||
|
} // namespace Tegra
|
||||
@ -0,0 +1,95 @@ |
|||||
|
// Copyright 2018 yuzu Emulator Project |
||||
|
// Licensed under GPLv2 or any later version |
||||
|
// Refer to the license.txt file included. |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include <queue>
#include <type_traits>

#include "common/bit_field.h"
#include "common/common_types.h"
#include "video_core/memory_manager.h"
||||
|
|
||||
|
namespace Tegra { |
||||
|
|
||||
|
/// Values of the 3-bit `mode` field of a command header word.
enum class SubmissionMode : u32 {
    IncreasingOld = 0,
    Increasing = 1,
    NonIncreasingOld = 2,
    NonIncreasing = 3,
    Inline = 4,
    IncreaseOnce = 5,
};
||||
|
|
||||
|
struct CommandListHeader { |
||||
|
union { |
||||
|
u64 raw; |
||||
|
BitField<0, 40, GPUVAddr> addr; |
||||
|
BitField<41, 1, u64> is_non_main; |
||||
|
BitField<42, 21, u64> size; |
||||
|
}; |
||||
|
}; |
||||
|
static_assert(sizeof(CommandListHeader) == sizeof(u64), "CommandListHeader is incorrect size"); |
||||
|
|
||||
|
union CommandHeader { |
||||
|
u32 argument; |
||||
|
BitField<0, 13, u32> method; |
||||
|
BitField<0, 24, u32> method_count_; |
||||
|
BitField<13, 3, u32> subchannel; |
||||
|
BitField<16, 13, u32> arg_count; |
||||
|
BitField<16, 13, u32> method_count; |
||||
|
BitField<29, 3, SubmissionMode> mode; |
||||
|
}; |
||||
|
static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); |
||||
|
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); |
||||
|
|
||||
|
class GPU; |
||||
|
|
||||
|
/** |
||||
|
* The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the |
||||
|
* emulated app fills with commands and tells PFIFO to process. The pushbuffers are then assembled |
||||
|
* into a "command stream" consisting of 32-bit words that make up "commands". |
||||
|
* See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for |
||||
|
* details on this implementation. |
||||
|
*/ |
||||
|
class DmaPusher { |
||||
|
public: |
||||
|
explicit DmaPusher(GPU& gpu); |
||||
|
~DmaPusher(); |
||||
|
|
||||
|
void Push(const CommandListHeader& command_list_header) { |
||||
|
dma_pushbuffer.push(command_list_header); |
||||
|
} |
||||
|
|
||||
|
void DispatchCalls(); |
||||
|
|
||||
|
private: |
||||
|
bool Step(); |
||||
|
|
||||
|
void SetState(const CommandHeader& command_header); |
||||
|
|
||||
|
void CallMethod(u32 argument) const; |
||||
|
|
||||
|
GPU& gpu; |
||||
|
|
||||
|
std::queue<CommandListHeader> dma_pushbuffer; |
||||
|
|
||||
|
struct DmaState { |
||||
|
u32 method; ///< Current method |
||||
|
u32 subchannel; ///< Current subchannel |
||||
|
u32 method_count; ///< Current method count |
||||
|
u32 length_pending; ///< Large NI command length pending |
||||
|
bool non_incrementing; ///< Current command’s NI flag |
||||
|
}; |
||||
|
|
||||
|
DmaState dma_state{}; |
||||
|
bool dma_increment_once{}; |
||||
|
|
||||
|
GPUVAddr dma_put{}; ///< pushbuffer current end address |
||||
|
GPUVAddr dma_get{}; ///< pushbuffer current read address |
||||
|
GPUVAddr dma_mget{}; ///< main pushbuffer last read address |
||||
|
bool ib_enable{true}; ///< IB mode enabled |
||||
|
bool non_main{}; ///< non-main pushbuffer active |
||||
|
}; |
||||
|
|
||||
|
} // namespace Tegra |
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue