Browse Source

[video_core] Reduce synchronization overhead and improve performance in DMA operations (#3179)

This reworks the logic to improve performance in many games that rely heavily on DMA. It can help on all platforms, but on desktop — especially on dedicated GPUs — the performance boost can be noticeable. The Sync Memory Operations option must be enabled for this change to take effect.

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3179
Co-authored-by: MaranBr <maranbr@outlook.com>
Co-committed-by: MaranBr <maranbr@outlook.com>
pull/3235/head
MaranBr 6 days ago
committed by crueter
parent
commit
8d31484f64
No known key found for this signature in database. GPG Key ID: 425ACD2D4830EBC6
  1. 78
      src/video_core/dma_pusher.cpp

78
src/video_core/dma_pusher.cpp

@ -43,80 +43,56 @@ void DmaPusher::DispatchCalls() {
bool DmaPusher::Step() { bool DmaPusher::Step() {
if (!ib_enable || dma_pushbuffer.empty()) { if (!ib_enable || dma_pushbuffer.empty()) {
// pushbuffer empty and IB empty or nonexistent - nothing to do
return false; return false;
} }
CommandList& command_list{dma_pushbuffer.front()};
CommandList& command_list = dma_pushbuffer.front();
ASSERT_OR_EXECUTE(
command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
// Somehow the command_list is empty, in order to avoid a crash
// We ignore it and assume its size is 0.
const size_t prefetch_size = command_list.prefetch_command_list.size();
const size_t command_list_size = command_list.command_lists.size();
if (prefetch_size == 0 && command_list_size == 0) {
dma_pushbuffer.pop(); dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0; dma_pushbuffer_subindex = 0;
return true; return true;
});
}
if (command_list.prefetch_command_list.size()) {
// Prefetched command list from nvdrv, used for things like synchronization
ProcessCommands(VideoCommon::FixSmallVectorADL(command_list.prefetch_command_list));
if (prefetch_size > 0) {
ProcessCommands(command_list.prefetch_command_list);
dma_pushbuffer.pop(); dma_pushbuffer.pop();
} else {
const CommandListHeader command_list_header{
command_list.command_lists[dma_pushbuffer_subindex++]};
return true;
}
if (signal_sync) {
auto& current_command = command_list.command_lists[dma_pushbuffer_subindex];
const CommandListHeader& header = current_command;
dma_state.dma_get = header.addr;
if (signal_sync && !synced) {
std::unique_lock lk(sync_mutex); std::unique_lock lk(sync_mutex);
sync_cv.wait(lk, [this]() { return synced; }); sync_cv.wait(lk, [this]() { return synced; });
signal_sync = false; signal_sync = false;
synced = false; synced = false;
} }
dma_state.dma_get = command_list_header.addr;
if (command_list_header.size == 0) {
return true;
}
// Push buffer non-empty, read a word
if (dma_state.method >= MacroRegistersStart) {
if (subchannels[dma_state.subchannel]) {
subchannels[dma_state.subchannel]->current_dirty = memory_manager.IsMemoryDirty(
dma_state.dma_get, command_list_header.size * sizeof(u32));
}
if (header.size > 0 && dma_state.method >= MacroRegistersStart && subchannels[dma_state.subchannel]) {
subchannels[dma_state.subchannel]->current_dirty = memory_manager.IsMemoryDirty(dma_state.dma_get, header.size * sizeof(u32));
} }
const auto safe_process = [&] {
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader,
Tegra::Memory::GuestMemoryFlags::SafeRead>
headers(memory_manager, dma_state.dma_get, command_list_header.size,
&command_headers);
ProcessCommands(headers);
};
const auto unsafe_process = [&] {
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader,
Tegra::Memory::GuestMemoryFlags::UnsafeRead>
headers(memory_manager, dma_state.dma_get, command_list_header.size,
&command_headers);
if (header.size > 0) {
if (Settings::IsDMALevelDefault() ? (Settings::IsGPULevelMedium() || Settings::IsGPULevelHigh()) : Settings::IsDMALevelSafe()) {
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, Tegra::Memory::GuestMemoryFlags::SafeRead>headers(memory_manager, dma_state.dma_get, header.size, &command_headers);
ProcessCommands(headers); ProcessCommands(headers);
};
const bool use_safe = Settings::IsDMALevelDefault() ? (Settings::IsGPULevelMedium() || Settings::IsGPULevelHigh()) : Settings::IsDMALevelSafe();
if (use_safe) {
safe_process();
} else { } else {
unsafe_process();
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, Tegra::Memory::GuestMemoryFlags::UnsafeRead>headers(memory_manager, dma_state.dma_get, header.size, &command_headers);
ProcessCommands(headers);
}
} }
if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
// We've gone through the current list, remove it from the queue
if (++dma_pushbuffer_subindex >= command_list_size) {
dma_pushbuffer.pop(); dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0; dma_pushbuffer_subindex = 0;
} else if (command_list.command_lists[dma_pushbuffer_subindex].sync && Settings::values.sync_memory_operations.GetValue()) {
signal_sync = true;
} else {
signal_sync = command_list.command_lists[dma_pushbuffer_subindex].sync && Settings::values.sync_memory_operations.GetValue();
} }
if (signal_sync) { if (signal_sync) {
@ -126,7 +102,7 @@ bool DmaPusher::Step() {
sync_cv.notify_all(); sync_cv.notify_all();
}); });
} }
}
return true; return true;
} }

Loading…
Cancel
Save