Browse Source

Preserve GPU-modified regions for vertex buffers

video_core
MaranBr 8 hours ago
parent
commit
f41244bd57
  1. 57
      src/video_core/buffer_cache/buffer_cache.h
  2. 2
      src/video_core/buffer_cache/buffer_cache_base.h

57
src/video_core/buffer_cache/buffer_cache.h

@@ -827,7 +827,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
 const Binding& binding = VertexBufferSlot(index);
 Buffer& buffer = slot_buffers[binding.buffer_id];
 TouchBuffer(buffer, binding.buffer_id);
-SynchronizeBuffer(buffer, binding.device_addr, binding.size);
+SynchronizeBuffer(buffer, binding.device_addr, binding.size, true);
 if (!flags[Dirty::VertexBuffer0 + index]) {
 flush_bindings();
 continue;
@@ -857,7 +857,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
 const Binding& binding = channel_state->vertex_buffers[index];
 Buffer& buffer = slot_buffers[binding.buffer_id];
 TouchBuffer(buffer, binding.buffer_id);
-SynchronizeBuffer(buffer, binding.device_addr, binding.size);
+SynchronizeBuffer(buffer, binding.device_addr, binding.size, true);
 if (!flags[Dirty::VertexBuffer0 + index]) {
 continue;
 }
@@ -1617,35 +1617,42 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
 }
 template <class P>
-bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size) {
+bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size, bool preserve_gpu_writes) {
 upload_copies.clear();
-u64 total_size_bytes = 0;
+u64 staging_offset = 0;
 u64 largest_copy = 0;
-const DAddr buffer_start = buffer.cpu_addr_cached;
-memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
-upload_copies.push_back(BufferCopy{
-.src_offset = total_size_bytes,
-.dst_offset = device_addr_out - buffer_start,
-.size = range_size,
+DAddr buffer_start = buffer.CpuAddr();
+auto push = [&](u64 start, u64 end) {
+if (start >= end) {
+return;
+}
+u64 range_size = end - start;
+upload_copies.push_back({
+.src_offset = staging_offset,
+.dst_offset = start - buffer_start,
+.size = range_size
 });
-total_size_bytes += range_size;
-largest_copy = (std::max)(largest_copy, range_size);
+staging_offset += range_size;
+largest_copy = std::max(largest_copy, range_size);
+};
+memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 addr, u64 range_size) {
+if (preserve_gpu_writes) {
+u64 start = addr;
+u64 end = addr + range_size;
+gpu_modified_ranges.ForEachInRange(start, range_size, [&](u64 gstart, u64 gsize) {
+u64 gend = gstart + gsize;
+push(start, gstart);
+start = std::max(start, gend);
+});
+push(start, end);
+} else {
+push(addr, addr + range_size);
+}
 });
-if (total_size_bytes == 0) {
+if (upload_copies.empty()) {
 return true;
 }
-u64 min_offset = (std::numeric_limits<u64>::max)();
-u64 max_offset = 0;
-for (const auto& copy : upload_copies) {
-min_offset = (std::min)(min_offset, copy.dst_offset);
-max_offset = (std::max)(max_offset, copy.dst_offset + copy.size);
-}
-const DAddr sync_addr = buffer.CpuAddr() + min_offset;
-const u64 sync_size = max_offset - min_offset;
-DownloadBufferMemory(buffer, sync_addr, sync_size);
-const std::span<BufferCopy> copies_span(upload_copies.data(), upload_copies.size());
-UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
-any_buffer_uploaded = true;
+UploadMemory(buffer, staging_offset, largest_copy, std::span(upload_copies));
 return false;
 }

2
src/video_core/buffer_cache/buffer_cache_base.h

@@ -431,7 +431,7 @@ private:
 void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept;
-bool SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size);
+bool SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size, bool preserve_gpu_writes = false);
 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
 std::span<BufferCopy> copies);

Loading…
Cancel
Save