|
|
|
@ -827,7 +827,7 @@ void BufferCache<P>::BindHostVertexBuffers() { |
|
|
|
const Binding& binding = VertexBufferSlot(index); |
|
|
|
Buffer& buffer = slot_buffers[binding.buffer_id]; |
|
|
|
TouchBuffer(buffer, binding.buffer_id); |
|
|
|
SynchronizeBuffer(buffer, binding.device_addr, binding.size); |
|
|
|
SynchronizeBuffer(buffer, binding.device_addr, binding.size, true); |
|
|
|
if (!flags[Dirty::VertexBuffer0 + index]) { |
|
|
|
flush_bindings(); |
|
|
|
continue; |
|
|
|
@ -857,7 +857,7 @@ void BufferCache<P>::BindHostVertexBuffers() { |
|
|
|
const Binding& binding = channel_state->vertex_buffers[index]; |
|
|
|
Buffer& buffer = slot_buffers[binding.buffer_id]; |
|
|
|
TouchBuffer(buffer, binding.buffer_id); |
|
|
|
SynchronizeBuffer(buffer, binding.device_addr, binding.size); |
|
|
|
SynchronizeBuffer(buffer, binding.device_addr, binding.size, true); |
|
|
|
if (!flags[Dirty::VertexBuffer0 + index]) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
@ -1617,35 +1617,42 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { |
|
|
|
} |
|
|
|
|
|
|
|
template <class P> |
|
|
|
bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size) { |
|
|
|
bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size, bool preserve_gpu_writes) { |
|
|
|
upload_copies.clear(); |
|
|
|
u64 total_size_bytes = 0; |
|
|
|
u64 staging_offset = 0; |
|
|
|
u64 largest_copy = 0; |
|
|
|
const DAddr buffer_start = buffer.cpu_addr_cached; |
|
|
|
memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) { |
|
|
|
upload_copies.push_back(BufferCopy{ |
|
|
|
.src_offset = total_size_bytes, |
|
|
|
.dst_offset = device_addr_out - buffer_start, |
|
|
|
.size = range_size, |
|
|
|
DAddr buffer_start = buffer.CpuAddr(); |
|
|
|
auto push = [&](u64 start, u64 end) { |
|
|
|
if (start >= end) { |
|
|
|
return; |
|
|
|
} |
|
|
|
u64 range_size = end - start; |
|
|
|
upload_copies.push_back({ |
|
|
|
.src_offset = staging_offset, |
|
|
|
.dst_offset = start - buffer_start, |
|
|
|
.size = range_size |
|
|
|
}); |
|
|
|
total_size_bytes += range_size; |
|
|
|
largest_copy = (std::max)(largest_copy, range_size); |
|
|
|
staging_offset += range_size; |
|
|
|
largest_copy = std::max(largest_copy, range_size); |
|
|
|
}; |
|
|
|
memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 addr, u64 range_size) { |
|
|
|
if (preserve_gpu_writes) { |
|
|
|
u64 start = addr; |
|
|
|
u64 end = addr + range_size; |
|
|
|
gpu_modified_ranges.ForEachInRange(start, range_size, [&](u64 gstart, u64 gsize) { |
|
|
|
u64 gend = gstart + gsize; |
|
|
|
push(start, gstart); |
|
|
|
start = std::max(start, gend); |
|
|
|
}); |
|
|
|
push(start, end); |
|
|
|
} else { |
|
|
|
push(addr, addr + range_size); |
|
|
|
} |
|
|
|
}); |
|
|
|
if (total_size_bytes == 0) { |
|
|
|
if (upload_copies.empty()) { |
|
|
|
return true; |
|
|
|
} |
|
|
|
u64 min_offset = (std::numeric_limits<u64>::max)(); |
|
|
|
u64 max_offset = 0; |
|
|
|
for (const auto& copy : upload_copies) { |
|
|
|
min_offset = (std::min)(min_offset, copy.dst_offset); |
|
|
|
max_offset = (std::max)(max_offset, copy.dst_offset + copy.size); |
|
|
|
} |
|
|
|
const DAddr sync_addr = buffer.CpuAddr() + min_offset; |
|
|
|
const u64 sync_size = max_offset - min_offset; |
|
|
|
DownloadBufferMemory(buffer, sync_addr, sync_size); |
|
|
|
const std::span<BufferCopy> copies_span(upload_copies.data(), upload_copies.size()); |
|
|
|
UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); |
|
|
|
any_buffer_uploaded = true; |
|
|
|
UploadMemory(buffer, staging_offset, largest_copy, std::span(upload_copies)); |
|
|
|
return false; |
|
|
|
} |
|
|
|
|
|
|
|
|