|
|
@ -170,11 +170,6 @@ public: |
|
|
void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, |
|
|
void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, |
|
|
bool is_written, bool is_image); |
|
|
bool is_written, bool is_image); |
|
|
|
|
|
|
|
|
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, |
|
|
|
|
|
bool synchronize = true, |
|
|
|
|
|
bool mark_as_written = false, |
|
|
|
|
|
bool discard_downloads = false); |
|
|
|
|
|
|
|
|
|
|
|
void FlushCachedWrites(); |
|
|
void FlushCachedWrites(); |
|
|
|
|
|
|
|
|
/// Return true when there are uncommitted buffers to be downloaded |
|
|
/// Return true when there are uncommitted buffers to be downloaded |
|
|
@ -354,8 +349,6 @@ private: |
|
|
|
|
|
|
|
|
bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); |
|
|
bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); |
|
|
|
|
|
|
|
|
bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size); |
|
|
|
|
|
|
|
|
|
|
|
void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
|
|
void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
|
|
std::span<BufferCopy> copies); |
|
|
std::span<BufferCopy> copies); |
|
|
|
|
|
|
|
|
@ -442,7 +435,6 @@ private: |
|
|
|
|
|
|
|
|
std::vector<BufferId> cached_write_buffer_ids; |
|
|
std::vector<BufferId> cached_write_buffer_ids; |
|
|
|
|
|
|
|
|
IntervalSet discarded_ranges; |
|
|
|
|
|
IntervalSet uncommitted_ranges; |
|
|
IntervalSet uncommitted_ranges; |
|
|
IntervalSet common_ranges; |
|
|
IntervalSet common_ranges; |
|
|
std::deque<IntervalSet> committed_ranges; |
|
|
std::deque<IntervalSet> committed_ranges; |
|
|
@ -600,17 +592,13 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am |
|
|
}}; |
|
|
}}; |
|
|
|
|
|
|
|
|
boost::container::small_vector<IntervalType, 4> tmp_intervals; |
|
|
boost::container::small_vector<IntervalType, 4> tmp_intervals; |
|
|
const bool is_high_accuracy = |
|
|
|
|
|
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; |
|
|
|
|
|
auto mirror = [&](VAddr base_address, VAddr base_address_end) { |
|
|
auto mirror = [&](VAddr base_address, VAddr base_address_end) { |
|
|
const u64 size = base_address_end - base_address; |
|
|
const u64 size = base_address_end - base_address; |
|
|
const VAddr diff = base_address - *cpu_src_address; |
|
|
const VAddr diff = base_address - *cpu_src_address; |
|
|
const VAddr new_base_address = *cpu_dest_address + diff; |
|
|
const VAddr new_base_address = *cpu_dest_address + diff; |
|
|
const IntervalType add_interval{new_base_address, new_base_address + size}; |
|
|
const IntervalType add_interval{new_base_address, new_base_address + size}; |
|
|
|
|
|
uncommitted_ranges.add(add_interval); |
|
|
tmp_intervals.push_back(add_interval); |
|
|
tmp_intervals.push_back(add_interval); |
|
|
if (is_high_accuracy) { |
|
|
|
|
|
uncommitted_ranges.add(add_interval); |
|
|
|
|
|
} |
|
|
|
|
|
}; |
|
|
}; |
|
|
ForEachWrittenRange(*cpu_src_address, amount, mirror); |
|
|
ForEachWrittenRange(*cpu_src_address, amount, mirror); |
|
|
// This subtraction in this order is important for overlapping copies. |
|
|
// This subtraction in this order is important for overlapping copies. |
|
|
@ -821,32 +809,6 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add |
|
|
compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); |
|
|
compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
template <class P> |
|
|
|
|
|
std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size, |
|
|
|
|
|
bool synchronize, |
|
|
|
|
|
bool mark_as_written, |
|
|
|
|
|
bool discard_downloads) { |
|
|
|
|
|
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
|
|
|
|
|
if (!cpu_addr) { |
|
|
|
|
|
return {&slot_buffers[NULL_BUFFER_ID], 0}; |
|
|
|
|
|
} |
|
|
|
|
|
const BufferId buffer_id = FindBuffer(*cpu_addr, size); |
|
|
|
|
|
Buffer& buffer = slot_buffers[buffer_id]; |
|
|
|
|
|
if (synchronize) { |
|
|
|
|
|
// SynchronizeBuffer(buffer, *cpu_addr, size); |
|
|
|
|
|
SynchronizeBufferNoModified(buffer, *cpu_addr, size); |
|
|
|
|
|
} |
|
|
|
|
|
if (mark_as_written) { |
|
|
|
|
|
MarkWrittenBuffer(buffer_id, *cpu_addr, size); |
|
|
|
|
|
} |
|
|
|
|
|
if (discard_downloads) { |
|
|
|
|
|
IntervalType interval{*cpu_addr, size}; |
|
|
|
|
|
ClearDownload(interval); |
|
|
|
|
|
discarded_ranges.subtract(interval); |
|
|
|
|
|
} |
|
|
|
|
|
return {&buffer, buffer.Offset(*cpu_addr)}; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
template <class P> |
|
|
template <class P> |
|
|
void BufferCache<P>::FlushCachedWrites() { |
|
|
void BufferCache<P>::FlushCachedWrites() { |
|
|
for (const BufferId buffer_id : cached_write_buffer_ids) { |
|
|
for (const BufferId buffer_id : cached_write_buffer_ids) { |
|
|
@ -862,6 +824,10 @@ bool BufferCache<P>::HasUncommittedFlushes() const noexcept { |
|
|
|
|
|
|
|
|
template <class P> |
|
|
template <class P> |
|
|
void BufferCache<P>::AccumulateFlushes() { |
|
|
void BufferCache<P>::AccumulateFlushes() { |
|
|
|
|
|
if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) { |
|
|
|
|
|
uncommitted_ranges.clear(); |
|
|
|
|
|
return; |
|
|
|
|
|
} |
|
|
if (uncommitted_ranges.empty()) { |
|
|
if (uncommitted_ranges.empty()) { |
|
|
return; |
|
|
return; |
|
|
} |
|
|
} |
|
|
@ -877,14 +843,12 @@ template <class P> |
|
|
void BufferCache<P>::CommitAsyncFlushesHigh() { |
|
|
void BufferCache<P>::CommitAsyncFlushesHigh() { |
|
|
AccumulateFlushes(); |
|
|
AccumulateFlushes(); |
|
|
|
|
|
|
|
|
for (const auto& interval : discarded_ranges) { |
|
|
|
|
|
common_ranges.subtract(interval); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (committed_ranges.empty()) { |
|
|
if (committed_ranges.empty()) { |
|
|
return; |
|
|
return; |
|
|
} |
|
|
} |
|
|
MICROPROFILE_SCOPE(GPU_DownloadMemory); |
|
|
MICROPROFILE_SCOPE(GPU_DownloadMemory); |
|
|
|
|
|
const bool is_accuracy_normal = |
|
|
|
|
|
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; |
|
|
|
|
|
|
|
|
auto it = committed_ranges.begin(); |
|
|
auto it = committed_ranges.begin(); |
|
|
while (it != committed_ranges.end()) { |
|
|
while (it != committed_ranges.end()) { |
|
|
@ -909,6 +873,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { |
|
|
ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { |
|
|
ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { |
|
|
buffer.ForEachDownloadRangeAndClear( |
|
|
buffer.ForEachDownloadRangeAndClear( |
|
|
cpu_addr, size, [&](u64 range_offset, u64 range_size) { |
|
|
cpu_addr, size, [&](u64 range_offset, u64 range_size) { |
|
|
|
|
|
if (is_accuracy_normal) { |
|
|
|
|
|
return; |
|
|
|
|
|
} |
|
|
const VAddr buffer_addr = buffer.CpuAddr(); |
|
|
const VAddr buffer_addr = buffer.CpuAddr(); |
|
|
const auto add_download = [&](VAddr start, VAddr end) { |
|
|
const auto add_download = [&](VAddr start, VAddr end) { |
|
|
const u64 new_offset = start - buffer_addr; |
|
|
const u64 new_offset = start - buffer_addr; |
|
|
@ -973,7 +940,12 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { |
|
|
|
|
|
|
|
|
template <class P> |
|
|
template <class P> |
|
|
void BufferCache<P>::CommitAsyncFlushes() { |
|
|
void BufferCache<P>::CommitAsyncFlushes() { |
|
|
CommitAsyncFlushesHigh(); |
|
|
|
|
|
|
|
|
if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { |
|
|
|
|
|
CommitAsyncFlushesHigh(); |
|
|
|
|
|
} else { |
|
|
|
|
|
uncommitted_ranges.clear(); |
|
|
|
|
|
committed_ranges.clear(); |
|
|
|
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
template <class P> |
|
|
template <class P> |
|
|
@ -1353,7 +1325,7 @@ void BufferCache<P>::UpdateIndexBuffer() { |
|
|
const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); |
|
|
const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); |
|
|
const auto& index_array = draw_state.index_buffer; |
|
|
const auto& index_array = draw_state.index_buffer; |
|
|
auto& flags = maxwell3d->dirty.flags; |
|
|
auto& flags = maxwell3d->dirty.flags; |
|
|
if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) { |
|
|
|
|
|
|
|
|
if (!flags[Dirty::IndexBuffer]) { |
|
|
return; |
|
|
return; |
|
|
} |
|
|
} |
|
|
flags[Dirty::IndexBuffer] = false; |
|
|
flags[Dirty::IndexBuffer] = false; |
|
|
@ -1574,11 +1546,7 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s |
|
|
if (!is_async) { |
|
|
if (!is_async) { |
|
|
return; |
|
|
return; |
|
|
} |
|
|
} |
|
|
const bool is_high_accuracy = |
|
|
|
|
|
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; |
|
|
|
|
|
if (is_high_accuracy) { |
|
|
|
|
|
uncommitted_ranges.add(base_interval); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
uncommitted_ranges.add(base_interval); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
template <class P> |
|
|
template <class P> |
|
|
@ -1771,51 +1739,6 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s |
|
|
return false; |
|
|
return false; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
template <class P> |
|
|
|
|
|
bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) { |
|
|
|
|
|
boost::container::small_vector<BufferCopy, 4> copies; |
|
|
|
|
|
u64 total_size_bytes = 0; |
|
|
|
|
|
u64 largest_copy = 0; |
|
|
|
|
|
IntervalSet found_sets{}; |
|
|
|
|
|
auto make_copies = [&] { |
|
|
|
|
|
for (auto& interval : found_sets) { |
|
|
|
|
|
const std::size_t sub_size = interval.upper() - interval.lower(); |
|
|
|
|
|
const VAddr cpu_addr = interval.lower(); |
|
|
|
|
|
copies.push_back(BufferCopy{ |
|
|
|
|
|
.src_offset = total_size_bytes, |
|
|
|
|
|
.dst_offset = cpu_addr - buffer.CpuAddr(), |
|
|
|
|
|
.size = sub_size, |
|
|
|
|
|
}); |
|
|
|
|
|
total_size_bytes += sub_size; |
|
|
|
|
|
largest_copy = std::max(largest_copy, sub_size); |
|
|
|
|
|
} |
|
|
|
|
|
const std::span<BufferCopy> copies_span(copies.data(), copies.size()); |
|
|
|
|
|
UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); |
|
|
|
|
|
}; |
|
|
|
|
|
buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { |
|
|
|
|
|
const VAddr base_adr = buffer.CpuAddr() + range_offset; |
|
|
|
|
|
const VAddr end_adr = base_adr + range_size; |
|
|
|
|
|
const IntervalType add_interval{base_adr, end_adr}; |
|
|
|
|
|
found_sets.add(add_interval); |
|
|
|
|
|
}); |
|
|
|
|
|
if (found_sets.empty()) { |
|
|
|
|
|
return true; |
|
|
|
|
|
} |
|
|
|
|
|
const IntervalType search_interval{cpu_addr, cpu_addr + size}; |
|
|
|
|
|
auto it = common_ranges.lower_bound(search_interval); |
|
|
|
|
|
auto it_end = common_ranges.upper_bound(search_interval); |
|
|
|
|
|
if (it == common_ranges.end()) { |
|
|
|
|
|
make_copies(); |
|
|
|
|
|
return false; |
|
|
|
|
|
} |
|
|
|
|
|
while (it != it_end) { |
|
|
|
|
|
found_sets.subtract(*it); |
|
|
|
|
|
it++; |
|
|
|
|
|
} |
|
|
|
|
|
make_copies(); |
|
|
|
|
|
return false; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
template <class P> |
|
|
template <class P> |
|
|
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
|
|
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
|
|
std::span<BufferCopy> copies) { |
|
|
std::span<BufferCopy> copies) { |
|
|
|