Browse Source

[buffer_cache] Add batching support for memory tracker updates

pull/3316/head
CamilleLaVey 3 weeks ago
parent
commit
c9ee46fea2
  1. 27
      src/tests/video_core/memory_tracker.cpp
  2. 48
      src/video_core/buffer_cache/word_manager.h

27
src/tests/video_core/memory_tracker.cpp

@ -4,6 +4,8 @@
#include <memory>
#include <stdexcept>
#include <unordered_map>
#include <tuple>
#include <vector>
#include <catch2/catch_test_macros.hpp>
@ -23,6 +25,8 @@ constexpr VAddr c = 16 * HIGH_PAGE_SIZE;
class RasterizerInterface {
public:
void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
++update_calls;
calls.emplace_back(addr, size, delta);
const u64 page_start{addr >> Core::DEVICE_PAGEBITS};
const u64 page_end{(addr + size + Core::DEVICE_PAGESIZE - 1) >> Core::DEVICE_PAGEBITS};
for (u64 page = page_start; page < page_end; ++page) {
@ -36,6 +40,9 @@ public:
}
}
[[nodiscard]] size_t UpdateCalls() const noexcept { return update_calls; }
[[nodiscard]] const std::vector<std::tuple<VAddr, u64, int>>& UpdateCallsList() const noexcept { return calls; }
[[nodiscard]] int Count(VAddr addr) const noexcept {
const auto it = page_table.find(addr >> Core::DEVICE_PAGEBITS);
return it == page_table.end() ? 0 : it->second;
@ -51,7 +58,10 @@ public:
private:
std::unordered_map<u64, int> page_table;
size_t update_calls = 0;
std::vector<std::tuple<VAddr, u64, int>> calls;
};
} // Anonymous namespace
using MemoryTracker = VideoCommon::MemoryTrackerBase<RasterizerInterface>;
@ -544,3 +554,20 @@ TEST_CASE("MemoryTracker: Cached write downloads") {
memory_track->MarkRegionAsCpuModified(c, WORD);
REQUIRE(rasterizer.Count() == 0);
}
// Verifies that FlushCachedWrites reports cached CPU writes to the rasterizer as
// merged ranges: two adjacent pages must arrive as a single call, while a page
// separated by a gap must arrive as its own call.
TEST_CASE("MemoryTracker: FlushCachedWrites batching") {
    RasterizerInterface rasterizer;
    std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
    memory_track->UnmarkRegionAsCpuModified(c, WORD * 2);

    // The mock records every UpdatePagesCachedCount call, so the setup call above
    // may already have produced entries (presumably it starts tracking the pages;
    // verify against MemoryTrackerBase). Measure everything relative to this
    // baseline instead of assuming the call log starts out empty.
    const size_t baseline_calls = rasterizer.UpdateCalls();

    // Pages 1 and 2 are adjacent; page 4 is separated from them by page 3.
    memory_track->CachedCpuWrite(c + PAGE, PAGE);
    memory_track->CachedCpuWrite(c + PAGE * 2, PAGE);
    memory_track->CachedCpuWrite(c + PAGE * 4, PAGE);

    // Cached writes are expected to stay silent until they are flushed.
    REQUIRE(rasterizer.UpdateCalls() == baseline_calls);

    memory_track->FlushCachedWrites();
    REQUIRE(rasterizer.UpdateCalls() == baseline_calls + 2);

    const auto& calls = rasterizer.UpdateCallsList();
    // Guard the indexing below: the log must contain exactly the two new entries.
    REQUIRE(calls.size() == baseline_calls + 2);

    const auto& merged_call = calls[baseline_calls];
    const auto& isolated_call = calls[baseline_calls + 1];
    REQUIRE(std::get<0>(merged_call) == c + PAGE);
    REQUIRE(std::get<1>(merged_call) == PAGE * 2);
    REQUIRE(std::get<0>(isolated_call) == c + PAGE * 4);
    REQUIRE(std::get<1>(isolated_call) == PAGE);
}

48
src/video_core/buffer_cache/word_manager.h

@ -11,6 +11,7 @@
#include <limits>
#include <span>
#include <utility>
#include <vector>
#include "common/alignment.h"
#include "common/common_funcs.h"
@ -256,9 +257,10 @@ public:
std::span<u64> state_words = words.template Span<type>();
[[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
[[maybe_unused]] std::span<u64> cached_words = words.template Span<Type::CachedCPU>();
std::vector<std::pair<VAddr, u64>> ranges;
IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) {
if constexpr (type == Type::CPU || type == Type::CachedCPU) {
NotifyRasterizer<!enable>(index, untracked_words[index], mask);
CollectChangedRanges<(!enable)>(index, untracked_words[index], mask, ranges);
}
if constexpr (enable) {
state_words[index] |= mask;
@ -279,6 +281,9 @@ public:
}
}
});
if (!ranges.empty()) {
ApplyCollectedRanges(ranges, (!enable) ? 1 : -1);
}
}
/**
@ -304,6 +309,7 @@ public:
func(cpu_addr + pending_offset * BYTES_PER_PAGE,
(pending_pointer - pending_offset) * BYTES_PER_PAGE);
};
std::vector<std::pair<VAddr, u64>> ranges;
IterateWords(offset, size, [&](size_t index, u64 mask) {
if constexpr (type == Type::GPU) {
mask &= ~untracked_words[index];
@ -311,7 +317,7 @@ public:
const u64 word = state_words[index] & mask;
if constexpr (clear) {
if constexpr (type == Type::CPU || type == Type::CachedCPU) {
NotifyRasterizer<true>(index, untracked_words[index], mask);
CollectChangedRanges<true>(index, untracked_words[index], mask, ranges);
}
state_words[index] &= ~mask;
if constexpr (type == Type::CPU || type == Type::CachedCPU) {
@ -343,6 +349,9 @@ public:
if (pending) {
release();
}
if (!ranges.empty()) {
ApplyCollectedRanges(ranges, 1);
}
}
/**
@ -425,13 +434,17 @@ public:
u64* const cached_words = Array<Type::CachedCPU>();
u64* const untracked_words = Array<Type::Untracked>();
u64* const cpu_words = Array<Type::CPU>();
std::vector<std::pair<VAddr, u64>> ranges;
for (u64 word_index = 0; word_index < num_words; ++word_index) {
const u64 cached_bits = cached_words[word_index];
NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
CollectChangedRanges<false>(word_index, untracked_words[word_index], cached_bits, ranges);
untracked_words[word_index] |= cached_bits;
cpu_words[word_index] |= cached_bits;
cached_words[word_index] = 0;
}
if (!ranges.empty()) {
ApplyCollectedRanges(ranges, -1);
}
}
private:
@ -470,6 +483,35 @@ private:
*
* @tparam add_to_tracker True when the tracker should start tracking the new pages
*/
template <bool add_to_tracker>
void CollectChangedRanges(u64 word_index, u64 current_bits, u64 new_bits,
                          std::vector<std::pair<VAddr, u64>>& out_ranges) const {
    // Records, instead of reporting immediately, the byte ranges of one word whose
    // bits change: set bits of current_bits when add_to_tracker is true, clear bits
    // otherwise, in both cases restricted to new_bits. Ranges are appended to
    // out_ranges as (start address, size in bytes) pairs.
    u64 candidate_bits = current_bits;
    if constexpr (!add_to_tracker) {
        candidate_bits = ~current_bits;
    }
    u64 changed_bits = candidate_bits & new_bits;

    // Address of the first page covered by this word.
    const VAddr word_base = cpu_addr + word_index * BYTES_PER_WORD;

    // IteratePages hands back page offsets and page counts (presumably one callback
    // per contiguous run of set bits - confirm against its definition); convert
    // both from page units to bytes before storing.
    IteratePages(changed_bits, [&](size_t page_offset, size_t page_count) {
        out_ranges.emplace_back(word_base + page_offset * BYTES_PER_PAGE,
                                page_count * BYTES_PER_PAGE);
    });
}
void ApplyCollectedRanges(std::vector<std::pair<VAddr, u64>>& ranges, int delta) const {
if (ranges.empty()) return;
std::sort(ranges.begin(), ranges.end(),
[](const auto& a, const auto& b) { return a.first < b.first; });
VAddr cur_addr = ranges[0].first;
u64 cur_size = ranges[0].second;
for (size_t i = 1; i < ranges.size(); ++i) {
if (cur_addr + cur_size == ranges[i].first) {
cur_size += ranges[i].second;
} else {
tracker->UpdatePagesCachedCount(cur_addr, cur_size, delta);
cur_addr = ranges[i].first;
cur_size = ranges[i].second;
}
}
tracker->UpdatePagesCachedCount(cur_addr, cur_size, delta);
ranges.clear();
}
template <bool add_to_tracker>
void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;

Loading…
Cancel
Save