|
|
|
@ -37,28 +37,45 @@ public: |
|
|
|
bool is_written = false, bool use_fast_cbuf = false) { |
|
|
|
std::lock_guard lock{mutex}; |
|
|
|
|
|
|
|
auto& memory_manager = system.GPU().MemoryManager(); |
|
|
|
const auto host_ptr = memory_manager.GetPointer(gpu_addr); |
|
|
|
if (!host_ptr) { |
|
|
|
const std::optional<VAddr> cpu_addr_opt = |
|
|
|
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); |
|
|
|
|
|
|
|
if (!cpu_addr_opt) { |
|
|
|
return {GetEmptyBuffer(size), 0}; |
|
|
|
} |
|
|
|
const auto cache_addr = ToCacheAddr(host_ptr); |
|
|
|
|
|
|
|
VAddr cpu_addr = *cpu_addr_opt; |
|
|
|
|
|
|
|
// Cache management is a big overhead, so only cache entries with a given size. |
|
|
|
// TODO: Figure out which size is the best for given games. |
|
|
|
constexpr std::size_t max_stream_size = 0x800; |
|
|
|
if (use_fast_cbuf || size < max_stream_size) { |
|
|
|
if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { |
|
|
|
if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { |
|
|
|
auto& memory_manager = system.GPU().MemoryManager(); |
|
|
|
if (use_fast_cbuf) { |
|
|
|
if (Tegra::MemoryManager::IsGranularRange(gpu_addr, size)) { |
|
|
|
const auto host_ptr = memory_manager.GetPointer(gpu_addr); |
|
|
|
return ConstBufferUpload(host_ptr, size); |
|
|
|
} else { |
|
|
|
staging_buffer.resize(size); |
|
|
|
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); |
|
|
|
return ConstBufferUpload(staging_buffer.data(), size); |
|
|
|
} |
|
|
|
} else { |
|
|
|
if (Tegra::MemoryManager::IsGranularRange(gpu_addr, size)) { |
|
|
|
const auto host_ptr = memory_manager.GetPointer(gpu_addr); |
|
|
|
return StreamBufferUpload(host_ptr, size, alignment); |
|
|
|
} else { |
|
|
|
staging_buffer.resize(size); |
|
|
|
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); |
|
|
|
return StreamBufferUpload(staging_buffer.data(), size, alignment); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
auto block = GetBlock(cache_addr, size); |
|
|
|
auto map = MapAddress(block, gpu_addr, cache_addr, size); |
|
|
|
auto block = GetBlock(cpu_addr, size); |
|
|
|
auto map = MapAddress(block, gpu_addr, cpu_addr, size); |
|
|
|
if (is_written) { |
|
|
|
map->MarkAsModified(true, GetModifiedTicks()); |
|
|
|
if (!map->IsWritten()) { |
|
|
|
@ -71,7 +88,7 @@ public: |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
const u64 offset = static_cast<u64>(block->GetOffset(cache_addr)); |
|
|
|
const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr)); |
|
|
|
|
|
|
|
return {ToHandle(block), offset}; |
|
|
|
} |
|
|
|
@ -112,7 +129,7 @@ public: |
|
|
|
} |
|
|
|
|
|
|
|
/// Write any cached resources overlapping the specified region back to memory |
|
|
|
void FlushRegion(CacheAddr addr, std::size_t size) { |
|
|
|
void FlushRegion(VAddr addr, std::size_t size) { |
|
|
|
std::lock_guard lock{mutex}; |
|
|
|
|
|
|
|
std::vector<MapInterval> objects = GetMapsInRange(addr, size); |
|
|
|
@ -127,7 +144,7 @@ public: |
|
|
|
} |
|
|
|
|
|
|
|
/// Mark the specified region as being invalidated |
|
|
|
void InvalidateRegion(CacheAddr addr, u64 size) { |
|
|
|
void InvalidateRegion(VAddr addr, u64 size) { |
|
|
|
std::lock_guard lock{mutex}; |
|
|
|
|
|
|
|
std::vector<MapInterval> objects = GetMapsInRange(addr, size); |
|
|
|
@ -152,7 +169,7 @@ protected: |
|
|
|
|
|
|
|
virtual void WriteBarrier() = 0; |
|
|
|
|
|
|
|
virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0; |
|
|
|
virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0; |
|
|
|
|
|
|
|
virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, |
|
|
|
const u8* data) = 0; |
|
|
|
@ -169,20 +186,17 @@ protected: |
|
|
|
|
|
|
|
/// Register an object into the cache |
|
|
|
void Register(const MapInterval& new_map, bool inherit_written = false) { |
|
|
|
const CacheAddr cache_ptr = new_map->GetStart(); |
|
|
|
const std::optional<VAddr> cpu_addr = |
|
|
|
system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress()); |
|
|
|
if (!cache_ptr || !cpu_addr) { |
|
|
|
const VAddr cpu_addr = new_map->GetStart(); |
|
|
|
if (!cpu_addr) { |
|
|
|
LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", |
|
|
|
new_map->GetGpuAddress()); |
|
|
|
return; |
|
|
|
} |
|
|
|
const std::size_t size = new_map->GetEnd() - new_map->GetStart(); |
|
|
|
new_map->SetCpuAddress(*cpu_addr); |
|
|
|
new_map->MarkAsRegistered(true); |
|
|
|
const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; |
|
|
|
mapped_addresses.insert({interval, new_map}); |
|
|
|
rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); |
|
|
|
rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); |
|
|
|
if (inherit_written) { |
|
|
|
MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); |
|
|
|
new_map->MarkAsWritten(true); |
|
|
|
@ -192,7 +206,7 @@ protected: |
|
|
|
/// Unregisters an object from the cache |
|
|
|
void Unregister(MapInterval& map) { |
|
|
|
const std::size_t size = map->GetEnd() - map->GetStart(); |
|
|
|
rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1); |
|
|
|
rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1); |
|
|
|
map->MarkAsRegistered(false); |
|
|
|
if (map->IsWritten()) { |
|
|
|
UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); |
|
|
|
@ -202,32 +216,39 @@ protected: |
|
|
|
} |
|
|
|
|
|
|
|
private: |
|
|
|
MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) { |
|
|
|
MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) { |
|
|
|
return std::make_shared<MapIntervalBase>(start, end, gpu_addr); |
|
|
|
} |
|
|
|
|
|
|
|
MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, |
|
|
|
const CacheAddr cache_addr, const std::size_t size) { |
|
|
|
MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr, |
|
|
|
const std::size_t size) { |
|
|
|
|
|
|
|
std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size); |
|
|
|
std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size); |
|
|
|
if (overlaps.empty()) { |
|
|
|
const CacheAddr cache_addr_end = cache_addr + size; |
|
|
|
MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr); |
|
|
|
u8* host_ptr = FromCacheAddr(cache_addr); |
|
|
|
UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); |
|
|
|
auto& memory_manager = system.GPU().MemoryManager(); |
|
|
|
const VAddr cpu_addr_end = cpu_addr + size; |
|
|
|
MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr); |
|
|
|
if (Tegra::MemoryManager::IsGranularRange(gpu_addr, size)) { |
|
|
|
u8* host_ptr = memory_manager.GetPointer(gpu_addr); |
|
|
|
UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); |
|
|
|
} else { |
|
|
|
staging_buffer.resize(size); |
|
|
|
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); |
|
|
|
UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data()); |
|
|
|
} |
|
|
|
Register(new_map); |
|
|
|
return new_map; |
|
|
|
} |
|
|
|
|
|
|
|
const CacheAddr cache_addr_end = cache_addr + size; |
|
|
|
const VAddr cpu_addr_end = cpu_addr + size; |
|
|
|
if (overlaps.size() == 1) { |
|
|
|
MapInterval& current_map = overlaps[0]; |
|
|
|
if (current_map->IsInside(cache_addr, cache_addr_end)) { |
|
|
|
if (current_map->IsInside(cpu_addr, cpu_addr_end)) { |
|
|
|
return current_map; |
|
|
|
} |
|
|
|
} |
|
|
|
CacheAddr new_start = cache_addr; |
|
|
|
CacheAddr new_end = cache_addr_end; |
|
|
|
VAddr new_start = cpu_addr; |
|
|
|
VAddr new_end = cpu_addr_end; |
|
|
|
bool write_inheritance = false; |
|
|
|
bool modified_inheritance = false; |
|
|
|
// Calculate new buffer parameters |
|
|
|
@ -237,7 +258,7 @@ private: |
|
|
|
write_inheritance |= overlap->IsWritten(); |
|
|
|
modified_inheritance |= overlap->IsModified(); |
|
|
|
} |
|
|
|
GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; |
|
|
|
GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr; |
|
|
|
for (auto& overlap : overlaps) { |
|
|
|
Unregister(overlap); |
|
|
|
} |
|
|
|
@ -250,7 +271,7 @@ private: |
|
|
|
return new_map; |
|
|
|
} |
|
|
|
|
|
|
|
void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end, |
|
|
|
void UpdateBlock(const TBuffer& block, VAddr start, VAddr end, |
|
|
|
std::vector<MapInterval>& overlaps) { |
|
|
|
const IntervalType base_interval{start, end}; |
|
|
|
IntervalSet interval_set{}; |
|
|
|
@ -262,13 +283,15 @@ private: |
|
|
|
for (auto& interval : interval_set) { |
|
|
|
std::size_t size = interval.upper() - interval.lower(); |
|
|
|
if (size > 0) { |
|
|
|
u8* host_ptr = FromCacheAddr(interval.lower()); |
|
|
|
UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr); |
|
|
|
staging_buffer.resize(size); |
|
|
|
system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); |
|
|
|
UploadBlockData(block, block->GetOffset(interval.lower()), size, |
|
|
|
staging_buffer.data()); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) { |
|
|
|
std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) { |
|
|
|
if (size == 0) { |
|
|
|
return {}; |
|
|
|
} |
|
|
|
@ -290,8 +313,9 @@ private: |
|
|
|
void FlushMap(MapInterval map) { |
|
|
|
std::size_t size = map->GetEnd() - map->GetStart(); |
|
|
|
TBuffer block = blocks[map->GetStart() >> block_page_bits]; |
|
|
|
u8* host_ptr = FromCacheAddr(map->GetStart()); |
|
|
|
DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr); |
|
|
|
staging_buffer.resize(size); |
|
|
|
DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data()); |
|
|
|
system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size); |
|
|
|
map->MarkAsModified(false, 0); |
|
|
|
} |
|
|
|
|
|
|
|
@ -316,14 +340,14 @@ private: |
|
|
|
TBuffer EnlargeBlock(TBuffer buffer) { |
|
|
|
const std::size_t old_size = buffer->GetSize(); |
|
|
|
const std::size_t new_size = old_size + block_page_size; |
|
|
|
const CacheAddr cache_addr = buffer->GetCacheAddr(); |
|
|
|
TBuffer new_buffer = CreateBlock(cache_addr, new_size); |
|
|
|
const VAddr cpu_addr = buffer->GetCpuAddr(); |
|
|
|
TBuffer new_buffer = CreateBlock(cpu_addr, new_size); |
|
|
|
CopyBlock(buffer, new_buffer, 0, 0, old_size); |
|
|
|
buffer->SetEpoch(epoch); |
|
|
|
pending_destruction.push_back(buffer); |
|
|
|
const CacheAddr cache_addr_end = cache_addr + new_size - 1; |
|
|
|
u64 page_start = cache_addr >> block_page_bits; |
|
|
|
const u64 page_end = cache_addr_end >> block_page_bits; |
|
|
|
const VAddr cpu_addr_end = cpu_addr + new_size - 1; |
|
|
|
u64 page_start = cpu_addr >> block_page_bits; |
|
|
|
const u64 page_end = cpu_addr_end >> block_page_bits; |
|
|
|
while (page_start <= page_end) { |
|
|
|
blocks[page_start] = new_buffer; |
|
|
|
++page_start; |
|
|
|
@ -334,9 +358,9 @@ private: |
|
|
|
TBuffer MergeBlocks(TBuffer first, TBuffer second) { |
|
|
|
const std::size_t size_1 = first->GetSize(); |
|
|
|
const std::size_t size_2 = second->GetSize(); |
|
|
|
const CacheAddr first_addr = first->GetCacheAddr(); |
|
|
|
const CacheAddr second_addr = second->GetCacheAddr(); |
|
|
|
const CacheAddr new_addr = std::min(first_addr, second_addr); |
|
|
|
const VAddr first_addr = first->GetCpuAddr(); |
|
|
|
const VAddr second_addr = second->GetCpuAddr(); |
|
|
|
const VAddr new_addr = std::min(first_addr, second_addr); |
|
|
|
const std::size_t new_size = size_1 + size_2; |
|
|
|
TBuffer new_buffer = CreateBlock(new_addr, new_size); |
|
|
|
CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); |
|
|
|
@ -345,9 +369,9 @@ private: |
|
|
|
second->SetEpoch(epoch); |
|
|
|
pending_destruction.push_back(first); |
|
|
|
pending_destruction.push_back(second); |
|
|
|
const CacheAddr cache_addr_end = new_addr + new_size - 1; |
|
|
|
const VAddr cpu_addr_end = new_addr + new_size - 1; |
|
|
|
u64 page_start = new_addr >> block_page_bits; |
|
|
|
const u64 page_end = cache_addr_end >> block_page_bits; |
|
|
|
const u64 page_end = cpu_addr_end >> block_page_bits; |
|
|
|
while (page_start <= page_end) { |
|
|
|
blocks[page_start] = new_buffer; |
|
|
|
++page_start; |
|
|
|
@ -355,18 +379,18 @@ private: |
|
|
|
return new_buffer; |
|
|
|
} |
|
|
|
|
|
|
|
TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) { |
|
|
|
TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) { |
|
|
|
TBuffer found{}; |
|
|
|
const CacheAddr cache_addr_end = cache_addr + size - 1; |
|
|
|
u64 page_start = cache_addr >> block_page_bits; |
|
|
|
const u64 page_end = cache_addr_end >> block_page_bits; |
|
|
|
const VAddr cpu_addr_end = cpu_addr + size - 1; |
|
|
|
u64 page_start = cpu_addr >> block_page_bits; |
|
|
|
const u64 page_end = cpu_addr_end >> block_page_bits; |
|
|
|
while (page_start <= page_end) { |
|
|
|
auto it = blocks.find(page_start); |
|
|
|
if (it == blocks.end()) { |
|
|
|
if (found) { |
|
|
|
found = EnlargeBlock(found); |
|
|
|
} else { |
|
|
|
const CacheAddr start_addr = (page_start << block_page_bits); |
|
|
|
const VAddr start_addr = (page_start << block_page_bits); |
|
|
|
found = CreateBlock(start_addr, block_page_size); |
|
|
|
blocks[page_start] = found; |
|
|
|
} |
|
|
|
@ -386,7 +410,7 @@ private: |
|
|
|
return found; |
|
|
|
} |
|
|
|
|
|
|
|
void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { |
|
|
|
void MarkRegionAsWritten(const VAddr start, const VAddr end) { |
|
|
|
u64 page_start = start >> write_page_bit; |
|
|
|
const u64 page_end = end >> write_page_bit; |
|
|
|
while (page_start <= page_end) { |
|
|
|
@ -400,7 +424,7 @@ private: |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { |
|
|
|
void UnmarkRegionAsWritten(const VAddr start, const VAddr end) { |
|
|
|
u64 page_start = start >> write_page_bit; |
|
|
|
const u64 page_end = end >> write_page_bit; |
|
|
|
while (page_start <= page_end) { |
|
|
|
@ -416,7 +440,7 @@ private: |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const { |
|
|
|
bool IsRegionWritten(const VAddr start, const VAddr end) const { |
|
|
|
u64 page_start = start >> write_page_bit; |
|
|
|
const u64 page_end = end >> write_page_bit; |
|
|
|
while (page_start <= page_end) { |
|
|
|
@ -440,8 +464,8 @@ private: |
|
|
|
u64 buffer_offset = 0; |
|
|
|
u64 buffer_offset_base = 0; |
|
|
|
|
|
|
|
using IntervalSet = boost::icl::interval_set<CacheAddr>; |
|
|
|
using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>; |
|
|
|
using IntervalSet = boost::icl::interval_set<VAddr>; |
|
|
|
using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>; |
|
|
|
using IntervalType = typename IntervalCache::interval_type; |
|
|
|
IntervalCache mapped_addresses; |
|
|
|
|
|
|
|
@ -456,6 +480,8 @@ private: |
|
|
|
u64 epoch = 0; |
|
|
|
u64 modified_ticks = 0; |
|
|
|
|
|
|
|
std::vector<u8> staging_buffer; |
|
|
|
|
|
|
|
std::recursive_mutex mutex; |
|
|
|
}; |
|
|
|
|
|
|
|
|