|
|
@ -6,11 +6,13 @@ |
|
|
#include "common/alignment.h"
|
|
|
#include "common/alignment.h"
|
|
|
#include "common/assert.h"
|
|
|
#include "common/assert.h"
|
|
|
#include "common/logging/log.h"
|
|
|
#include "common/logging/log.h"
|
|
|
|
|
|
#include "common/settings.h"
|
|
|
#include "core/core.h"
|
|
|
#include "core/core.h"
|
|
|
#include "core/device_memory.h"
|
|
|
#include "core/device_memory.h"
|
|
|
#include "core/hle/kernel/k_page_table.h"
|
|
|
#include "core/hle/kernel/k_page_table.h"
|
|
|
#include "core/hle/kernel/k_process.h"
|
|
|
#include "core/hle/kernel/k_process.h"
|
|
|
#include "core/memory.h"
|
|
|
#include "core/memory.h"
|
|
|
|
|
|
#include "video_core/invalidation_accumulator.h"
|
|
|
#include "video_core/memory_manager.h"
|
|
|
#include "video_core/memory_manager.h"
|
|
|
#include "video_core/rasterizer_interface.h"
|
|
|
#include "video_core/rasterizer_interface.h"
|
|
|
#include "video_core/renderer_base.h"
|
|
|
#include "video_core/renderer_base.h"
|
|
|
@ -26,7 +28,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 |
|
|
entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, |
|
|
entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, |
|
|
page_bits != big_page_bits ? page_bits : 0}, |
|
|
page_bits != big_page_bits ? page_bits : 0}, |
|
|
kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( |
|
|
kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( |
|
|
1, std::memory_order_acq_rel)} { |
|
|
|
|
|
|
|
|
1, std::memory_order_acq_rel)}, |
|
|
|
|
|
accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} { |
|
|
address_space_size = 1ULL << address_space_bits; |
|
|
address_space_size = 1ULL << address_space_bits; |
|
|
page_size = 1ULL << page_bits; |
|
|
page_size = 1ULL << page_bits; |
|
|
page_mask = page_size - 1ULL; |
|
|
page_mask = page_size - 1ULL; |
|
|
@ -43,6 +46,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 |
|
|
big_page_table_cpu.resize(big_page_table_size); |
|
|
big_page_table_cpu.resize(big_page_table_size); |
|
|
big_page_continous.resize(big_page_table_size / continous_bits, 0); |
|
|
big_page_continous.resize(big_page_table_size / continous_bits, 0); |
|
|
entries.resize(page_table_size / 32, 0); |
|
|
entries.resize(page_table_size / 32, 0); |
|
|
|
|
|
if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) { |
|
|
|
|
|
fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer(); |
|
|
|
|
|
} else { |
|
|
|
|
|
fastmem_arena = nullptr; |
|
|
|
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
MemoryManager::~MemoryManager() = default; |
|
|
MemoryManager::~MemoryManager() = default; |
|
|
@ -185,15 +193,12 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { |
|
|
if (size == 0) { |
|
|
if (size == 0) { |
|
|
return; |
|
|
return; |
|
|
} |
|
|
} |
|
|
const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); |
|
|
|
|
|
|
|
|
GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash); |
|
|
|
|
|
|
|
|
for (const auto& [map_addr, map_size] : submapped_ranges) { |
|
|
|
|
|
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
|
|
|
|
|
|
const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr); |
|
|
|
|
|
ASSERT(cpu_addr); |
|
|
|
|
|
|
|
|
|
|
|
rasterizer->UnmapMemory(*cpu_addr, map_size); |
|
|
|
|
|
|
|
|
for (const auto& [map_addr, map_size] : page_stash) { |
|
|
|
|
|
rasterizer->UnmapMemory(map_addr, map_size); |
|
|
} |
|
|
} |
|
|
|
|
|
page_stash.clear(); |
|
|
|
|
|
|
|
|
BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); |
|
|
BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); |
|
|
PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); |
|
|
PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); |
|
|
@ -355,7 +360,7 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
template <bool is_safe> |
|
|
|
|
|
|
|
|
template <bool is_safe, bool use_fastmem> |
|
|
void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, |
|
|
void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, |
|
|
[[maybe_unused]] VideoCommon::CacheType which) const { |
|
|
[[maybe_unused]] VideoCommon::CacheType which) const { |
|
|
auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index, |
|
|
auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index, |
|
|
@ -369,8 +374,12 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: |
|
|
if constexpr (is_safe) { |
|
|
if constexpr (is_safe) { |
|
|
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); |
|
|
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); |
|
|
} |
|
|
} |
|
|
|
|
|
if constexpr (use_fastmem) { |
|
|
|
|
|
std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount); |
|
|
|
|
|
} else { |
|
|
u8* physical = memory.GetPointer(cpu_addr_base); |
|
|
u8* physical = memory.GetPointer(cpu_addr_base); |
|
|
std::memcpy(dest_buffer, physical, copy_amount); |
|
|
std::memcpy(dest_buffer, physical, copy_amount); |
|
|
|
|
|
} |
|
|
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; |
|
|
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; |
|
|
}; |
|
|
}; |
|
|
auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
|
|
auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
|
|
@ -379,12 +388,16 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: |
|
|
if constexpr (is_safe) { |
|
|
if constexpr (is_safe) { |
|
|
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); |
|
|
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); |
|
|
} |
|
|
} |
|
|
|
|
|
if constexpr (use_fastmem) { |
|
|
|
|
|
std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount); |
|
|
|
|
|
} else { |
|
|
if (!IsBigPageContinous(page_index)) [[unlikely]] { |
|
|
if (!IsBigPageContinous(page_index)) [[unlikely]] { |
|
|
memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); |
|
|
memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); |
|
|
} else { |
|
|
} else { |
|
|
u8* physical = memory.GetPointer(cpu_addr_base); |
|
|
u8* physical = memory.GetPointer(cpu_addr_base); |
|
|
std::memcpy(dest_buffer, physical, copy_amount); |
|
|
std::memcpy(dest_buffer, physical, copy_amount); |
|
|
} |
|
|
} |
|
|
|
|
|
} |
|
|
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; |
|
|
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; |
|
|
}; |
|
|
}; |
|
|
auto read_short_pages = [&](std::size_t page_index, std::size_t offset, |
|
|
auto read_short_pages = [&](std::size_t page_index, std::size_t offset, |
|
|
@ -397,12 +410,20 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: |
|
|
|
|
|
|
|
|
// MemoryManager::ReadBlock: read entry point taking a GPU virtual address (gpu_src_addr),
// a destination buffer, a byte count and the cache type `which`.
// Every visible body row forwards the arguments unchanged to ReadBlockImpl with
// is_safe = true; in the ReadBlockImpl rows visible in this file, is_safe makes each mapped
// range go through rasterizer->FlushRegion(cpu_addr_base, copy_amount, which) before the copy.
// NOTE(review): this text is a rendered diff, so rows from two revisions are interleaved
// (unchanged rows appear twice). The lone ReadBlockImpl<true>(...) row looks like the
// pre-change body and the fastmem_arena dispatch looks like its replacement; confirm
// against the real file before treating this span as compilable code.
void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, |
|
|
void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, |
|
|
VideoCommon::CacheType which) const { |
|
|
VideoCommon::CacheType which) const { |
|
|
ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size, which); |
|
|
|
|
|
|
|
|
// fastmem_arena is set by the constructor to the device memory's virtual base pointer only
// when fastmem is enabled and the GPU level is not extreme; otherwise it is nullptr.
// When it is non-null, pick the use_fastmem = true instantiation, which memcpy's straight
// out of fastmem_arena[cpu_addr_base].
if (fastmem_arena) [[likely]] { |
|
|
|
|
|
ReadBlockImpl<true, true>(gpu_src_addr, dest_buffer, size, which); |
|
|
|
|
|
return; |
|
|
|
|
|
} |
|
|
|
|
|
// Fallback: use_fastmem = false reads through `memory` (GetPointer / ReadBlockUnsafe).
ReadBlockImpl<true, false>(gpu_src_addr, dest_buffer, size, which); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// MemoryManager::ReadBlockUnsafe: same dispatch as ReadBlock, but every visible body row
// instantiates ReadBlockImpl with is_safe = false and passes VideoCommon::CacheType::None.
// In the ReadBlockImpl rows visible in this file, the rasterizer->FlushRegion call is guarded
// by `if constexpr (is_safe)`, so this path copies without that flush.
// NOTE(review): this text is a rendered diff, so rows from two revisions are interleaved
// (unchanged rows appear twice). The lone ReadBlockImpl<false>(...) row looks like the
// pre-change body and the fastmem_arena dispatch looks like its replacement; confirm
// against the real file before treating this span as compilable code.
void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, |
|
|
void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, |
|
|
const std::size_t size) const { |
|
|
const std::size_t size) const { |
|
|
ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); |
|
|
|
|
|
|
|
|
// fastmem_arena is non-null only when the constructor found fastmem enabled and the GPU
// level not extreme; in that case copy directly out of fastmem_arena (use_fastmem = true).
if (fastmem_arena) [[likely]] { |
|
|
|
|
|
ReadBlockImpl<false, true>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); |
|
|
|
|
|
return; |
|
|
|
|
|
} |
|
|
|
|
|
// Fallback: use_fastmem = false reads through `memory` (GetPointer / ReadBlockUnsafe).
ReadBlockImpl<false, false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
template <bool is_safe> |
|
|
template <bool is_safe> |
|
|
@ -454,6 +475,12 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf |
|
|
WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); |
|
|
WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/// Writes `size` bytes from `src_buffer` to the GPU virtual address `gpu_dest_addr` and
/// remembers the written range for a later, batched invalidation.
///
/// The write goes through WriteBlockImpl<false> with CacheType::None (the same instantiation
/// WriteBlockUnsafe uses). The range is then recorded in `accumulator`; FlushCaching() is the
/// consumer that later drains the accumulator and passes the ranges to the rasterizer.
void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer,
                                     std::size_t size) {
    // Perform the raw write first, then note the GPU range that was touched.
    this->WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
    this->accumulator->Add(gpu_dest_addr, size);
}
|
|
|
|
|
|
|
|
void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, |
|
|
void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, |
|
|
VideoCommon::CacheType which) const { |
|
|
VideoCommon::CacheType which) const { |
|
|
auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, |
|
|
auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, |
|
|
@ -663,7 +690,17 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons |
|
|
// MemoryManager::GetSubmappedRange: returns a vector of (GPU virtual address, size) pairs
// for the range [gpu_addr, gpu_addr + size).
// The visible post-change rows only create `result`, let GetSubmappedRangeImpl<true> fill it
// and return it. In the visible GetSubmappedRangeImpl rows, is_gpu_address = true makes each
// new segment start at the GPU address rebuilt from (page_index << page bits) + offset and
// grow by copy_amount for each following page.
// NOTE(review): this text is a rendered diff, so rows from two revisions are interleaved
// (unchanged rows appear twice). The lone `last_segment{}` row looks like a pre-change row
// whose declaration now lives in GetSubmappedRangeImpl; confirm against the real file.
std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( |
|
|
std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( |
|
|
GPUVAddr gpu_addr, std::size_t size) const { |
|
|
GPUVAddr gpu_addr, std::size_t size) const { |
|
|
std::vector<std::pair<GPUVAddr, std::size_t>> result{}; |
|
|
std::vector<std::pair<GPUVAddr, std::size_t>> result{}; |
|
|
std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{}; |
|
|
|
|
|
|
|
|
// is_gpu_address = true: the segments appended to `result` carry GPU addresses.
GetSubmappedRangeImpl<true>(gpu_addr, size, result); |
|
|
|
|
|
return result; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
template <bool is_gpu_address> |
|
|
|
|
|
void MemoryManager::GetSubmappedRangeImpl( |
|
|
|
|
|
GPUVAddr gpu_addr, std::size_t size, |
|
|
|
|
|
std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& |
|
|
|
|
|
result) const { |
|
|
|
|
|
std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> |
|
|
|
|
|
last_segment{}; |
|
|
std::optional<VAddr> old_page_addr{}; |
|
|
std::optional<VAddr> old_page_addr{}; |
|
|
const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, |
|
|
const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, |
|
|
[[maybe_unused]] std::size_t offset, |
|
|
[[maybe_unused]] std::size_t offset, |
|
|
@ -685,8 +722,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( |
|
|
} |
|
|
} |
|
|
old_page_addr = {cpu_addr_base + copy_amount}; |
|
|
old_page_addr = {cpu_addr_base + copy_amount}; |
|
|
if (!last_segment) { |
|
|
if (!last_segment) { |
|
|
|
|
|
if constexpr (is_gpu_address) { |
|
|
const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; |
|
|
const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; |
|
|
last_segment = {new_base_addr, copy_amount}; |
|
|
last_segment = {new_base_addr, copy_amount}; |
|
|
|
|
|
} else { |
|
|
|
|
|
last_segment = {cpu_addr_base, copy_amount}; |
|
|
|
|
|
} |
|
|
} else { |
|
|
} else { |
|
|
last_segment->second += copy_amount; |
|
|
last_segment->second += copy_amount; |
|
|
} |
|
|
} |
|
|
@ -703,8 +744,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( |
|
|
} |
|
|
} |
|
|
old_page_addr = {cpu_addr_base + copy_amount}; |
|
|
old_page_addr = {cpu_addr_base + copy_amount}; |
|
|
if (!last_segment) { |
|
|
if (!last_segment) { |
|
|
|
|
|
if constexpr (is_gpu_address) { |
|
|
const GPUVAddr new_base_addr = (page_index << page_bits) + offset; |
|
|
const GPUVAddr new_base_addr = (page_index << page_bits) + offset; |
|
|
last_segment = {new_base_addr, copy_amount}; |
|
|
last_segment = {new_base_addr, copy_amount}; |
|
|
|
|
|
} else { |
|
|
|
|
|
last_segment = {cpu_addr_base, copy_amount}; |
|
|
|
|
|
} |
|
|
} else { |
|
|
} else { |
|
|
last_segment->second += copy_amount; |
|
|
last_segment->second += copy_amount; |
|
|
} |
|
|
} |
|
|
@ -715,7 +760,18 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( |
|
|
}; |
|
|
}; |
|
|
MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages); |
|
|
MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages); |
|
|
split(0, 0, 0); |
|
|
split(0, 0, 0); |
|
|
return result; |
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/// Drains the invalidation accumulator and forwards the collected ranges to the rasterizer.
///
/// Does nothing when the accumulator reports that no range has been accumulated. Otherwise
/// each accumulated (GPU address, size) range is expanded by GetSubmappedRangeImpl<false>,
/// which appends CPU-address segments to `page_stash`. The whole stash is then passed to
/// rasterizer->InnerInvalidation in one call, and both the stash and the accumulator are
/// emptied.
void MemoryManager::FlushCaching() {
    if (accumulator->AnyAccumulated()) {
        // Translate every pending GPU range into CPU-address segments inside page_stash.
        accumulator->Callback([this](GPUVAddr range_base, size_t range_size) {
            GetSubmappedRangeImpl<false>(range_base, range_size, page_stash);
        });
        rasterizer->InnerInvalidation(page_stash);
        // page_stash is a member that other code also fills (Unmap), so leave it empty.
        page_stash.clear();
        accumulator->Clear();
    }
}
|
|
|
|
|
|
|
|
} // namespace Tegra
|
|
|
} // namespace Tegra
|