
[dynarmic, common] pagetable clustering (#3215)

Raises the size of each page entry to 32 bytes; however, it merges the separate per-page arrays (backing_addr/pointers/blocks) into a single structure.
THEORETICALLY this is better, since the observed access pattern corresponds to the program wanting backing_addr/pointers/blocks immediately after one another.
This may improve performance at the cost of some extra memory.
Another implementation would be to cluster only backing_addr/blocks within the same virtual buffer.
Alas, spamming virtual buffers is evil, since each of them is a cache thrasher (imagine jumping from one wildly different block to another immediately).

Signed-off-by: lizzie <lizzie@eden-emu.dev>

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3215
Reviewed-by: DraVee <dravee@eden-emu.dev>
Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
Co-authored-by: lizzie <lizzie@eden-emu.dev>
Co-committed-by: lizzie <lizzie@eden-emu.dev>
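
For reference, a minimal sketch of the layout change described above (simplified, not the exact Eden code: std::vector stands in for Common::VirtualBuffer, the real ptr field is an atomic PageInfo rather than a plain integer, and the size assertion assumes a 64-bit host):

    #include <cstdint>
    #include <vector>

    // Before: structure-of-arrays. A lookup that needs the pointer, the block
    // base and the backing address touches three far-apart cache lines.
    struct PageTableSoA {
        std::vector<std::uintptr_t> pointers;     // host pointer | PageType bits
        std::vector<std::uint64_t>  blocks;       // base of the contiguous mapping
        std::vector<std::uint64_t>  backing_addr; // physical backing address
    };

    // After: array-of-structures. The three fields for a page share one 32-byte
    // record, so the usual ptr -> addr -> block access pattern stays within a
    // single cache line, at the cost of one padding word per page.
    struct PageEntryData {
        std::uintptr_t ptr;
        std::uint64_t  block;
        std::uint64_t  addr;
        std::uint64_t  padding; // rounds the entry up to a power-of-two stride
    };
    static_assert(sizeof(PageEntryData) == 32, "JIT emitters assume a 32-byte stride");

    struct PageTableAoS {
        std::vector<PageEntryData> entries;
    };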
Branch: pull/3303/head
Commit dceeccd04b, authored by lizzie, committed by crueter
Changed files (number of changed lines in parentheses):
  1. src/common/page_table.cpp (43)
  2. src/common/page_table.h (25)
  3. src/core/arm/dynarmic/arm_dynarmic_32.cpp (8)
  4. src/core/arm/dynarmic/arm_dynarmic_64.cpp (6)
  5. src/core/memory.cpp (102)
  6. src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp (1)
  7. src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp (1)
  8. src/dynarmic/src/dynarmic/backend/arm64/emit_arm64.h (1)
  9. src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_memory.cpp (2)
  10. src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h (10)
  11. src/dynarmic/src/dynarmic/interface/A32/config.h (3)
  12. src/dynarmic/src/dynarmic/interface/A64/config.h (6)
  13. src/video_core/renderer_vulkan/maxwell_to_vk.h (3)

43
src/common/page_table.cpp

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -22,37 +25,25 @@ bool PageTable::ContinueTraversal(TraversalEntry* out_entry, TraversalContext* c
// Setup invalid defaults.
out_entry->phys_addr = 0;
out_entry->block_size = page_size;
// Regardless of whether the page was mapped, advance on exit.
SCOPE_EXIT {
context->next_page += 1;
context->next_offset += page_size;
};
// Validate that we can read the actual entry.
const auto page = context->next_page;
if (page >= backing_addr.size()) {
return false;
if (auto const page = context->next_page; page < entries.size()) {
// Validate that the entry is mapped.
if (auto const paddr = entries[page].addr; paddr != 0) {
// Populate the results.
out_entry->phys_addr = paddr + context->next_offset;
context->next_page += 1;
context->next_offset += page_size;
return true;
}
}
// Validate that the entry is mapped.
const auto phys_addr = backing_addr[page];
if (phys_addr == 0) {
return false;
}
// Populate the results.
out_entry->phys_addr = phys_addr + context->next_offset;
return true;
context->next_page += 1;
context->next_offset += page_size;
return false;
}
void PageTable::Resize(std::size_t address_space_width_in_bits, std::size_t page_size_in_bits) {
const std::size_t num_page_table_entries{1ULL
<< (address_space_width_in_bits - page_size_in_bits)};
pointers.resize(num_page_table_entries);
backing_addr.resize(num_page_table_entries);
blocks.resize(num_page_table_entries);
auto const num_page_table_entries = 1ULL << (address_space_width_in_bits - page_size_in_bits);
entries.resize(num_page_table_entries);
current_address_space_width_in_bits = address_space_width_in_bits;
page_size = 1ULL << page_size_in_bits;
}

25
src/common/page_table.h

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -75,7 +78,7 @@ struct PageTable {
/// Write a page pointer and type pair atomically
void Store(uintptr_t pointer, PageType type) noexcept {
raw.store(pointer | static_cast<uintptr_t>(type));
raw.store(pointer | uintptr_t(type));
}
/// Unpack a pointer from a page info raw representation
@@ -124,18 +127,20 @@ struct PageTable {
return false;
}
*out_phys_addr = backing_addr[virt_addr / page_size] + GetInteger(virt_addr);
*out_phys_addr = entries[virt_addr / page_size].addr + GetInteger(virt_addr);
return true;
}
/**
* Vector of memory pointers backing each page. An entry can only be non-null if the
* corresponding attribute element is of type `Memory`.
*/
VirtualBuffer<PageInfo> pointers;
VirtualBuffer<u64> blocks;
VirtualBuffer<u64> backing_addr;
/// Vector of memory pointers backing each page. An entry can only be non-null if the
/// corresponding attribute element is of type `Memory`.
struct PageEntryData {
PageInfo ptr;
u64 block;
u64 addr;
u64 padding;
};
VirtualBuffer<PageEntryData> entries;
static_assert(sizeof(PageEntryData) == 32);
std::size_t current_address_space_width_in_bits{};

8
src/core/arm/dynarmic/arm_dynarmic_32.cpp

@@ -186,11 +186,13 @@ std::shared_ptr<Dynarmic::A32::Jit> ArmDynarmic32::MakeJit(Common::PageTable* pa
if (page_table) {
constexpr size_t PageBits = 12;
constexpr size_t NumPageTableEntries = 1 << (32 - PageBits);
constexpr size_t PageLog2Stride = 5;
static_assert(1 << PageLog2Stride == sizeof(Common::PageTable::PageEntryData));
config.page_table = reinterpret_cast<std::array<std::uint8_t*, NumPageTableEntries>*>(
page_table->pointers.data());
config.absolute_offset_page_table = true;
config.page_table = reinterpret_cast<std::array<std::uint8_t*, NumPageTableEntries>*>(page_table->entries.data());
config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS;
config.page_table_log2_stride = PageLog2Stride;
config.absolute_offset_page_table = true;
config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
config.only_detect_misalignment_via_page_table_on_page_boundary = true;

6
src/core/arm/dynarmic/arm_dynarmic_64.cpp

@@ -233,9 +233,13 @@ std::shared_ptr<Dynarmic::A64::Jit> ArmDynarmic64::MakeJit(Common::PageTable* pa
// Memory
if (page_table) {
config.page_table = reinterpret_cast<void**>(page_table->pointers.data());
constexpr size_t PageLog2Stride = 5;
static_assert(1 << PageLog2Stride == sizeof(Common::PageTable::PageEntryData));
config.page_table = reinterpret_cast<void**>(page_table->entries.data());
config.page_table_address_space_bits = std::uint32_t(address_space_bits);
config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS;
config.page_table_log2_stride = PageLog2Stride;
config.silently_mirror_page_table = false;
config.absolute_offset_page_table = true;
config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;

102
src/core/memory.cpp

@@ -106,11 +106,9 @@ struct Memory::Impl {
return;
}
u64 protect_bytes{};
u64 protect_begin{};
u64 protect_bytes = 0, protect_begin = 0;
for (u64 addr = vaddr; addr < vaddr + size; addr += YUZU_PAGESIZE) {
const Common::PageType page_type{
current_page_table->pointers[addr >> YUZU_PAGEBITS].Type()};
const Common::PageType page_type = current_page_table->entries[addr >> YUZU_PAGEBITS].ptr.Type();
switch (page_type) {
case Common::PageType::RasterizerCachedMemory:
if (protect_bytes > 0) {
@@ -119,9 +117,8 @@
}
break;
default:
if (protect_bytes == 0) {
if (protect_bytes == 0)
protect_begin = addr;
}
protect_bytes += YUZU_PAGESIZE;
}
}
@@ -132,25 +129,17 @@
}
[[nodiscard]] u8* GetPointerFromRasterizerCachedMemory(u64 vaddr) const {
const Common::PhysicalAddress paddr{
current_page_table->backing_addr[vaddr >> YUZU_PAGEBITS]};
if (!paddr) {
return {};
}
return system.DeviceMemory().GetPointer<u8>(paddr + vaddr);
Common::PhysicalAddress const paddr = current_page_table->entries[vaddr >> YUZU_PAGEBITS].addr;
if (paddr)
return system.DeviceMemory().GetPointer<u8>(paddr + vaddr);
return {};
}
[[nodiscard]] u8* GetPointerFromDebugMemory(u64 vaddr) const {
const Common::PhysicalAddress paddr{
current_page_table->backing_addr[vaddr >> YUZU_PAGEBITS]};
if (paddr == 0) {
return {};
}
return system.DeviceMemory().GetPointer<u8>(paddr + vaddr);
const Common::PhysicalAddress paddr = current_page_table->entries[vaddr >> YUZU_PAGEBITS].addr;
if (paddr != 0)
return system.DeviceMemory().GetPointer<u8>(paddr + vaddr);
return {};
}
u8 Read8(const Common::ProcessAddress addr) {
@@ -268,7 +257,7 @@
const auto current_vaddr =
static_cast<u64>((page_index << YUZU_PAGEBITS) + page_offset);
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
const auto [pointer, type] = page_table.entries[page_index].ptr.PointerType();
switch (type) {
case Common::PageType::Unmapped: {
user_accessible = false;
@@ -344,16 +333,16 @@
}
const u8* GetSpan(const VAddr src_addr, const std::size_t size) const {
if (current_page_table->blocks[src_addr >> YUZU_PAGEBITS] ==
current_page_table->blocks[(src_addr + size) >> YUZU_PAGEBITS]) {
if (current_page_table->entries[src_addr >> YUZU_PAGEBITS].block ==
current_page_table->entries[(src_addr + size) >> YUZU_PAGEBITS].block) {
return GetPointerSilent(src_addr);
}
return nullptr;
}
u8* GetSpan(const VAddr src_addr, const std::size_t size) {
if (current_page_table->blocks[src_addr >> YUZU_PAGEBITS] ==
current_page_table->blocks[(src_addr + size) >> YUZU_PAGEBITS]) {
if (current_page_table->entries[src_addr >> YUZU_PAGEBITS].block ==
current_page_table->entries[(src_addr + size) >> YUZU_PAGEBITS].block) {
return GetPointerSilent(src_addr);
}
return nullptr;
@@ -511,21 +500,19 @@
const u64 num_pages = ((vaddr + size - 1) >> YUZU_PAGEBITS) - (vaddr >> YUZU_PAGEBITS) + 1;
for (u64 i = 0; i < num_pages; ++i, vaddr += YUZU_PAGESIZE) {
const Common::PageType page_type{
current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Type()};
const Common::PageType page_type = current_page_table->entries[vaddr >> YUZU_PAGEBITS].ptr.Type();
if (debug) {
// Switch page type to debug if now debug
switch (page_type) {
case Common::PageType::Unmapped:
ASSERT_MSG(false, "Attempted to mark unmapped pages as debug");
ASSERT(false && "Attempted to mark unmapped pages as debug");
break;
case Common::PageType::RasterizerCachedMemory:
case Common::PageType::DebugMemory:
// Page is already marked.
break;
case Common::PageType::Memory:
current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Store(
0, Common::PageType::DebugMemory);
current_page_table->entries[vaddr >> YUZU_PAGEBITS].ptr.Store(0, Common::PageType::DebugMemory);
break;
default:
UNREACHABLE();
@@ -534,17 +521,15 @@
// Switch page type to non-debug if now non-debug
switch (page_type) {
case Common::PageType::Unmapped:
ASSERT_MSG(false, "Attempted to mark unmapped pages as non-debug");
ASSERT(false && "Attempted to mark unmapped pages as non-debug");
break;
case Common::PageType::RasterizerCachedMemory:
case Common::PageType::Memory:
// Don't mess with already non-debug or rasterizer memory.
break;
case Common::PageType::DebugMemory: {
u8* const pointer{GetPointerFromDebugMemory(vaddr & ~YUZU_PAGEMASK)};
current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Store(
reinterpret_cast<uintptr_t>(pointer) - (vaddr & ~YUZU_PAGEMASK),
Common::PageType::Memory);
u8* const pointer = GetPointerFromDebugMemory(vaddr & ~YUZU_PAGEMASK);
current_page_table->entries[vaddr >> YUZU_PAGEBITS].ptr.Store(uintptr_t(pointer) - (vaddr & ~YUZU_PAGEMASK), Common::PageType::Memory);
break;
}
default:
@@ -577,8 +562,7 @@
const u64 num_pages = ((vaddr + size - 1) >> YUZU_PAGEBITS) - (vaddr >> YUZU_PAGEBITS) + 1;
for (u64 i = 0; i < num_pages; ++i, vaddr += YUZU_PAGESIZE) {
const Common::PageType page_type{
current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Type()};
const Common::PageType page_type = current_page_table->entries[vaddr >> YUZU_PAGEBITS].ptr.Type();
if (cached) {
// Switch page type to cached if now cached
switch (page_type) {
@@ -588,8 +572,7 @@
break;
case Common::PageType::DebugMemory:
case Common::PageType::Memory:
current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Store(
0, Common::PageType::RasterizerCachedMemory);
current_page_table->entries[vaddr >> YUZU_PAGEBITS].ptr.Store(0, Common::PageType::RasterizerCachedMemory);
break;
case Common::PageType::RasterizerCachedMemory:
// There can be more than one GPU region mapped per CPU region, so it's common
@@ -611,17 +594,13 @@
// that this area is already unmarked as cached.
break;
case Common::PageType::RasterizerCachedMemory: {
u8* const pointer{GetPointerFromRasterizerCachedMemory(vaddr & ~YUZU_PAGEMASK)};
if (pointer == nullptr) {
if (u8* const pointer = GetPointerFromRasterizerCachedMemory(vaddr & ~YUZU_PAGEMASK); pointer == nullptr) {
// It's possible that this function has been called while updating the
// pagetable after unmapping a VMA. In that case the underlying VMA will no
// longer exist, and we should just leave the pagetable entry blank.
current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Store(
0, Common::PageType::Unmapped);
current_page_table->entries[vaddr >> YUZU_PAGEBITS].ptr.Store(0, Common::PageType::Unmapped);
} else {
current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Store(
reinterpret_cast<uintptr_t>(pointer) - (vaddr & ~YUZU_PAGEMASK),
Common::PageType::Memory);
current_page_table->entries[vaddr >> YUZU_PAGEBITS].ptr.Store(uintptr_t(pointer) - (vaddr & ~YUZU_PAGEMASK), Common::PageType::Memory);
}
break;
}
@@ -649,31 +628,28 @@
base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE);
const auto end = base + size;
ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
base + page_table.pointers.size());
ASSERT_MSG(end <= page_table.entries.size(), "out of range mapping at {:016X}", base + page_table.entries.size());
if (!target) {
ASSERT_MSG(type != Common::PageType::Memory,
"Mapping memory page without a pointer @ {:016x}", base * YUZU_PAGESIZE);
while (base != end) {
page_table.pointers[base].Store(0, type);
page_table.backing_addr[base] = 0;
page_table.blocks[base] = 0;
page_table.entries[base].ptr.Store(0, type);
page_table.entries[base].addr = 0;
page_table.entries[base].block = 0;
base += 1;
}
} else {
auto orig_base = base;
while (base != end) {
auto host_ptr =
reinterpret_cast<uintptr_t>(system.DeviceMemory().GetPointer<u8>(target)) -
(base << YUZU_PAGEBITS);
auto host_ptr = uintptr_t(system.DeviceMemory().GetPointer<u8>(target)) - (base << YUZU_PAGEBITS);
auto backing = GetInteger(target) - (base << YUZU_PAGEBITS);
page_table.pointers[base].Store(host_ptr, type);
page_table.backing_addr[base] = backing;
page_table.blocks[base] = orig_base << YUZU_PAGEBITS;
page_table.entries[base].ptr.Store(host_ptr, type);
page_table.entries[base].addr = backing;
page_table.entries[base].block = orig_base << YUZU_PAGEBITS;
ASSERT_MSG(page_table.pointers[base].Pointer(),
ASSERT_MSG(page_table.entries[base].ptr.Pointer(),
"memory mapping base yield a nullptr within the table");
base += 1;
@@ -688,7 +664,7 @@
vaddr &= 0xffffffffffffULL;
if (AddressSpaceContains(*current_page_table, vaddr, 1)) [[likely]] {
// Avoid adding any extra logic to this fast-path block
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Raw();
const uintptr_t raw_pointer = current_page_table->entries[vaddr >> YUZU_PAGEBITS].ptr.Raw();
if (const uintptr_t pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) [[likely]] {
return reinterpret_cast<u8*>(pointer + vaddr);
} else {
@@ -933,10 +909,10 @@ void Memory::ProtectRegion(Common::PageTable& page_table, Common::ProcessAddress
bool Memory::IsValidVirtualAddress(const Common::ProcessAddress vaddr) const {
const auto& page_table = *impl->current_page_table;
const size_t page = vaddr >> YUZU_PAGEBITS;
if (page >= page_table.pointers.size()) {
if (page >= page_table.entries.size()) {
return false;
}
const auto [pointer, type] = page_table.pointers[page].PointerType();
const auto [pointer, type] = page_table.entries[page].ptr.PointerType();
return pointer != 0 || type == Common::PageType::RasterizerCachedMemory ||
type == Common::PageType::DebugMemory;
}
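
A sketch of what the GetSpan contiguity check above relies on (Entry and SpanIsContiguous are illustrative names, not from the codebase): MapPages records in block the virtual base of the contiguous host mapping each page belongs to, so two pages with equal block values are backed by one allocation and the span between them can be handed out as a raw pointer.

    #include <cstddef>
    #include <cstdint>

    struct Entry {
        std::uintptr_t ptr;
        std::uint64_t  block; // virtual base of the contiguous mapping (orig_base << page_bits)
        std::uint64_t  addr;
        std::uint64_t  padding;
    };

    // Mirrors the check in Memory::Impl::GetSpan: the same block base for the
    // first and last page means the whole range lives in one host allocation.
    bool SpanIsContiguous(const Entry* entries, std::uint64_t vaddr,
                          std::size_t size, unsigned page_bits) {
        return entries[vaddr >> page_bits].block ==
               entries[(vaddr + size) >> page_bits].block;
    }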

1
src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp

@@ -372,6 +372,7 @@ EmitConfig A32AddressSpace::GetEmitConfig() {
.page_table_pointer = std::bit_cast<u64>(conf.page_table),
.page_table_address_space_bits = 32,
.page_table_pointer_mask_bits = conf.page_table_pointer_mask_bits,
.page_table_log2_stride = conf.page_table_log2_stride,
.silently_mirror_page_table = true,
.absolute_offset_page_table = conf.absolute_offset_page_table,
.detect_misaligned_access_via_page_table = conf.detect_misaligned_access_via_page_table,

1
src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp

@@ -547,6 +547,7 @@ EmitConfig A64AddressSpace::GetEmitConfig() {
.page_table_pointer = std::bit_cast<u64>(conf.page_table),
.page_table_address_space_bits = conf.page_table_address_space_bits,
.page_table_pointer_mask_bits = conf.page_table_pointer_mask_bits,
.page_table_log2_stride = conf.page_table_log2_stride,
.silently_mirror_page_table = conf.silently_mirror_page_table,
.absolute_offset_page_table = conf.absolute_offset_page_table,
.detect_misaligned_access_via_page_table = conf.detect_misaligned_access_via_page_table,

1
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64.h

@@ -129,6 +129,7 @@ struct EmitConfig {
u64 page_table_pointer;
size_t page_table_address_space_bits;
int page_table_pointer_mask_bits;
size_t page_table_log2_stride;
bool silently_mirror_page_table;
bool absolute_offset_page_table;
u8 detect_misaligned_access_via_page_table;

2
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_memory.cpp

@@ -268,7 +268,7 @@ std::pair<oaknut::XReg, oaknut::XReg> InlinePageTableEmitVAddrLookup(oaknut::Cod
code.B(NE, *fallback);
}
code.LDR(Xscratch0, Xpagetable, Xscratch0, LSL, 3);
code.LDR(Xscratch0, Xpagetable, Xscratch0, LSL, ctx.conf.page_table_log2_stride);
if (ctx.conf.page_table_pointer_mask_bits != 0) {
const u64 mask = u64(~u64(0)) << ctx.conf.page_table_pointer_mask_bits;

10
src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h

@@ -83,9 +83,9 @@
// TODO: This code assumes vaddr has been zext from 32-bits to 64-bits.
code.mov(tmp, vaddr.cvt32());
code.shr(tmp, static_cast<int>(page_bits));
code.mov(page, qword[r14 + tmp.cvt64() * sizeof(void*)]);
code.shr(tmp, int(page_bits));
code.shl(tmp, int(ctx.conf.page_table_log2_stride));
code.mov(page, qword[r14 + tmp.cvt64()]);
if (ctx.conf.page_table_pointer_mask_bits == 0) {
code.test(page, page);
} else {
@@ -138,7 +138,9 @@
code.test(tmp, u32(-(1 << valid_page_index_bits)));
code.jnz(abort, code.T_NEAR);
}
code.mov(page, qword[r14 + tmp * sizeof(void*)]);
code.shl(tmp, int(ctx.conf.page_table_log2_stride));
code.mov(page, qword[r14 + tmp]);
if (ctx.conf.page_table_pointer_mask_bits == 0) {
code.test(page, page);
} else {
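
The x64 and arm64 emitter changes above both come down to the same address computation: with the old 8-byte entries the page lookup could use scaled addressing (index * sizeof(void*)), but a 32-byte entry does not fit the x86 SIB scale factors (1/2/4/8), so the page index is shifted left by page_table_log2_stride (5 for the clustered table) before being added to the table base. A hedged sketch of the math the emitted code performs (EntryAddress is an illustrative helper, not part of dynarmic):

    #include <cstdint>

    std::uint64_t EntryAddress(std::uint64_t table_base, std::uint64_t vaddr,
                               unsigned page_bits, unsigned log2_stride) {
        const std::uint64_t page_index = vaddr >> page_bits; // e.g. page_bits = 12
        return table_base + (page_index << log2_stride);     // address of entries[page_index]
    }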

3
src/dynarmic/src/dynarmic/interface/A32/config.h

@@ -168,6 +168,9 @@ struct UserConfig {
/// If the configured value is 3, all pointers will be forcefully aligned to 8 bytes.
std::int32_t page_table_pointer_mask_bits = 0;
// Log2 of the size per page entry, value should be either 3 or 4
std::size_t page_table_log2_stride = 3;
/// Select the architecture version to use.
/// There are minor behavioural differences between versions.
ArchVersion arch_version = ArchVersion::v8;

6
src/dynarmic/src/dynarmic/interface/A64/config.h

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD
@@ -179,6 +182,9 @@ struct UserConfig {
/// If the configured value is 3, all pointers will be forcefully aligned to 8 bytes.
std::int32_t page_table_pointer_mask_bits = 0;
// Log2 of the size per page entry, value should be either 3 or 4
std::size_t page_table_log2_stride = 3;
/// Counter-timer frequency register. The value of the register is not interpreted by
/// dynarmic.
std::uint32_t cntfrq_el0 = 600000000;

3
src/video_core/renderer_vulkan/maxwell_to_vk.h

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
