From 791a5af791ab0b8910e9387d71a28107912c4e3a Mon Sep 17 00:00:00 2001 From: Hexcoder Date: Fri, 20 Mar 2026 01:43:58 +0100 Subject: [PATCH] experimental: allow tracy to get names for jitted functions so they can show up in the profiler in sample based mode Names are provided by dynarmic. WARNING: currently jit code does not support stacktraces, which tracy relies on, so only the top-level function at the time of the sample shows up, which makes using this for profiling jitted code mostly pointless, but maybe we can fix this --- CMakeLists.txt | 3 +- cpmfile.json | 4 +- src/common/tracy_jit_symbols.cpp | 62 +++++++++ src/common/tracy_jit_symbols.h | 118 ++++++++++++++++++ .../src/dynarmic/backend/x64/emit_x64.cpp | 9 +- 5 files changed, 192 insertions(+), 4 deletions(-) create mode 100644 src/common/tracy_jit_symbols.cpp create mode 100644 src/common/tracy_jit_symbols.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 5501343e38..7b5deed870 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -670,7 +670,7 @@ if (ENABLE_TRACY) if (tracy_ADDED) include_directories(${tracy_SOURCE_DIR}/public) # build tracy client as its own static library even if overkill - add_library(tracy_client STATIC ${tracy_SOURCE_DIR}/public/TracyClient.cpp) + add_library(tracy_client STATIC ${tracy_SOURCE_DIR}/public/TracyClient.cpp src/common/tracy_jit_symbols.cpp) target_include_directories(tracy_client PRIVATE src) # Avoid tracy errors (This sets it to W0 instead of W3 though? 
TODO: fix this) @@ -682,6 +682,7 @@ if (ENABLE_TRACY) add_compile_definitions(TRACY_SAMPLING_PROFILER_MANUAL_START) # See yuzu\main.cpp add_compile_definitions(TRACY_ON_DEMAND) # Use on demand mode to avoid profiling emulator start up and game load screens add_compile_definitions(TRACY_FIBERS) + add_compile_definitions(TRACY_HAS_USER_SYMBOLS) # try and capture jitted functions in sample mode add_library(Tracy::client ALIAS tracy_client) endif() diff --git a/cpmfile.json b/cpmfile.json index f2cf069c8e..17df4d9c78 100644 --- a/cpmfile.json +++ b/cpmfile.json @@ -117,8 +117,8 @@ }, "tracy": { "repo": "wolfpld/tracy", - "sha": "05cceee", - "hash": "fdf8f3eb0f44c17760e9e559ece6907606580da20568b7900e97e40f3ea773b9e6dbac7f0b04ef32e363d779ec013af63c85adbe2a3807db9205ec48887a546c", + "sha": "984e08e", + "hash": "1D9B3028DA6E0B14BBF06A81D0A13171461EFF216A56C9495C30D469556CB827B352E1B189A10C2CE3EEAB675C3F1B6824E6D2F12795E71641ACEFEC11F6303F", "version": "0.13.1", "download_only": true } diff --git a/src/common/tracy_jit_symbols.cpp b/src/common/tracy_jit_symbols.cpp new file mode 100644 index 0000000000..9e10c9e294 --- /dev/null +++ b/src/common/tracy_jit_symbols.cpp @@ -0,0 +1,62 @@ +#include "tracy_jit_symbols.h" + +#if defined(TRACY_ENABLE) && defined(TRACY_HAS_USER_SYMBOLS) +#include +#include + +#ifndef WIN32_LEAN_AND_MEAN + #define WIN32_LEAN_AND_MEAN +#endif +#ifndef WIN32_NOMINMAX + #define WIN32_NOMINMAX +#endif +#ifndef NOMINMAX + #define NOMINMAX +#endif +#include + +namespace tracy_jit { + std::string JitCodeMap::get_exe_name () { + char name[1024]; + const auto nameLength = GetModuleFileNameA( NULL, name, sizeof( name ) ); + return " ["+std::string(name, nameLength)+"]"; + } +} + +namespace tracy { + extern bool UserDecodeCallstackPtrFast( uint64_t ptr, char* name_buf, size_t buf_size ) { + if (buf_size <= 0) return false; + + auto& map = tracy_jit::JitCodeMap::instance(); + return map.lookup_code_address( ptr, [name_buf, buf_size] 
(tracy_jit::JitCodeMap::JittedCodeBlock& block, char const* exe_name) -> void { + // copy name truncated + size_t safe_chars = std::min(block.name.size(), buf_size-1); + memcpy(name_buf, block.name.c_str(), safe_chars); + name_buf[safe_chars] = '\0'; + } ); + } + extern bool UserDecodeSymbolAddress( uint64_t ptr, CallstackSymbolData* result ) { + auto& map = tracy_jit::JitCodeMap::instance(); + return map.lookup_code_address( ptr, [result] (tracy_jit::JitCodeMap::JittedCodeBlock& block, char const* exe_name) -> void { + result->symAddr = block.entrypoint; + // we have no source file + result->file = CopyStringFast("[unknown]"); + result->line = 0; + result->needFree = true; + } ); + } + extern bool UserDecodeCallstackPtr( uint64_t ptr, CallstackEntryData* result, CallstackEntry* cb_data ) { + auto& map = tracy_jit::JitCodeMap::instance(); + return map.lookup_code_address( ptr, [result, cb_data] (tracy_jit::JitCodeMap::JittedCodeBlock& block, char const* exe_name) -> void { + cb_data[0].symAddr = block.entrypoint; + cb_data[0].symLen = block.size; + cb_data[0].name = CopyStringFast(block.name.c_str(), block.name.size()); + // we have no source file + cb_data[0].file = CopyStringFast("[unknown]"); + cb_data[0].line = 0; + + *result = { cb_data, 1, exe_name }; + } ); + } +} +#endif diff --git a/src/common/tracy_jit_symbols.h b/src/common/tracy_jit_symbols.h new file mode 100644 index 0000000000..e12a1d4e8e --- /dev/null +++ b/src/common/tracy_jit_symbols.h @@ -0,0 +1,118 @@ +#pragma once +#include +#include +#include +#include + +/* + Warning: This allows tracy to show jitted code blocks in its Statistics (Sampling) view and source view (including disassembly) + Will only show up in Statistics (Sampling) view with "Hide unknown" unchecked, as the source code is not known + + But the jitted code does not support stack traces, at least not with windows event tracing + This means that while some jitted blocks show up, I believe it is only the ones that happen to be at 
the top of the stack at the time of the sample (as seen in the ghost flamegraph) + It appears that dynarmic supports stacktraces via an API, but windows ETW does not know about this, and merging ETW results after the fact might not be possible... +*/ + +#if defined(TRACY_ENABLE) && defined(TRACY_HAS_USER_SYMBOLS) +#include +#include + +namespace tracy_jit { + +class JitCodeMap { +public: + static_assert(sizeof(const void*) == sizeof(uint64_t)); + static_assert(sizeof(size_t) == sizeof(uint64_t)); + struct JittedCodeBlock { + // friendly name already provided by dynarmic + std::string name; + uint64_t entrypoint; + uint32_t size; + }; + + //// registration functions called by dynarmic EmitX64 + // which appears to execute on CPU threads, so this needs a mutex to be threadsafe + + // TODO: copy block code RegisterBlock and never unregister? + // because tracy copies code way after emulation has already stopped and code may already have been freed + + void _RegisterBlock (const void* entrypoint, size_t size, std::string&& name) { + std::unique_lock lock{mutex}; + sorted_blocks.emplace((uint64_t)entrypoint, JittedCodeBlock{ std::move(name), (uint64_t)entrypoint, (uint32_t)size }); + } + void _UnregisterBlock (const void* entrypoint) { + std::unique_lock lock{mutex}; + sorted_blocks.erase((uint64_t)entrypoint); + } + void _ClearAllBlocks () { + std::unique_lock lock{mutex}; + sorted_blocks.clear(); + } + static void RegisterBlock (const void* entrypoint, size_t size, std::string&& name) { + instance()._RegisterBlock(entrypoint, size, std::move(name)); + } + static void UnregisterBlock (const void* entrypoint) { + instance()._UnregisterBlock(entrypoint); + } + static void ClearAllBlocks () { + instance()._ClearAllBlocks(); + } + + // Currently we do a binary search to lookup any tracy::Decode* even if the queried address is not part of any jitted code + // it is likely that all jitted code lives in a single or a few ranges of addresses managed by dynarmic + // if so we 
really should do a quick check against those ranges first to speed up these cases + // TODO: investigate where jitted code blocks are allocated from + + // called by tracy symbol worker thread -> TracyCallstack.cpp + template + bool lookup_code_address (uint64_t ptr, FUNC set_result) { + std::unique_lock lock{mutex}; + + // Use upper bound: upper bound = find first entry from sorted list/tree where lookup_ptr < sorted_entry_key + // normally [lower_bound, upper_bound) would be the range of entries with equal key, but we need to find the entry with lower or equal address + auto it = sorted_blocks.upper_bound(ptr); + if (it == sorted_blocks.begin()) { // ptr lower than first block, no match + return false; + } + it--; + + assert(ptr >= it->second.entrypoint); + if (ptr - it->second.entrypoint < it->second.size) { + set_result(it->second, exe_name.c_str()); + return true; + } + return false; + } + + // Make registration API done via singleton since jitting code is too complex to pass this around cleanly + // use function static since other way of doing singletons may not be safe with how static libraries are used in this project (?) + static JitCodeMap& instance () { + // Leak intentionally, as apparently c++ destructs classes when main exits, despite other threads still running? 
+ static JitCodeMap* inst = new JitCodeMap(); + return *inst; + } + +private: + // sorted map of currently active jitted code blocks + std::map sorted_blocks; + std::mutex mutex; + std::string exe_name; + + static std::string get_exe_name (); + + JitCodeMap () { + exe_name = get_exe_name(); + } +}; + +} +#else +namespace tracy_jit { +class JitCodeMap { +public: + static void RegisterBlock (const void* entrypoint, size_t size, std::string&& name) {} + static void UnregisterBlock (const void* entrypoint) {} + static void ClearAllBlocks () {} +}; +} +#endif diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp index 2b0540e4a7..b5bf3a5fd5 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp @@ -26,6 +26,8 @@ #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" +#include "common/tracy_jit_symbols.h" + // TODO: Have ARM flags in host flags and not have them use up GPR registers unless necessary. // TODO: Actually implement that proper instruction selector you've always wanted to sweetheart. 
@@ -346,7 +348,9 @@ Xbyak::Label EmitX64::EmitCond(IR::Cond cond) { } EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) { - PerfMapRegister(entrypoint, code.getCurr(), LocationDescriptorToFriendlyName(descriptor)); + auto friendly_name = LocationDescriptorToFriendlyName(descriptor); + PerfMapRegister(entrypoint, code.getCurr(), friendly_name); + tracy_jit::JitCodeMap::RegisterBlock(entrypoint, size, std::move(friendly_name)); Patch(descriptor, entrypoint); BlockDescriptor block_desc{entrypoint, size}; @@ -392,6 +396,7 @@ void EmitX64::ClearCache() { patch_information.clear(); PerfMapClear(); + tracy_jit::JitCodeMap::ClearAllBlocks(); } void EmitX64::InvalidateBasicBlocks(const ankerl::unordered_dense::set& locations) { @@ -399,6 +404,8 @@ void EmitX64::InvalidateBasicBlocks(const ankerl::unordered_dense::setsecond.entrypoint); + block_descriptors.erase(it); } }