diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5501343e38..7b5deed870 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -670,7 +670,7 @@ if (ENABLE_TRACY)
     if (tracy_ADDED)
         include_directories(${tracy_SOURCE_DIR}/public)
         # build tracy client as its own static library even if overkill
-        add_library(tracy_client STATIC ${tracy_SOURCE_DIR}/public/TracyClient.cpp)
+        add_library(tracy_client STATIC ${tracy_SOURCE_DIR}/public/TracyClient.cpp src/common/tracy_jit_symbols.cpp)
         target_include_directories(tracy_client PRIVATE src)
 
         # Avoid tracy errors (This sets it to W0 instead of W3 though? TODO: fix this)
@@ -682,6 +682,7 @@ if (ENABLE_TRACY)
         add_compile_definitions(TRACY_SAMPLING_PROFILER_MANUAL_START) # See yuzu\main.cpp
         add_compile_definitions(TRACY_ON_DEMAND) # Use on demand mode to avoid profiling emulator start up and game load screens
         add_compile_definitions(TRACY_FIBERS)
+        add_compile_definitions(TRACY_HAS_USER_SYMBOLS) # try and capture jitted functions in sample mode
 
         add_library(Tracy::client ALIAS tracy_client)
     endif()
diff --git a/cpmfile.json b/cpmfile.json
index f2cf069c8e..17df4d9c78 100644
--- a/cpmfile.json
+++ b/cpmfile.json
@@ -117,8 +117,8 @@
     },
     "tracy": {
         "repo": "wolfpld/tracy",
-        "sha": "05cceee",
-        "hash": "fdf8f3eb0f44c17760e9e559ece6907606580da20568b7900e97e40f3ea773b9e6dbac7f0b04ef32e363d779ec013af63c85adbe2a3807db9205ec48887a546c",
+        "sha": "984e08e",
+        "hash": "1D9B3028DA6E0B14BBF06A81D0A13171461EFF216A56C9495C30D469556CB827B352E1B189A10C2CE3EEAB675C3F1B6824E6D2F12795E71641ACEFEC11F6303F",
         "version": "0.13.1",
         "download_only": true
     }
diff --git a/src/common/tracy_jit_symbols.cpp b/src/common/tracy_jit_symbols.cpp
new file mode 100644
index 0000000000..9e10c9e294
--- /dev/null
+++ b/src/common/tracy_jit_symbols.cpp
@@ -0,0 +1,86 @@
+#include "tracy_jit_symbols.h"
+
+#if defined(TRACY_ENABLE) && defined(TRACY_HAS_USER_SYMBOLS)
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <string>
+
+// NOTE(review): presumably the Tracy headers that declare CallstackSymbolData, CallstackEntryData,
+// CallstackEntry and CopyStringFast -- confirm against the pinned Tracy revision
+#include <client/TracyCallstack.hpp>
+#include <client/TracyStringHelpers.hpp>
+
+// <windows.h> and GetModuleFileNameA only exist on Windows; keep other platforms compiling
+#ifdef _WIN32
+#ifndef WIN32_LEAN_AND_MEAN
+    #define WIN32_LEAN_AND_MEAN
+#endif
+#ifndef WIN32_NOMINMAX
+    #define WIN32_NOMINMAX
+#endif
+#ifndef NOMINMAX
+    #define NOMINMAX
+#endif
+#include <windows.h>
+#endif
+
+namespace tracy_jit {
+    // Builds the image label handed to tracy for every jitted frame: " [<path of the running executable>]"
+    std::string JitCodeMap::get_exe_name () {
+#ifdef _WIN32
+        char name[1024];
+        const DWORD written = GetModuleFileNameA( nullptr, name, static_cast<DWORD>( sizeof( name ) ) );
+        // on truncation the call returns the buffer size and the last byte is the terminator, so clamp
+        const size_t nameLength = std::min<size_t>( written, sizeof( name ) - 1 );
+        return " ["+std::string(name, nameLength)+"]";
+#else
+        // executable path lookup is only implemented for Windows; keep the same " [...]" shape
+        return " [unknown]";
+#endif
+    }
+}
+
+namespace tracy {
+    // Writes the name of the jitted block containing ptr into name_buf (truncated to fit, always NUL-terminated)
+    // Returns false if ptr is not inside a registered block or there is no room for the terminator
+    extern bool UserDecodeCallstackPtrFast( uint64_t ptr, char* name_buf, size_t buf_size ) {
+        // buf_size is unsigned: the only "no space" case is 0, which would also underflow buf_size-1 below
+        if (name_buf == nullptr || buf_size == 0) return false;
+
+        auto& map = tracy_jit::JitCodeMap::instance();
+        return map.lookup_code_address( ptr, [name_buf, buf_size] (const tracy_jit::JitCodeMap::JittedCodeBlock& block, char const*) -> void {
+            // copy name truncated
+            const size_t safe_chars = std::min(block.name.size(), buf_size-1);
+            std::memcpy(name_buf, block.name.data(), safe_chars);
+            name_buf[safe_chars] = '\0';
+        } );
+    }
+    // Fills result with the entrypoint of the jitted block containing ptr; file and line are placeholders
+    extern bool UserDecodeSymbolAddress( uint64_t ptr, CallstackSymbolData* result ) {
+        auto& map = tracy_jit::JitCodeMap::instance();
+        return map.lookup_code_address( ptr, [result] (const tracy_jit::JitCodeMap::JittedCodeBlock& block, char const*) -> void {
+            result->symAddr = block.entrypoint;
+            // we have no source file
+            result->file = CopyStringFast("[unknown]");
+            result->line = 0;
+            result->needFree = true;
+        } );
+    }
+    // Describes the jitted block containing ptr as a single callstack entry stored in cb_data[0]
+    extern bool UserDecodeCallstackPtr( uint64_t ptr, CallstackEntryData* result, CallstackEntry* cb_data ) {
+        auto& map = tracy_jit::JitCodeMap::instance();
+        return map.lookup_code_address( ptr, [result, cb_data] (const tracy_jit::JitCodeMap::JittedCodeBlock& block, char const* exe_name) -> void {
+            cb_data[0].symAddr = block.entrypoint;
+            cb_data[0].symLen = block.size;
+            cb_data[0].name = CopyStringFast(block.name.c_str(), block.name.size());
+            // we have no source file
+            cb_data[0].file = CopyStringFast("[unknown]");
+            cb_data[0].line = 0;
+
+            *result = { cb_data, 1, exe_name };
+        } );
+    }
+}
+#endif
diff --git a/src/common/tracy_jit_symbols.h b/src/common/tracy_jit_symbols.h
new file mode 100644
index 0000000000..e12a1d4e8e
--- 
/dev/null
+++ b/src/common/tracy_jit_symbols.h
@@ -0,0 +1,131 @@
+#pragma once
+// NOTE(review): include list rebuilt from the names this header uses -- confirm nothing relied on extra transitive includes
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
+/*
+    Warning: This allows tracy to show jitted code blocks in its Statistics (Sampling) view and source view (including disassembly)
+    Will only show up in Statistics (Sampling) view with "Hide unknown" unchecked, as the source code is not known
+
+    But the jitted code does not support stack traces, at least not with windows event tracing
+    This means that while some jitted blocks show up, I believe it is only the ones that happen to be at the top of the stack at the time of the sample (as seen in the ghost flamegraph)
+    It appears that dynarmic supports stacktraces via an API, but windows ETW does not know about this, and merging ETW results after the fact might not be possible...
+*/
+
+#if defined(TRACY_ENABLE) && defined(TRACY_HAS_USER_SYMBOLS)
+#include <cassert>
+#include <map>
+#include <mutex>
+#include <utility>
+
+namespace tracy_jit {
+
+// Process-wide table of the code blocks currently emitted by the JIT, keyed by entrypoint address
+// Filled through RegisterBlock/UnregisterBlock/ClearAllBlocks and queried through lookup_code_address; one mutex guards every access
+class JitCodeMap {
+public:
+    static_assert(sizeof(const void*) == sizeof(uint64_t));
+    static_assert(sizeof(size_t) == sizeof(uint64_t));
+    struct JittedCodeBlock {
+        // friendly name already provided by dynarmic
+        std::string name;
+        uint64_t entrypoint;
+        uint32_t size;
+    };
+
+    //// registration functions called by dynarmic EmitX64
+    // which appears to execute on CPU threads, so this needs a mutex to be thread-safe
+
+    // TODO: copy block code RegisterBlock and never unregister?
+    // because tracy copies code way after emulation has already stopped and code may already have been freed
+
+    // NOTE(review): names starting with an underscore followed by a capital letter are reserved for the implementation;
+    // rename these three members once it is confirmed nothing outside this header calls them
+    void _RegisterBlock (const void* entrypoint, size_t size, std::string&& name) {
+        const auto address = reinterpret_cast<uint64_t>(entrypoint);
+        std::lock_guard<std::mutex> lock{mutex};
+        // insert_or_assign instead of emplace: if an entry for this address is still present (block re-emitted
+        // without an unregister in between) emplace would silently keep the stale name and size
+        sorted_blocks.insert_or_assign(address, JittedCodeBlock{ std::move(name), address, static_cast<uint32_t>(size) });
+    }
+    void _UnregisterBlock (const void* entrypoint) {
+        std::lock_guard<std::mutex> lock{mutex};
+        sorted_blocks.erase(reinterpret_cast<uint64_t>(entrypoint));
+    }
+    void _ClearAllBlocks () {
+        std::lock_guard<std::mutex> lock{mutex};
+        sorted_blocks.clear();
+    }
+    static void RegisterBlock (const void* entrypoint, size_t size, std::string&& name) {
+        instance()._RegisterBlock(entrypoint, size, std::move(name));
+    }
+    static void UnregisterBlock (const void* entrypoint) {
+        instance()._UnregisterBlock(entrypoint);
+    }
+    // NOTE(review): the table is shared by the whole process, so this also drops blocks registered by any other emitter;
+    // confirm only one code cache ever registers here, otherwise clear by address range instead
+    static void ClearAllBlocks () {
+        instance()._ClearAllBlocks();
+    }
+
+    // Currently we do a binary search to lookup any tracy::Decode* even if the queried address is not part of any jitted code
+    // it is likely that all jitted code lives in a single or a few ranges of addresses managed by dynarmic
+    // if so we really should do a quick check against those ranges first to speed up these cases
+    // TODO: investigate where jitted code blocks are allocated from
+
+    // called by tracy symbol worker thread -> TracyCallstack.cpp
+    // Looks up the block whose [entrypoint, entrypoint + size) range contains ptr; on a hit calls
+    // set_result(block, exe_name) while still holding the lock and returns true, otherwise returns false
+    template <typename FUNC>
+    bool lookup_code_address (uint64_t ptr, FUNC set_result) {
+        std::lock_guard<std::mutex> lock{mutex};
+
+        // Use upper bound: upper bound = find first entry from sorted list/tree where lookup_ptr < sorted_entry_key
+        // normally [lower_bound, upper_bound) would be the range of entries with equal key, but we need to find the entry with lower or equal address
+        auto it = sorted_blocks.upper_bound(ptr);
+        if (it == sorted_blocks.begin()) { // ptr lower than first block, no match
+            return false;
+        }
+        --it;
+
+        assert(ptr >= it->second.entrypoint);
+        if (ptr - it->second.entrypoint < it->second.size) {
+            set_result(it->second, exe_name.c_str());
+            return true;
+        }
+        return false;
+    }
+
+    // Make registration API done via singleton since jitting code is too complex to pass this around cleanly
+    // use function static since other way of doing singletons may not be safe with how static libraries are used in this project (?)
+    static JitCodeMap& instance () {
+        // Leak intentionally, as apparently c++ destructs classes when main exits, despite other threads still running?
+        static JitCodeMap* inst = new JitCodeMap();
+        return *inst;
+    }
+
+private:
+    // sorted map of currently active jitted code blocks
+    std::map<uint64_t, JittedCodeBlock> sorted_blocks;
+    std::mutex mutex;
+    // image label passed to set_result; built once by get_exe_name()
+    std::string exe_name;
+
+    static std::string get_exe_name ();
+
+    JitCodeMap () : exe_name(get_exe_name()) {}
+};
+
+}
+#else
+namespace tracy_jit {
+// No-op stand-in so call sites compile unchanged when tracy user symbols are disabled
+class JitCodeMap {
+public:
+    static void RegisterBlock (const void* /*entrypoint*/, size_t /*size*/, std::string&& /*name*/) {}
+    static void UnregisterBlock (const void* /*entrypoint*/) {}
+    static void ClearAllBlocks () {}
+};
+}
+#endif
diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp
index 2b0540e4a7..b5bf3a5fd5 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp
@@ -26,6 +26,8 @@
 #include "dynarmic/ir/microinstruction.h"
 #include "dynarmic/ir/opcodes.h"
 
+#include "common/tracy_jit_symbols.h"
+
 // TODO: Have ARM flags in host flags and not have them use up GPR registers unless necessary.
 // TODO: Actually implement that proper instruction selector you've always wanted to sweetheart.
 
@@ -346,7 +348,9 @@ Xbyak::Label EmitX64::EmitCond(IR::Cond cond) { } EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) { - PerfMapRegister(entrypoint, code.getCurr(), LocationDescriptorToFriendlyName(descriptor)); + auto friendly_name = LocationDescriptorToFriendlyName(descriptor); + PerfMapRegister(entrypoint, code.getCurr(), friendly_name); + tracy_jit::JitCodeMap::RegisterBlock(entrypoint, size, std::move(friendly_name)); Patch(descriptor, entrypoint); BlockDescriptor block_desc{entrypoint, size}; @@ -392,6 +396,7 @@ void EmitX64::ClearCache() { patch_information.clear(); PerfMapClear(); + tracy_jit::JitCodeMap::ClearAllBlocks(); } void EmitX64::InvalidateBasicBlocks(const ankerl::unordered_dense::set& locations) { @@ -399,6 +404,8 @@ void EmitX64::InvalidateBasicBlocks(const ankerl::unordered_dense::setsecond.entrypoint); + block_descriptors.erase(it); } }