Browse Source

experimental: allow tracy to get names for jitted functions so they can show up in the profiler in sample-based mode

names provided by dynarmic
WARNING: jitted code currently does not support stack traces, which tracy relies on, so only the top-level function at the time of a sample shows up,
which makes using this for profiling jitted code mostly pointless, but maybe we can fix this
pull/3759/head
Hexcoder 6 days ago
parent
commit
791a5af791
  1. 3
      CMakeLists.txt
  2. 4
      cpmfile.json
  3. 62
      src/common/tracy_jit_symbols.cpp
  4. 118
      src/common/tracy_jit_symbols.h
  5. 9
      src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp

3
CMakeLists.txt

@ -670,7 +670,7 @@ if (ENABLE_TRACY)
if (tracy_ADDED)
include_directories(${tracy_SOURCE_DIR}/public)
# build tracy client as its own static library even if overkill
add_library(tracy_client STATIC ${tracy_SOURCE_DIR}/public/TracyClient.cpp)
add_library(tracy_client STATIC ${tracy_SOURCE_DIR}/public/TracyClient.cpp src/common/tracy_jit_symbols.cpp)
target_include_directories(tracy_client PRIVATE src)
# Avoid tracy errors (This sets it to W0 instead of W3 though? TODO: fix this)
@ -682,6 +682,7 @@ if (ENABLE_TRACY)
add_compile_definitions(TRACY_SAMPLING_PROFILER_MANUAL_START) # See yuzu\main.cpp
add_compile_definitions(TRACY_ON_DEMAND) # Use on demand mode to avoid profiling emulator start up and game load screens
add_compile_definitions(TRACY_FIBERS)
add_compile_definitions(TRACY_HAS_USER_SYMBOLS) # try and capture jitted functions in sample mode
add_library(Tracy::client ALIAS tracy_client)
endif()

4
cpmfile.json

@ -117,8 +117,8 @@
},
"tracy": {
"repo": "wolfpld/tracy",
"sha": "05cceee",
"hash": "fdf8f3eb0f44c17760e9e559ece6907606580da20568b7900e97e40f3ea773b9e6dbac7f0b04ef32e363d779ec013af63c85adbe2a3807db9205ec48887a546c",
"sha": "984e08e",
"hash": "1D9B3028DA6E0B14BBF06A81D0A13171461EFF216A56C9495C30D469556CB827B352E1B189A10C2CE3EEAB675C3F1B6824E6D2F12795E71641ACEFEC11F6303F",
"version": "0.13.1",
"download_only": true
}

62
src/common/tracy_jit_symbols.cpp

@ -0,0 +1,62 @@
#include "tracy_jit_symbols.h"
#if defined(TRACY_ENABLE) && defined(TRACY_HAS_USER_SYMBOLS)
#include <client/TracyStringHelpers.hpp>
#include <tracy/Tracy.hpp>
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef WIN32_NOMINMAX
#define WIN32_NOMINMAX
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
namespace tracy_jit {
// Builds the image name handed to tracy for jitted code: "<JIT> [<module file name>]".
// The module file name is queried with GetModuleFileNameA into a fixed 1024-byte buffer,
// and exactly the number of characters reported by that call is used.
std::string JitCodeMap::get_exe_name () {
    char module_path[1024];
    const auto path_length = GetModuleFileNameA( nullptr, module_path, sizeof( module_path ) );

    std::string label = "<JIT> [";
    label.append(module_path, path_length);
    label += "]";
    return label;
}
}
namespace tracy {
// Name-only lookup: writes the friendly name of the registered jitted block that contains `ptr`
// into `name_buf`, truncated to fit `buf_size` and always NUL-terminated.
// Returns false when `buf_size` is 0 or when no registered block contains `ptr`
// (in which case `name_buf` is left untouched).
extern bool UserDecodeCallstackPtrFast( uint64_t ptr, char* name_buf, size_t buf_size ) {
    if (buf_size == 0) {
        return false;
    }
    // reserve one byte for the terminator
    const size_t max_chars = buf_size - 1;
    auto write_name = [name_buf, max_chars] (tracy_jit::JitCodeMap::JittedCodeBlock& block, char const* /*exe_name*/) -> void {
        // std::string::copy clamps to the name's length and reports how many chars were written
        const size_t written = block.name.copy(name_buf, max_chars);
        name_buf[written] = '\0';
    };
    return tracy_jit::JitCodeMap::instance().lookup_code_address( ptr, write_name );
}
// Symbol lookup for `ptr`. On a hit, `result` receives the containing block's entry point as the
// symbol address, a "[unknown]" placeholder (copied with CopyStringFast) as the file, line 0,
// and needFree = true. Returns false, leaving `result` untouched, when no registered block contains `ptr`.
extern bool UserDecodeSymbolAddress( uint64_t ptr, CallstackSymbolData* result ) {
    auto fill_symbol = [result] (tracy_jit::JitCodeMap::JittedCodeBlock& block, char const* /*exe_name*/) -> void {
        result->symAddr = block.entrypoint;
        // jitted code has no source file to point at
        result->file = CopyStringFast("[unknown]");
        result->line = 0;
        // NOTE(review): presumably tells tracy to release the copied file string - confirm against tracy
        result->needFree = true;
    };
    return tracy_jit::JitCodeMap::instance().lookup_code_address( ptr, fill_symbol );
}
extern bool UserDecodeCallstackPtr( uint64_t ptr, CallstackEntryData* result, CallstackEntry* cb_data ) {
auto& map = tracy_jit::JitCodeMap::instance();
return map.lookup_code_address( ptr, [result, cb_data] (tracy_jit::JitCodeMap::JittedCodeBlock& block, char const* exe_name) -> void {
cb_data[0].symAddr = block.entrypoint;
cb_data[0].symLen = block.size;
cb_data[0].name = CopyStringFast(block.name.c_str(), block.name.size());
// we have no source file
cb_data[0].file = CopyStringFast("[unknown]");
cb_data[0].line = 0;
*result = { cb_data, 1, exe_name };
} );
}
}
#endif

118
src/common/tracy_jit_symbols.h

@ -0,0 +1,118 @@
#pragma once
#include <cstdint>
#include <cassert>
#include <string>
#include <string_view>
/*
Warning: This allows tracy to show jitted code blocks in its Statistics (Sampling) view and source view (including disassembly).
They will only show up in the Statistics (Sampling) view with "Hide unknown" unchecked, as the source code is not known.
However, the jitted code does not support stack traces, at least not with Windows event tracing (ETW).
This means that while some jitted blocks show up, I believe it is only the ones that happen to be at the top of the stack at the time of the sample (as seen in the ghost flamegraph).
It appears that dynarmic supports stack traces via an API, but Windows ETW does not know about this, and merging ETW results after the fact might not be possible...
*/
#if defined(TRACY_ENABLE) && defined(TRACY_HAS_USER_SYMBOLS)
#include <map>
#include <mutex>
namespace tracy_jit {
// Registry of currently active jitted code blocks, keyed by entry address, used to answer
// "which jitted block contains this address?" queries.
// All access goes through a single leaked singleton (see instance()) and every operation on the
// map is serialized by `mutex`.
class JitCodeMap {
public:
    // Addresses and sizes are stored as 64-bit integers, so only 64-bit targets are supported.
    static_assert(sizeof(const void*) == sizeof(uint64_t));
    static_assert(sizeof(size_t) == sizeof(uint64_t));

    struct JittedCodeBlock {
        // friendly name already provided by dynarmic
        std::string name;
        // address of the first byte of the block
        uint64_t entrypoint;
        // length of the block in bytes (narrowed from size_t on registration)
        uint32_t size;
    };

    JitCodeMap (const JitCodeMap&) = delete;
    JitCodeMap& operator= (const JitCodeMap&) = delete;

    //// registration functions called by dynarmic EmitX64
    // which appears to execute on CPU threads, so this needs a mutex to be threadsafe
    // TODO: copy block code RegisterBlock and never unregister?
    // because tracy copies code way after emulation has already stopped and code may already have been freed

    // Records the block [entrypoint, entrypoint + size) under `name`.
    // If a block is already registered at the same entry address it is replaced, so a stale
    // entry can never shadow the block that was registered most recently.
    void _RegisterBlock (const void* entrypoint, size_t size, std::string&& name) {
        const auto address = reinterpret_cast<uint64_t>(entrypoint);
        std::scoped_lock lock{mutex};
        sorted_blocks.insert_or_assign(address, JittedCodeBlock{ std::move(name), address, static_cast<uint32_t>(size) });
    }

    // Forgets the block registered at `entrypoint`; does nothing if there is none.
    void _UnregisterBlock (const void* entrypoint) {
        std::scoped_lock lock{mutex};
        sorted_blocks.erase(reinterpret_cast<uint64_t>(entrypoint));
    }

    // Forgets every registered block.
    // NOTE(review): the registry is shared process-wide, so if more than one emitter registers
    // blocks here, one emitter clearing its cache also drops the others' entries - confirm.
    void _ClearAllBlocks () {
        std::scoped_lock lock{mutex};
        sorted_blocks.clear();
    }

    // Static convenience wrappers that forward to the singleton.
    static void RegisterBlock (const void* entrypoint, size_t size, std::string&& name) {
        instance()._RegisterBlock(entrypoint, size, std::move(name));
    }
    static void UnregisterBlock (const void* entrypoint) {
        instance()._UnregisterBlock(entrypoint);
    }
    static void ClearAllBlocks () {
        instance()._ClearAllBlocks();
    }

    // Currently we do a binary search to look up any tracy::Decode* even if the queried address is not part of any jitted code
    // it is likely that all jitted code lives in a single or a few ranges of addresses managed by dynarmic
    // if so we really should do a quick check against those ranges first to speed up these cases
    // TODO: investigate where jitted code blocks are allocated from
    // called by tracy symbol worker thread -> TracyCallstack.cpp
    //
    // Finds the registered block whose range [entrypoint, entrypoint + size) contains `ptr`.
    // On a hit, calls set_result(block, image_name) while the lock is still held and returns true;
    // otherwise returns false without calling set_result.
    template <typename FUNC>
    bool lookup_code_address (uint64_t ptr, FUNC set_result) {
        std::scoped_lock lock{mutex};
        // upper_bound yields the first block that starts strictly after ptr;
        // the only block that can contain ptr is therefore the one just before it
        auto next = sorted_blocks.upper_bound(ptr);
        if (next == sorted_blocks.begin()) { // ptr lower than first block, no match
            return false;
        }
        JittedCodeBlock& candidate = (--next)->second;
        assert(ptr >= candidate.entrypoint);
        if (ptr - candidate.entrypoint >= candidate.size) { // ptr lies past the end of the candidate
            return false;
        }
        set_result(candidate, exe_name.c_str());
        return true;
    }

    // Make registration API done via singleton since jitting code is too complex to pass this around cleanly
    // use function static since other ways of doing singletons may not be safe with how static libraries are used in this project (?)
    static JitCodeMap& instance () {
        // Leak intentionally, as apparently c++ destructs classes when main exits, despite other threads still running?
        static JitCodeMap* inst = new JitCodeMap();
        return *inst;
    }

private:
    // sorted map of currently active jitted code blocks, keyed by entry address
    std::map<uint64_t, JittedCodeBlock> sorted_blocks;
    // guards sorted_blocks
    std::mutex mutex;
    // image name passed to lookup callbacks; computed once at construction
    std::string exe_name;

    static std::string get_exe_name ();

    JitCodeMap () : exe_name(get_exe_name()) {}
};
}
#else
namespace tracy_jit {
// No-op stand-in with the same static registration API, so call sites compile unchanged
// when the tracy user-symbol support is not enabled.
class JitCodeMap {
public:
    // Ignores the block; `name` is not consumed.
    static void RegisterBlock (const void* /*entrypoint*/, size_t /*size*/, std::string&& /*name*/) {
    }

    // Nothing is ever registered, so there is nothing to remove.
    static void UnregisterBlock (const void* /*entrypoint*/) {
    }

    // Nothing is ever registered, so there is nothing to clear.
    static void ClearAllBlocks () {
    }
};
}
#endif

9
src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp

@ -26,6 +26,8 @@
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
#include "common/tracy_jit_symbols.h"
// TODO: Have ARM flags in host flags and not have them use up GPR registers unless necessary.
// TODO: Actually implement that proper instruction selector you've always wanted to sweetheart.
@ -346,7 +348,9 @@ Xbyak::Label EmitX64::EmitCond(IR::Cond cond) {
}
EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) {
PerfMapRegister(entrypoint, code.getCurr(), LocationDescriptorToFriendlyName(descriptor));
auto friendly_name = LocationDescriptorToFriendlyName(descriptor);
PerfMapRegister(entrypoint, code.getCurr(), friendly_name);
tracy_jit::JitCodeMap::RegisterBlock(entrypoint, size, std::move(friendly_name));
Patch(descriptor, entrypoint);
BlockDescriptor block_desc{entrypoint, size};
@ -392,6 +396,7 @@ void EmitX64::ClearCache() {
patch_information.clear();
PerfMapClear();
tracy_jit::JitCodeMap::ClearAllBlocks();
}
void EmitX64::InvalidateBasicBlocks(const ankerl::unordered_dense::set<IR::LocationDescriptor>& locations) {
@ -399,6 +404,8 @@ void EmitX64::InvalidateBasicBlocks(const ankerl::unordered_dense::set<IR::Locat
for (const auto& descriptor : locations) {
if (auto const it = block_descriptors.find(descriptor); it != block_descriptors.end()) {
Unpatch(descriptor);
tracy_jit::JitCodeMap::UnregisterBlock(it->second.entrypoint);
block_descriptors.erase(it);
}
}

Loading…
Cancel
Save