7 changed files with 1107 additions and 0 deletions
-
6src/video_core/CMakeLists.txt
-
106src/video_core/query_cache/bank_base.h
-
72src/video_core/query_cache/query_base.h
-
543src/video_core/query_cache/query_cache.h
-
181src/video_core/query_cache/query_cache_base.h
-
125src/video_core/query_cache/query_stream.h
-
74src/video_core/query_cache/types.h
@ -0,0 +1,106 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
|||
// SPDX-License-Identifier: GPL-3.0-or-later |
|||
|
|||
#pragma once |
|||
|
|||
#include <atomic> |
|||
#include <deque> |
|||
#include <utility> |
|||
|
|||
|
|||
#include "common/common_types.h" |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
class BankBase { |
|||
protected: |
|||
const size_t base_bank_size; |
|||
size_t bank_size; |
|||
std::atomic<size_t> references; |
|||
size_t current_slot; |
|||
|
|||
public: |
|||
BankBase(size_t bank_size_) |
|||
: base_bank_size{bank_size_}, bank_size(bank_size_), references(0), current_slot(0) {} |
|||
|
|||
virtual ~BankBase() = default; |
|||
|
|||
virtual std::pair<bool, size_t> Reserve() { |
|||
if (IsClosed()) { |
|||
return {false, bank_size}; |
|||
} |
|||
const size_t result = current_slot++; |
|||
return {true, result}; |
|||
} |
|||
|
|||
virtual void Reset() { |
|||
current_slot = 0; |
|||
references = 0; |
|||
bank_size = base_bank_size; |
|||
} |
|||
|
|||
size_t Size() const { |
|||
return bank_size; |
|||
} |
|||
|
|||
void AddReference(size_t how_many = 1) { |
|||
references.fetch_add(how_many, std::memory_order_relaxed); |
|||
} |
|||
|
|||
void CloseReference(size_t how_many = 1) { |
|||
if (how_many > references.load(std::memory_order_relaxed)) { |
|||
UNREACHABLE(); |
|||
} |
|||
references.fetch_sub(how_many, std::memory_order_relaxed); |
|||
} |
|||
|
|||
void Close() { |
|||
bank_size = current_slot; |
|||
} |
|||
|
|||
constexpr bool IsClosed() { |
|||
return current_slot >= bank_size; |
|||
} |
|||
|
|||
bool IsDead() { |
|||
return IsClosed() && references == 0; |
|||
} |
|||
}; |
|||
|
|||
/// Pool of banks that recycles dead banks before asking the builder for a new one.
template <typename BankType>
class BankPool {
private:
    std::deque<BankType> bank_pool;
    std::deque<size_t> bank_indices; ///< Recycle queue: indices of pooled banks, oldest first.

public:
    BankPool() = default;
    ~BankPool() = default;

    /// Reserve a bank from the pool and return its index.
    /// If the bank at the front of the recycle queue is dead it is Reset() and reused;
    /// otherwise `builder(bank_pool, new_index)` is invoked and is expected to append a bank.
    template <typename Func>
    size_t ReserveBank(Func&& builder) {
        if (!bank_indices.empty() && bank_pool[bank_indices.front()].IsDead()) {
            const size_t recycled_index = bank_indices.front();
            bank_indices.pop_front();
            bank_pool[recycled_index].Reset();
            // Re-queue the bank; without this it could only ever be recycled once and the
            // pool would grow every time afterwards.
            bank_indices.push_back(recycled_index);
            return recycled_index;
        }
        const size_t new_index = bank_pool.size();
        builder(bank_pool, new_index);
        bank_indices.push_back(new_index);
        return new_index;
    }

    /// Get a reference to a bank using its index (no bounds check is performed).
    BankType& GetBank(size_t index) {
        return bank_pool[index];
    }

    /// Get the total number of banks in the pool.
    size_t BankCount() const {
        return bank_pool.size();
    }
};
|||
|
|||
} // namespace VideoCommon |
|||
@ -0,0 +1,72 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
|||
// SPDX-License-Identifier: GPL-3.0-or-later |
|||
|
|||
#pragma once |
|||
|
|||
#include "common/common_funcs.h" |
|||
#include "common/common_types.h" |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
enum class QueryFlagBits : u32 { |
|||
HasTimestamp = 1 << 0, ///< Indicates if this query has a tiemstamp. |
|||
IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host |
|||
IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host |
|||
IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. |
|||
IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query |
|||
IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query |
|||
IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified. |
|||
IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. |
|||
IsFence = 1 << 8, ///< Indicates the query is a fence. |
|||
}; |
|||
DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) |
|||
|
|||
class QueryBase { |
|||
public: |
|||
VAddr guest_address; |
|||
QueryFlagBits flags; |
|||
u64 value; |
|||
|
|||
protected: |
|||
// Default constructor |
|||
QueryBase() : guest_address(0), flags{}, value{} {} |
|||
|
|||
// Parameterized constructor |
|||
QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) |
|||
: guest_address(address), flags(flags_), value{value_} {} |
|||
}; |
|||
|
|||
class GuestQuery : public QueryBase { |
|||
public: |
|||
// Parameterized constructor |
|||
GuestQuery(bool isLong, VAddr address, u64 queryValue) |
|||
: QueryBase(address, QueryFlagBits::IsFinalValueSynced, queryValue) { |
|||
if (isLong) { |
|||
flags |= QueryFlagBits::HasTimestamp; |
|||
} |
|||
} |
|||
}; |
|||
|
|||
class HostQueryBase : public QueryBase { |
|||
public: |
|||
// Default constructor |
|||
HostQueryBase() |
|||
: QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0), start_bank_id{}, |
|||
size_banks{}, start_slot{}, size_slots{} {} |
|||
|
|||
// Parameterized constructor |
|||
HostQueryBase(bool isLong, VAddr address) |
|||
: QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{}, |
|||
start_slot{}, size_slots{} { |
|||
if (isLong) { |
|||
flags |= QueryFlagBits::HasTimestamp; |
|||
} |
|||
} |
|||
|
|||
u32 start_bank_id; |
|||
u32 size_banks; |
|||
size_t start_slot; |
|||
size_t size_slots; |
|||
}; |
|||
|
|||
} // namespace VideoCommon |
|||
@ -0,0 +1,543 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
|||
// SPDX-License-Identifier: GPL-3.0-or-later |
|||
|
|||
#pragma once |
|||
|
|||
#include <array> |
|||
#include <deque> |
|||
#include <memory> |
|||
#include <mutex> |
|||
#include <unordered_map> |
|||
#include <utility> |
|||
|
|||
#include "common/assert.h" |
|||
#include "common/common_types.h" |
|||
#include "common/logging/log.h" |
|||
#include "common/scope_exit.h" |
|||
#include "common/settings.h" |
|||
#include "core/memory.h" |
|||
#include "video_core/engines/maxwell_3d.h" |
|||
#include "video_core/gpu.h" |
|||
#include "video_core/memory_manager.h" |
|||
#include "video_core/query_cache/bank_base.h" |
|||
#include "video_core/query_cache/query_base.h" |
|||
#include "video_core/query_cache/query_cache_base.h" |
|||
#include "video_core/query_cache/query_stream.h" |
|||
#include "video_core/query_cache/types.h" |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
using Maxwell = Tegra::Engines::Maxwell3D; |
|||
|
|||
struct SyncValuesStruct { |
|||
VAddr address; |
|||
u64 value; |
|||
u64 size; |
|||
|
|||
static constexpr bool GeneratesBaseBuffer = true; |
|||
}; |
|||
|
|||
template <typename Traits> |
|||
class GuestStreamer : public SimpleStreamer<GuestQuery> { |
|||
public: |
|||
using RuntimeType = typename Traits::RuntimeType; |
|||
|
|||
GuestStreamer(size_t id_, RuntimeType& runtime_) |
|||
: SimpleStreamer<GuestQuery>(id_), runtime{runtime_} {} |
|||
|
|||
virtual ~GuestStreamer() = default; |
|||
|
|||
size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, |
|||
std::optional<u32> subreport = std::nullopt) override { |
|||
auto new_id = BuildQuery(has_timestamp, address, static_cast<u64>(value)); |
|||
pending_sync.push_back(new_id); |
|||
return new_id; |
|||
} |
|||
|
|||
bool HasPendingSync() override { |
|||
return !pending_sync.empty(); |
|||
} |
|||
|
|||
void SyncWrites() override { |
|||
if (pending_sync.empty()) { |
|||
return; |
|||
} |
|||
std::vector<SyncValuesStruct> sync_values; |
|||
sync_values.reserve(pending_sync.size()); |
|||
for (size_t pending_id : pending_sync) { |
|||
auto& query = slot_queries[pending_id]; |
|||
if (True(query.flags & QueryFlagBits::IsRewritten) || |
|||
True(query.flags & QueryFlagBits::IsInvalidated)) { |
|||
continue; |
|||
} |
|||
query.flags |= QueryFlagBits::IsHostSynced; |
|||
sync_values.emplace_back(query.guest_address, query.value, |
|||
True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4); |
|||
} |
|||
pending_sync.clear(); |
|||
if (sync_values.size() > 0) { |
|||
runtime.template SyncValues<SyncValuesStruct>(sync_values); |
|||
} |
|||
} |
|||
|
|||
private: |
|||
RuntimeType& runtime; |
|||
std::deque<size_t> pending_sync; |
|||
}; |
|||
|
|||
template <typename Traits> |
|||
class StubStreamer : public GuestStreamer<Traits> { |
|||
public: |
|||
using RuntimeType = typename Traits::RuntimeType; |
|||
|
|||
StubStreamer(size_t id_, RuntimeType& runtime_) : GuestStreamer<Traits>(id_, runtime_) {} |
|||
|
|||
~StubStreamer() override = default; |
|||
|
|||
size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value, |
|||
std::optional<u32> subreport = std::nullopt) override { |
|||
size_t new_id = GuestStreamer<Traits>::WriteCounter(address, has_timestamp, 1U, subreport); |
|||
return new_id; |
|||
} |
|||
}; |
|||
|
|||
template <typename Traits> |
|||
struct QueryCacheBase<Traits>::QueryCacheBaseImpl { |
|||
using RuntimeType = typename Traits::RuntimeType; |
|||
|
|||
QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_, |
|||
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_) |
|||
: owner{owner_}, rasterizer{rasterizer_}, |
|||
cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} { |
|||
streamer_mask = 0; |
|||
for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { |
|||
streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); |
|||
if (streamers[i]) { |
|||
streamer_mask |= 1ULL << i; |
|||
} |
|||
} |
|||
} |
|||
|
|||
template <typename Func> |
|||
void ForEachStreamerIn(u64 mask, Func&& func) { |
|||
static constexpr bool RETURNS_BOOL = |
|||
std::is_same_v<std::invoke_result<Func, StreamerInterface*>, bool>; |
|||
while (mask != 0) { |
|||
size_t position = std::countr_zero(mask); |
|||
mask &= ~(1ULL << position); |
|||
if constexpr (RETURNS_BOOL) { |
|||
if (func(streamers[position])) { |
|||
return; |
|||
} |
|||
} else { |
|||
func(streamers[position]); |
|||
} |
|||
} |
|||
} |
|||
|
|||
template <typename Func> |
|||
void ForEachStreamer(Func&& func) { |
|||
ForEachStreamerIn(streamer_mask, func); |
|||
} |
|||
|
|||
QueryBase* ObtainQuery(QueryCacheBase<Traits>::QueryLocation location) { |
|||
size_t which_stream = location.stream_id.Value(); |
|||
auto* streamer = streamers[which_stream]; |
|||
if (!streamer) { |
|||
return nullptr; |
|||
} |
|||
return streamer->GetQuery(location.query_id.Value()); |
|||
} |
|||
|
|||
QueryCacheBase<Traits>* owner; |
|||
VideoCore::RasterizerInterface& rasterizer; |
|||
Core::Memory::Memory& cpu_memory; |
|||
Traits::RuntimeType& runtime; |
|||
Tegra::GPU& gpu; |
|||
std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; |
|||
u64 streamer_mask; |
|||
std::mutex flush_guard; |
|||
std::deque<u64> flushes_pending; |
|||
std::vector<QueryCacheBase<Traits>::QueryLocation> pending_unregister; |
|||
}; |
|||
|
|||
template <typename Traits> |
|||
QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_, |
|||
VideoCore::RasterizerInterface& rasterizer_, |
|||
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_) |
|||
: cached_queries{} { |
|||
impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>( |
|||
this, rasterizer_, cpu_memory_, runtime_, gpu_); |
|||
} |
|||
|
|||
template <typename Traits> |
|||
QueryCacheBase<Traits>::~QueryCacheBase() = default; |
|||
|
|||
template <typename Traits> |
|||
void QueryCacheBase<Traits>::CounterEnable(QueryType counter_type, bool is_enabled) { |
|||
size_t index = static_cast<size_t>(counter_type); |
|||
StreamerInterface* streamer = impl->streamers[index]; |
|||
if (!streamer) [[unlikely]] { |
|||
UNREACHABLE(); |
|||
return; |
|||
} |
|||
if (is_enabled) { |
|||
streamer->StartCounter(); |
|||
} else { |
|||
streamer->PauseCounter(); |
|||
} |
|||
} |
|||
|
|||
template <typename Traits> |
|||
void QueryCacheBase<Traits>::CounterClose(QueryType counter_type) { |
|||
size_t index = static_cast<size_t>(counter_type); |
|||
StreamerInterface* streamer = impl->streamers[index]; |
|||
if (!streamer) [[unlikely]] { |
|||
UNREACHABLE(); |
|||
return; |
|||
} |
|||
streamer->CloseCounter(); |
|||
} |
|||
|
|||
template <typename Traits> |
|||
void QueryCacheBase<Traits>::CounterReset(QueryType counter_type) { |
|||
size_t index = static_cast<size_t>(counter_type); |
|||
StreamerInterface* streamer = impl->streamers[index]; |
|||
if (!streamer) [[unlikely]] { |
|||
UNIMPLEMENTED(); |
|||
return; |
|||
} |
|||
streamer->ResetCounter(); |
|||
} |
|||
|
|||
template <typename Traits> |
|||
void QueryCacheBase<Traits>::BindToChannel(s32 id) { |
|||
VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>::BindToChannel(id); |
|||
impl->runtime.Bind3DEngine(maxwell3d); |
|||
} |
|||
|
|||
template <typename Traits> |
|||
void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type, |
|||
QueryPropertiesFlags flags, u32 payload, u32 subreport) { |
|||
const bool has_timestamp = True(flags & QueryPropertiesFlags::HasTimeout); |
|||
const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence); |
|||
size_t streamer_id = static_cast<size_t>(counter_type); |
|||
auto* streamer = impl->streamers[streamer_id]; |
|||
if (!streamer) [[unlikely]] { |
|||
if (has_timestamp) { |
|||
u64 timestamp = impl->gpu.GetTicks(); |
|||
gpu_memory->Write<u64>(addr + 8, timestamp); |
|||
gpu_memory->Write<u64>(addr, 1ULL); |
|||
} else { |
|||
gpu_memory->Write<u32>(addr, 1U); |
|||
} |
|||
return; |
|||
} |
|||
auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr); |
|||
if (!cpu_addr_opt) [[unlikely]] { |
|||
return; |
|||
} |
|||
VAddr cpu_addr = *cpu_addr_opt; |
|||
const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport); |
|||
auto* query = streamer->GetQuery(new_query_id); |
|||
if (is_fence) { |
|||
query->flags |= QueryFlagBits::IsFence; |
|||
} |
|||
QueryLocation query_location{}; |
|||
query_location.stream_id.Assign(static_cast<u32>(streamer_id)); |
|||
query_location.query_id.Assign(static_cast<u32>(new_query_id)); |
|||
const auto gen_caching_indexing = [](VAddr cur_addr) { |
|||
return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, |
|||
static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); |
|||
}; |
|||
u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); |
|||
u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); |
|||
bool is_synced = !Settings::IsGPULevelHigh() && is_fence; |
|||
std::function<void()> operation( |
|||
[this, is_synced, query_base = query, query_location, pointer, pointer_timestamp] { |
|||
if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { |
|||
if (!is_synced) [[likely]] { |
|||
impl->pending_unregister.push_back(query_location); |
|||
} |
|||
return; |
|||
} |
|||
if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { |
|||
UNREACHABLE(); |
|||
return; |
|||
} |
|||
if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { |
|||
u64 timestamp = impl->gpu.GetTicks(); |
|||
std::memcpy(pointer_timestamp, ×tamp, sizeof(timestamp)); |
|||
std::memcpy(pointer, &query_base->value, sizeof(query_base->value)); |
|||
} else { |
|||
u32 value = static_cast<u32>(query_base->value); |
|||
std::memcpy(pointer, &value, sizeof(value)); |
|||
} |
|||
if (!is_synced) [[likely]] { |
|||
impl->pending_unregister.push_back(query_location); |
|||
} |
|||
}); |
|||
if (is_fence) { |
|||
impl->rasterizer.SignalFence(std::move(operation)); |
|||
} else { |
|||
impl->rasterizer.SyncOperation(std::move(operation)); |
|||
} |
|||
if (is_synced) { |
|||
streamer->Free(new_query_id); |
|||
return; |
|||
} |
|||
auto [cont_addr, base] = gen_caching_indexing(cpu_addr); |
|||
{ |
|||
std::scoped_lock lock(cache_mutex); |
|||
auto it1 = cached_queries.try_emplace(cont_addr); |
|||
auto& sub_container = it1.first->second; |
|||
auto it_current = sub_container.find(base); |
|||
if (it_current == sub_container.end()) { |
|||
sub_container.insert_or_assign(base, query_location); |
|||
return; |
|||
} |
|||
auto* old_query = impl->ObtainQuery(it_current->second); |
|||
old_query->flags |= QueryFlagBits::IsRewritten; |
|||
sub_container.insert_or_assign(base, query_location); |
|||
} |
|||
} |
|||
|
|||
template <typename Traits> |
|||
void QueryCacheBase<Traits>::UnregisterPending() { |
|||
const auto gen_caching_indexing = [](VAddr cur_addr) { |
|||
return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, |
|||
static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); |
|||
}; |
|||
std::scoped_lock lock(cache_mutex); |
|||
for (QueryLocation loc : impl->pending_unregister) { |
|||
const auto [streamer_id, query_id] = loc.unpack(); |
|||
auto* streamer = impl->streamers[streamer_id]; |
|||
if (!streamer) [[unlikely]] { |
|||
continue; |
|||
} |
|||
auto* query = streamer->GetQuery(query_id); |
|||
auto [cont_addr, base] = gen_caching_indexing(query->guest_address); |
|||
auto it1 = cached_queries.find(cont_addr); |
|||
if (it1 != cached_queries.end()) { |
|||
auto it2 = it1->second.find(base); |
|||
if (it2 != it1->second.end()) { |
|||
if (it2->second.raw == loc.raw) { |
|||
it1->second.erase(it2); |
|||
} |
|||
} |
|||
} |
|||
streamer->Free(query_id); |
|||
} |
|||
impl->pending_unregister.clear(); |
|||
} |
|||
|
|||
template <typename Traits> |
|||
void QueryCacheBase<Traits>::NotifyWFI() { |
|||
bool should_sync = false; |
|||
impl->ForEachStreamer( |
|||
[&should_sync](StreamerInterface* streamer) { should_sync |= streamer->HasPendingSync(); }); |
|||
if (!should_sync) { |
|||
return; |
|||
} |
|||
|
|||
impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->PresyncWrites(); }); |
|||
impl->runtime.Barriers(true); |
|||
impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->SyncWrites(); }); |
|||
impl->runtime.Barriers(false); |
|||
} |
|||
|
|||
template <typename Traits> |
|||
void QueryCacheBase<Traits>::NotifySegment(bool resume) { |
|||
if (resume) { |
|||
impl->runtime.ResumeHostConditionalRendering(); |
|||
} else { |
|||
impl->runtime.PauseHostConditionalRendering(); |
|||
CounterClose(VideoCommon::QueryType::ZPassPixelCount64); |
|||
CounterClose(VideoCommon::QueryType::StreamingByteCount); |
|||
} |
|||
} |
|||
|
|||
template <typename Traits> |
|||
bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() { |
|||
bool qc_dirty = false; |
|||
const auto gen_lookup = [this, &qc_dirty](GPUVAddr address) -> VideoCommon::LookupData { |
|||
auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(address); |
|||
if (!cpu_addr_opt) [[unlikely]] { |
|||
return VideoCommon::LookupData{ |
|||
.address = 0, |
|||
.found_query = nullptr, |
|||
}; |
|||
} |
|||
VAddr cpu_addr = *cpu_addr_opt; |
|||
std::scoped_lock lock(cache_mutex); |
|||
auto it1 = cached_queries.find(cpu_addr >> Core::Memory::YUZU_PAGEBITS); |
|||
if (it1 == cached_queries.end()) { |
|||
return VideoCommon::LookupData{ |
|||
.address = cpu_addr, |
|||
.found_query = nullptr, |
|||
}; |
|||
} |
|||
auto& sub_container = it1->second; |
|||
auto it_current = sub_container.find(cpu_addr & Core::Memory::YUZU_PAGEMASK); |
|||
|
|||
if (it_current == sub_container.end()) { |
|||
auto it_current_2 = sub_container.find((cpu_addr & Core::Memory::YUZU_PAGEMASK) + 4); |
|||
if (it_current_2 == sub_container.end()) { |
|||
return VideoCommon::LookupData{ |
|||
.address = cpu_addr, |
|||
.found_query = nullptr, |
|||
}; |
|||
} |
|||
} |
|||
auto* query = impl->ObtainQuery(it_current->second); |
|||
qc_dirty |= True(query->flags & QueryFlagBits::IsHostManaged) && |
|||
False(query->flags & QueryFlagBits::IsGuestSynced); |
|||
return VideoCommon::LookupData{ |
|||
.address = cpu_addr, |
|||
.found_query = query, |
|||
}; |
|||
}; |
|||
|
|||
auto& regs = maxwell3d->regs; |
|||
if (regs.render_enable_override != Maxwell::Regs::RenderEnable::Override::UseRenderEnable) { |
|||
impl->runtime.EndHostConditionalRendering(); |
|||
return false; |
|||
} |
|||
/*if (!Settings::IsGPULevelHigh()) { |
|||
impl->runtime.EndHostConditionalRendering(); |
|||
return gpu_memory->IsMemoryDirty(regs.render_enable.Address(), 24, |
|||
VideoCommon::CacheType::BufferCache | |
|||
VideoCommon::CacheType::QueryCache); |
|||
}*/ |
|||
const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode); |
|||
const GPUVAddr address = regs.render_enable.Address(); |
|||
switch (mode) { |
|||
case ComparisonMode::True: |
|||
impl->runtime.EndHostConditionalRendering(); |
|||
return false; |
|||
case ComparisonMode::False: |
|||
impl->runtime.EndHostConditionalRendering(); |
|||
return false; |
|||
case ComparisonMode::Conditional: { |
|||
VideoCommon::LookupData object_1{gen_lookup(address)}; |
|||
return impl->runtime.HostConditionalRenderingCompareValue(object_1, qc_dirty); |
|||
} |
|||
case ComparisonMode::IfEqual: { |
|||
VideoCommon::LookupData object_1{gen_lookup(address)}; |
|||
VideoCommon::LookupData object_2{gen_lookup(address + 16)}; |
|||
return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty, |
|||
true); |
|||
} |
|||
case ComparisonMode::IfNotEqual: { |
|||
VideoCommon::LookupData object_1{gen_lookup(address)}; |
|||
VideoCommon::LookupData object_2{gen_lookup(address + 16)}; |
|||
return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty, |
|||
false); |
|||
} |
|||
default: |
|||
return false; |
|||
} |
|||
} |
|||
|
|||
// Async downloads |
|||
template <typename Traits> |
|||
void QueryCacheBase<Traits>::CommitAsyncFlushes() { |
|||
u64 mask{}; |
|||
{ |
|||
std::scoped_lock lk(impl->flush_guard); |
|||
impl->ForEachStreamer([&mask](StreamerInterface* streamer) { |
|||
bool local_result = streamer->HasUnsyncedQueries(); |
|||
if (local_result) { |
|||
mask |= 1ULL << streamer->GetId(); |
|||
} |
|||
}); |
|||
impl->flushes_pending.push_back(mask); |
|||
} |
|||
std::function<void()> func([this] { UnregisterPending(); }); |
|||
impl->rasterizer.SyncOperation(std::move(func)); |
|||
if (mask == 0) { |
|||
return; |
|||
} |
|||
impl->ForEachStreamerIn(mask, |
|||
[](StreamerInterface* streamer) { streamer->PushUnsyncedQueries(); }); |
|||
} |
|||
|
|||
template <typename Traits> |
|||
bool QueryCacheBase<Traits>::HasUncommittedFlushes() const { |
|||
bool result = false; |
|||
impl->ForEachStreamer([&result](StreamerInterface* streamer) { |
|||
result |= streamer->HasUnsyncedQueries(); |
|||
return result; |
|||
}); |
|||
return result; |
|||
} |
|||
|
|||
template <typename Traits> |
|||
bool QueryCacheBase<Traits>::ShouldWaitAsyncFlushes() { |
|||
std::scoped_lock lk(impl->flush_guard); |
|||
return !impl->flushes_pending.empty() && impl->flushes_pending.front() != 0ULL; |
|||
} |
|||
|
|||
template <typename Traits> |
|||
void QueryCacheBase<Traits>::PopAsyncFlushes() { |
|||
u64 mask; |
|||
{ |
|||
std::scoped_lock lk(impl->flush_guard); |
|||
mask = impl->flushes_pending.front(); |
|||
impl->flushes_pending.pop_front(); |
|||
} |
|||
if (mask == 0) { |
|||
return; |
|||
} |
|||
impl->ForEachStreamerIn(mask, |
|||
[](StreamerInterface* streamer) { streamer->PopUnsyncedQueries(); }); |
|||
} |
|||
|
|||
// Invalidation |
|||
|
|||
template <typename Traits> |
|||
void QueryCacheBase<Traits>::InvalidateQuery(QueryCacheBase<Traits>::QueryLocation location) { |
|||
auto* query_base = impl->ObtainQuery(location); |
|||
if (!query_base) { |
|||
return; |
|||
} |
|||
query_base->flags |= QueryFlagBits::IsInvalidated; |
|||
} |
|||
|
|||
template <typename Traits> |
|||
bool QueryCacheBase<Traits>::IsQueryDirty(QueryCacheBase<Traits>::QueryLocation location) { |
|||
auto* query_base = impl->ObtainQuery(location); |
|||
if (!query_base) { |
|||
return false; |
|||
} |
|||
return True(query_base->flags & QueryFlagBits::IsHostManaged) && |
|||
False(query_base->flags & QueryFlagBits::IsGuestSynced); |
|||
} |
|||
|
|||
template <typename Traits> |
|||
bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLocation location) { |
|||
auto* query_base = impl->ObtainQuery(location); |
|||
if (!query_base) { |
|||
return false; |
|||
} |
|||
if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && |
|||
False(query_base->flags & QueryFlagBits::IsGuestSynced)) { |
|||
auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address); |
|||
if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { |
|||
std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); |
|||
return false; |
|||
} |
|||
u32 value_l = static_cast<u32>(query_base->value); |
|||
std::memcpy(ptr, &value_l, sizeof(value_l)); |
|||
return false; |
|||
} |
|||
return True(query_base->flags & QueryFlagBits::IsHostManaged) && |
|||
False(query_base->flags & QueryFlagBits::IsGuestSynced); |
|||
} |
|||
|
|||
template <typename Traits> |
|||
void QueryCacheBase<Traits>::RequestGuestHostSync() { |
|||
impl->rasterizer.ReleaseFences(); |
|||
} |
|||
|
|||
} // namespace VideoCommon |
|||
@ -0,0 +1,181 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
|||
// SPDX-License-Identifier: GPL-3.0-or-later |
|||
|
|||
#pragma once |
|||
|
|||
#include <functional> |
|||
#include <mutex> |
|||
#include <optional> |
|||
#include <span> |
|||
#include <unordered_map> |
|||
#include <utility> |
|||
|
|||
#include "common/assert.h" |
|||
#include "common/bit_field.h" |
|||
#include "common/common_types.h" |
|||
#include "core/memory.h" |
|||
#include "video_core/control/channel_state_cache.h" |
|||
#include "video_core/query_cache/query_base.h" |
|||
#include "video_core/query_cache/types.h" |
|||
|
|||
namespace Core::Memory { |
|||
class Memory; |
|||
} |
|||
|
|||
namespace VideoCore { |
|||
class RasterizerInterface; |
|||
} |
|||
|
|||
namespace Tegra { |
|||
class GPU; |
|||
} |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
struct LookupData { |
|||
VAddr address; |
|||
QueryBase* found_query; |
|||
}; |
|||
|
|||
template <typename Traits> |
|||
class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |
|||
using RuntimeType = typename Traits::RuntimeType; |
|||
|
|||
public: |
|||
union QueryLocation { |
|||
BitField<27, 5, u32> stream_id; |
|||
BitField<0, 27, u32> query_id; |
|||
u32 raw; |
|||
|
|||
std::pair<size_t, size_t> unpack() { |
|||
return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())}; |
|||
} |
|||
}; |
|||
|
|||
explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_, |
|||
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_); |
|||
|
|||
~QueryCacheBase(); |
|||
|
|||
void InvalidateRegion(VAddr addr, std::size_t size) { |
|||
IterateCache<true>(addr, size, |
|||
[this](QueryLocation location) { InvalidateQuery(location); }); |
|||
} |
|||
|
|||
void FlushRegion(VAddr addr, std::size_t size) { |
|||
bool result = false; |
|||
IterateCache<false>(addr, size, [this, &result](QueryLocation location) { |
|||
result |= SemiFlushQueryDirty(location); |
|||
return result; |
|||
}); |
|||
if (result) { |
|||
RequestGuestHostSync(); |
|||
} |
|||
} |
|||
|
|||
static u64 BuildMask(std::span<QueryType> types) { |
|||
u64 mask = 0; |
|||
for (auto query_type : types) { |
|||
mask |= 1ULL << (static_cast<u64>(query_type)); |
|||
} |
|||
return mask; |
|||
} |
|||
|
|||
/// Return true when a CPU region is modified from the GPU |
|||
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size) { |
|||
bool result = false; |
|||
IterateCache<false>(addr, size, [this, &result](QueryLocation location) { |
|||
result |= IsQueryDirty(location); |
|||
return result; |
|||
}); |
|||
return result; |
|||
} |
|||
|
|||
void CounterEnable(QueryType counter_type, bool is_enabled); |
|||
|
|||
void CounterReset(QueryType counter_type); |
|||
|
|||
void CounterClose(QueryType counter_type); |
|||
|
|||
void CounterReport(GPUVAddr addr, QueryType counter_type, QueryPropertiesFlags flags, |
|||
u32 payload, u32 subreport); |
|||
|
|||
void NotifyWFI(); |
|||
|
|||
bool AccelerateHostConditionalRendering(); |
|||
|
|||
// Async downloads |
|||
void CommitAsyncFlushes(); |
|||
|
|||
bool HasUncommittedFlushes() const; |
|||
|
|||
bool ShouldWaitAsyncFlushes(); |
|||
|
|||
void PopAsyncFlushes(); |
|||
|
|||
void NotifySegment(bool resume); |
|||
|
|||
void BindToChannel(s32 id) override; |
|||
|
|||
protected: |
|||
template <bool remove_from_cache, typename Func> |
|||
void IterateCache(VAddr addr, std::size_t size, Func&& func) { |
|||
static constexpr bool RETURNS_BOOL = |
|||
std::is_same_v<std::invoke_result<Func, QueryLocation>, bool>; |
|||
const u64 addr_begin = addr; |
|||
const u64 addr_end = addr_begin + size; |
|||
|
|||
const u64 page_end = addr_end >> Core::Memory::YUZU_PAGEBITS; |
|||
std::scoped_lock lock(cache_mutex); |
|||
for (u64 page = addr_begin >> Core::Memory::YUZU_PAGEBITS; page <= page_end; ++page) { |
|||
const u64 page_start = page << Core::Memory::YUZU_PAGEBITS; |
|||
const auto in_range = [page_start, addr_begin, addr_end](const u32 query_location) { |
|||
const u64 cache_begin = page_start + query_location; |
|||
const u64 cache_end = cache_begin + sizeof(u32); |
|||
return cache_begin < addr_end && addr_begin < cache_end; |
|||
}; |
|||
const auto& it = cached_queries.find(page); |
|||
if (it == std::end(cached_queries)) { |
|||
continue; |
|||
} |
|||
auto& contents = it->second; |
|||
for (auto& query : contents) { |
|||
if (!in_range(query.first)) { |
|||
continue; |
|||
} |
|||
if constexpr (RETURNS_BOOL) { |
|||
if (func(query.second)) { |
|||
return; |
|||
} |
|||
} else { |
|||
func(query.second); |
|||
} |
|||
} |
|||
if constexpr (remove_from_cache) { |
|||
const auto in_range2 = [&](const std::pair<u32, QueryLocation>& pair) { |
|||
return in_range(pair.first); |
|||
}; |
|||
std::erase_if(contents, in_range2); |
|||
} |
|||
} |
|||
} |
|||
|
|||
using ContentCache = typename std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>; |
|||
|
|||
void InvalidateQuery(QueryLocation location); |
|||
bool IsQueryDirty(QueryLocation location); |
|||
bool SemiFlushQueryDirty(QueryLocation location); |
|||
void RequestGuestHostSync(); |
|||
void UnregisterPending(); |
|||
|
|||
std::unordered_map<u64, std::unordered_map<u32, QueryLocation>> cached_queries; |
|||
std::mutex cache_mutex; |
|||
|
|||
struct QueryCacheBaseImpl; |
|||
friend struct QueryCacheBaseImpl; |
|||
friend RuntimeType; |
|||
|
|||
std::unique_ptr<struct QueryCacheBaseImpl> impl; |
|||
}; |
|||
|
|||
} // namespace VideoCommon |
|||
@ -0,0 +1,125 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
|||
// SPDX-License-Identifier: GPL-3.0-or-later |
|||
|
|||
#pragma once |
|||
|
|||
#include <deque> |
|||
#include <optional> |
|||
#include <vector> |
|||
|
|||
#include "common/assert.h" |
|||
#include "common/common_types.h" |
|||
#include "video_core/query_cache/bank_base.h" |
|||
#include "video_core/query_cache/query_base.h" |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
class StreamerInterface { |
|||
public: |
|||
StreamerInterface(size_t id_, u64 dependance_mask_ = 0) : id{id_}, dependance_mask{dependance_mask_} {} |
|||
virtual ~StreamerInterface() = default; |
|||
|
|||
virtual QueryBase* GetQuery(size_t id) = 0; |
|||
|
|||
virtual void StartCounter() { |
|||
/* Do Nothing */ |
|||
} |
|||
|
|||
virtual void PauseCounter() { |
|||
/* Do Nothing */ |
|||
} |
|||
|
|||
virtual void ResetCounter() { |
|||
/* Do Nothing */ |
|||
} |
|||
|
|||
virtual void CloseCounter() { |
|||
/* Do Nothing */ |
|||
} |
|||
|
|||
virtual bool HasPendingSync() { |
|||
return false; |
|||
} |
|||
|
|||
virtual void PresyncWrites() { |
|||
/* Do Nothing */ |
|||
} |
|||
|
|||
virtual void SyncWrites() { |
|||
/* Do Nothing */ |
|||
} |
|||
|
|||
virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, |
|||
std::optional<u32> subreport = std::nullopt) = 0; |
|||
|
|||
virtual bool HasUnsyncedQueries() { |
|||
return false; |
|||
} |
|||
|
|||
virtual void PushUnsyncedQueries() { |
|||
/* Do Nothing */ |
|||
} |
|||
|
|||
virtual void PopUnsyncedQueries() { |
|||
/* Do Nothing */ |
|||
} |
|||
|
|||
virtual void Free(size_t query_id) = 0; |
|||
|
|||
size_t GetId() const { |
|||
return id; |
|||
} |
|||
|
|||
protected: |
|||
const size_t id; |
|||
const u64 dependance_mask; |
|||
}; |
|||
|
|||
template <typename QueryType> |
|||
class SimpleStreamer : public StreamerInterface { |
|||
public: |
|||
SimpleStreamer(size_t id_) : StreamerInterface{id_} {} |
|||
virtual ~SimpleStreamer() = default; |
|||
|
|||
protected: |
|||
virtual QueryType* GetQuery(size_t query_id) override { |
|||
if (query_id < slot_queries.size()) { |
|||
return &slot_queries[query_id]; |
|||
} |
|||
return nullptr; |
|||
} |
|||
|
|||
virtual void Free(size_t query_id) override { |
|||
std::scoped_lock lk(guard); |
|||
ReleaseQuery(query_id); |
|||
} |
|||
|
|||
template <typename... Args, typename = decltype(QueryType(std::declval<Args>()...))> |
|||
size_t BuildQuery(Args&&... args) { |
|||
std::scoped_lock lk(guard); |
|||
if (!old_queries.empty()) { |
|||
size_t new_id = old_queries.front(); |
|||
old_queries.pop_front(); |
|||
new (&slot_queries[new_id]) QueryType(std::forward<Args>(args)...); |
|||
return new_id; |
|||
} |
|||
size_t new_id = slot_queries.size(); |
|||
slot_queries.emplace_back(std::forward<Args>(args)...); |
|||
return new_id; |
|||
} |
|||
|
|||
void ReleaseQuery(size_t query_id) { |
|||
|
|||
if (query_id < slot_queries.size()) { |
|||
old_queries.push_back(query_id); |
|||
return; |
|||
} |
|||
UNREACHABLE(); |
|||
} |
|||
|
|||
std::mutex guard; |
|||
std::deque<QueryType> slot_queries; |
|||
std::deque<size_t> old_queries; |
|||
}; |
|||
|
|||
} // namespace VideoCommon |
|||
@ -0,0 +1,74 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
|||
// SPDX-License-Identifier: GPL-3.0-or-later |
|||
|
|||
#pragma once |
|||
|
|||
#include "common/common_funcs.h" |
|||
#include "common/common_types.h" |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
enum class QueryPropertiesFlags : u32 { |
|||
HasTimeout = 1 << 0, |
|||
IsAFence = 1 << 1, |
|||
}; |
|||
DECLARE_ENUM_FLAG_OPERATORS(QueryPropertiesFlags) |
|||
|
|||
// This should always be equivalent to maxwell3d Report Semaphore Reports |
|||
enum class QueryType : u32 { |
|||
Payload = 0, // "None" in docs, but confirmed via hardware to return the payload |
|||
VerticesGenerated = 1, |
|||
ZPassPixelCount = 2, |
|||
PrimitivesGenerated = 3, |
|||
AlphaBetaClocks = 4, |
|||
VertexShaderInvocations = 5, |
|||
StreamingPrimitivesNeededMinusSucceeded = 6, |
|||
GeometryShaderInvocations = 7, |
|||
GeometryShaderPrimitivesGenerated = 9, |
|||
ZCullStats0 = 10, |
|||
StreamingPrimitivesSucceeded = 11, |
|||
ZCullStats1 = 12, |
|||
StreamingPrimitivesNeeded = 13, |
|||
ZCullStats2 = 14, |
|||
ClipperInvocations = 15, |
|||
ZCullStats3 = 16, |
|||
ClipperPrimitivesGenerated = 17, |
|||
VtgPrimitivesOut = 18, |
|||
PixelShaderInvocations = 19, |
|||
ZPassPixelCount64 = 21, |
|||
IEEECleanColorTarget = 24, |
|||
IEEECleanZetaTarget = 25, |
|||
StreamingByteCount = 26, |
|||
TessellationInitInvocations = 27, |
|||
BoundingRectangle = 28, |
|||
TessellationShaderInvocations = 29, |
|||
TotalStreamingPrimitivesNeededMinusSucceeded = 30, |
|||
TessellationShaderPrimitivesGenerated = 31, |
|||
// max. |
|||
MaxQueryTypes, |
|||
}; |
|||
|
|||
// Comparison modes for Host Conditional Rendering |
|||
enum class ComparisonMode : u32 { |
|||
False = 0, |
|||
True = 1, |
|||
Conditional = 2, |
|||
IfEqual = 3, |
|||
IfNotEqual = 4, |
|||
MaxComparisonMode, |
|||
}; |
|||
|
|||
// Reduction ops. |
|||
enum class ReductionOp : u32 { |
|||
RedAdd = 0, |
|||
RedMin = 1, |
|||
RedMax = 2, |
|||
RedInc = 3, |
|||
RedDec = 4, |
|||
RedAnd = 5, |
|||
RedOr = 6, |
|||
RedXor = 7, |
|||
MaxReductionOp, |
|||
}; |
|||
|
|||
} // namespace VideoCommon |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue