Browse Source
Merge pull request #11225 from FernandoS27/no-laxatives-in-santas-cookies
Merge pull request #11225 from FernandoS27/no-laxatives-in-santas-cookies
Y.F.C: Rework the Query Cache. (pull/15/merge)
committed by
GitHub
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
45 changed files with 3553 additions and 366 deletions
-
10src/common/settings.cpp
-
2src/common/settings.h
-
6src/video_core/CMakeLists.txt
-
18src/video_core/buffer_cache/buffer_cache.h
-
12src/video_core/buffer_cache/buffer_cache_base.h
-
2src/video_core/control/channel_state_cache.h
-
1src/video_core/engines/draw_manager.h
-
74src/video_core/engines/maxwell_3d.cpp
-
3src/video_core/engines/maxwell_3d.h
-
12src/video_core/engines/maxwell_dma.cpp
-
13src/video_core/engines/puller.cpp
-
21src/video_core/fence_manager.h
-
4src/video_core/gpu.cpp
-
6src/video_core/host_shaders/CMakeLists.txt
-
173src/video_core/host_shaders/queries_prefix_scan_sum.comp
-
138src/video_core/host_shaders/queries_prefix_scan_sum_nosubgroups.comp
-
20src/video_core/host_shaders/resolve_conditional_render.comp
-
49src/video_core/macro/macro_hle.cpp
-
13src/video_core/query_cache.h
-
104src/video_core/query_cache/bank_base.h
-
70src/video_core/query_cache/query_base.h
-
580src/video_core/query_cache/query_cache.h
-
181src/video_core/query_cache/query_cache_base.h
-
149src/video_core/query_cache/query_stream.h
-
74src/video_core/query_cache/types.h
-
13src/video_core/rasterizer_interface.h
-
18src/video_core/renderer_null/null_rasterizer.cpp
-
7src/video_core/renderer_null/null_rasterizer.h
-
2src/video_core/renderer_opengl/gl_query_cache.cpp
-
2src/video_core/renderer_opengl/gl_query_cache.h
-
40src/video_core/renderer_opengl/gl_rasterizer.cpp
-
7src/video_core/renderer_opengl/gl_rasterizer.h
-
3src/video_core/renderer_vulkan/vk_buffer_cache.cpp
-
181src/video_core/renderer_vulkan/vk_compute_pass.cpp
-
31src/video_core/renderer_vulkan/vk_compute_pass.h
-
2src/video_core/renderer_vulkan/vk_fence_manager.h
-
1593src/video_core/renderer_vulkan/vk_query_cache.cpp
-
106src/video_core/renderer_vulkan/vk_query_cache.h
-
107src/video_core/renderer_vulkan/vk_rasterizer.cpp
-
14src/video_core/renderer_vulkan/vk_rasterizer.h
-
9src/video_core/renderer_vulkan/vk_scheduler.cpp
-
12src/video_core/renderer_vulkan/vk_scheduler.h
-
6src/video_core/vulkan_common/vulkan_device.h
-
4src/video_core/vulkan_common/vulkan_wrapper.cpp
-
27src/video_core/vulkan_common/vulkan_wrapper.h
@ -0,0 +1,173 @@ |
|||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
||||
|
// SPDX-License-Identifier: GPL-3.0-or-later |
||||
|
|
||||
|
#version 460 core |
||||
|
|
||||
|
#extension GL_KHR_shader_subgroup_basic : require |
||||
|
#extension GL_KHR_shader_subgroup_shuffle : require |
||||
|
#extension GL_KHR_shader_subgroup_shuffle_relative : require |
||||
|
#extension GL_KHR_shader_subgroup_arithmetic : require |
||||
|
|
||||
|
#ifdef VULKAN |
||||
|
|
||||
|
#define HAS_EXTENDED_TYPES 1 |
||||
|
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { |
||||
|
#define END_PUSH_CONSTANTS }; |
||||
|
#define UNIFORM(n) |
||||
|
#define BINDING_INPUT_BUFFER 0 |
||||
|
#define BINDING_OUTPUT_IMAGE 1 |
||||
|
|
||||
|
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv |
||||
|
|
||||
|
#extension GL_NV_gpu_shader5 : enable |
||||
|
#ifdef GL_NV_gpu_shader5 |
||||
|
#define HAS_EXTENDED_TYPES 1 |
||||
|
#else |
||||
|
#define HAS_EXTENDED_TYPES 0 |
||||
|
#endif |
||||
|
#define BEGIN_PUSH_CONSTANTS |
||||
|
#define END_PUSH_CONSTANTS |
||||
|
#define UNIFORM(n) layout(location = n) uniform |
||||
|
#define BINDING_INPUT_BUFFER 0 |
||||
|
#define BINDING_OUTPUT_IMAGE 0 |
||||
|
|
||||
|
#endif |
||||
|
|
||||
|
BEGIN_PUSH_CONSTANTS |
||||
|
UNIFORM(0) uint min_accumulation_base; |
||||
|
UNIFORM(1) uint max_accumulation_base; |
||||
|
UNIFORM(2) uint accumulation_limit; |
||||
|
UNIFORM(3) uint buffer_offset; |
||||
|
END_PUSH_CONSTANTS |
||||
|
|
||||
|
#define LOCAL_RESULTS 8 |
||||
|
#define QUERIES_PER_INVOC 2048 |
||||
|
|
||||
|
layout(local_size_x = QUERIES_PER_INVOC / LOCAL_RESULTS) in; |
||||
|
|
||||
|
layout(std430, binding = 0) readonly buffer block1 { |
||||
|
uvec2 input_data[]; |
||||
|
}; |
||||
|
|
||||
|
layout(std430, binding = 1) coherent buffer block2 { |
||||
|
uvec2 output_data[]; |
||||
|
}; |
||||
|
|
||||
|
layout(std430, binding = 2) coherent buffer block3 { |
||||
|
uvec2 accumulated_data; |
||||
|
}; |
||||
|
|
||||
|
shared uvec2 shared_data[128]; |
||||
|
|
||||
|
// Adds two 64-bit unsigned values that are each packed into a uvec2 (x holds the low
// word, y holds the high word), for GPUs that don't support uint64.
uvec2 AddUint64(uvec2 value_1, uvec2 value_2) {
    // uaddCarry returns the wrapped low-word sum and sets `overflow` to 1 when it wrapped.
    uint overflow = 0;
    const uint low_word = uaddCarry(value_1.x, value_2.x, overflow);
    const uint high_word = value_1.y + value_2.y + overflow;
    return uvec2(low_word, high_word);
}
||||
|
|
||||
|
// Inclusive prefix sum of a packed 64-bit value across the subgroup, using Hillis and
// Steele's algorithm.
uvec2 subgroupInclusiveAddUint64(uvec2 value) {
    const uint lane = gl_SubgroupInvocationID;
    uvec2 running_sum = value;
    for (uint stride = 1; stride < gl_SubgroupSize; stride <<= 1) {
        // Every invocation issues the shuffle; it fetches the partial sum held by the
        // invocation `stride` lanes below this one.
        const uvec2 neighbour = subgroupShuffleUp(running_sum, stride);
        // Only lanes that actually have such a neighbour fold the fetched value in.
        if (lane >= stride) {
            running_sum = AddUint64(running_sum, neighbour);
        }
    }
    return running_sum;
}
||||
|
|
||||
|
// Writes down the results to the output buffer and updates the accumulation buffer.
//
// results: the LOCAL_RESULTS prefix sums owned by this invocation. Entries whose local
//          index (current_id * LOCAL_RESULTS + i) is below min_accumulation_base get the
//          previously accumulated value added before they are stored.
//
// After the store, the invocation that owns element `accumulation_limit` publishes the
// new accumulated value. When the accumulation was reset somewhere inside this batch
// (min_accumulation_base < accumulation_limit + 1), the value stored at
// max_accumulation_base - 1 is subtracted from it first.
void WriteResults(uvec2 results[LOCAL_RESULTS]) {
    const uint current_id = gl_LocalInvocationID.x;
    const uint first_result = current_id * LOCAL_RESULTS;
    // Snapshot the accumulation once, before any invocation overwrites it below.
    const uvec2 accum = accumulated_data;
    for (uint i = 0; i < LOCAL_RESULTS; i++) {
        const uvec2 base_data = first_result + i < min_accumulation_base ? accum : uvec2(0, 0);
        // AddUint64 returns the sum; it has to be stored back, otherwise the accumulation
        // base never reaches the results that are written out.
        results[i] = AddUint64(results[i], base_data);
        output_data[buffer_offset + first_result + i] = results[i];
    }

    const uint index = accumulation_limit % LOCAL_RESULTS;
    const uint base_id = accumulation_limit / LOCAL_RESULTS;
    if (min_accumulation_base >= accumulation_limit + 1) {
        if (current_id == base_id) {
            accumulated_data = results[index];
        }
        return;
    }

    // We have that ugly case in which the accumulation data is reset in the middle somewhere.
    // The branch above depends only on push constants, so every invocation reaches this
    // barrier together.
    barrier();
    groupMemoryBarrier();

    if (current_id == base_id) {
        uvec2 reset_value = output_data[max_accumulation_base - 1];
        // Negate manually (two's complement): -x == ~x + 1.
        reset_value = AddUint64(uvec2(1, 0), ~reset_value);
        accumulated_data = AddUint64(results[index], reset_value);
    }
}
||||
|
|
||||
|
// Entry point: computes an inclusive prefix sum over the 64-bit query values in
// input_data. Each invocation scans LOCAL_RESULTS consecutive values, the per-invocation
// totals are scanned across the subgroup, and (when there is more work than one subgroup
// covers) the per-subgroup totals are combined through shared memory.
void main() {
    const uint lane = gl_SubgroupInvocationID;
    const uint global_subgroup = gl_SubgroupID + gl_WorkGroupID.x * gl_NumSubgroups;
    const uint highest_lane = subgroupMax(lane);
    const uint first_query = buffer_offset + gl_LocalInvocationID.x * LOCAL_RESULTS;
    const uint tail = LOCAL_RESULTS - 1;

    // Load this invocation's values and build their local running sum.
    uvec2 results[LOCAL_RESULTS];
    results[0] = input_data[first_query];
    for (uint i = 1; i < LOCAL_RESULTS; i++) {
        results[i] = AddUint64(input_data[first_query + i], results[i - 1]);
    }
    // make sure all input data has been loaded
    subgroupBarrier();
    subgroupMemoryBarrier();

    // Scan the per-invocation totals (the last local result) across the subgroup.
    results[tail] = subgroupInclusiveAddUint64(results[tail]);
    // Fetch the scanned total of the invocation one lane below; it is the offset that the
    // remaining local results of this invocation still lack.
    const uvec2 result_behind = subgroupShuffleUp(results[tail], 1);
    if (lane != 0) {
        for (uint i = 0; i < tail; i++) {
            results[i] = AddUint64(results[i], result_behind);
        }
    }

    // If there are no more queries than one subgroup covers, just write down the results.
    // This condition is constant per dispatch.
    if (accumulation_limit <= gl_SubgroupSize * LOCAL_RESULTS) {
        WriteResults(results);
        return;
    }

    // There is more work, so the highest lane of each subgroup publishes the subgroup total
    // into shared memory.
    if (lane == highest_lane) {
        shared_data[global_subgroup] = results[tail];
    }
    // wait until every subgroup has stored its total
    barrier();
    memoryBarrierShared();

    // Every subgroup except the first adds the totals of the subgroups before it.
    if (global_subgroup != 0) {
        // Each lane picks up one published subgroup total.
        uvec2 partial = shared_data[lane];
        subgroupBarrier();
        subgroupMemoryBarrierShared();
        partial = subgroupInclusiveAddUint64(partial);
        // The scanned value at lane (global_subgroup - 1) is the sum of all previous subgroups.
        const uvec2 carried = subgroupShuffle(partial, global_subgroup - 1);
        for (uint i = 0; i < LOCAL_RESULTS; i++) {
            results[i] = AddUint64(results[i], carried);
        }
    }
    WriteResults(results);
}
||||
@ -0,0 +1,138 @@ |
|||||
|
// SPDX-FileCopyrightText: Copyright 2015 Graham Sellers, Richard Wright Jr. and Nicholas Haemel |
||||
|
// SPDX-License-Identifier: MIT |
||||
|
|
||||
|
// Code obtained from OpenGL SuperBible, Seventh Edition by Graham Sellers, Richard Wright Jr. and |
||||
|
// Nicholas Haemel. Modified to suit needs. |
||||
|
|
||||
|
#version 460 core |
||||
|
|
||||
|
#ifdef VULKAN |
||||
|
|
||||
|
#define HAS_EXTENDED_TYPES 1 |
||||
|
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { |
||||
|
#define END_PUSH_CONSTANTS }; |
||||
|
#define UNIFORM(n) |
||||
|
#define BINDING_INPUT_BUFFER 0 |
||||
|
#define BINDING_OUTPUT_IMAGE 1 |
||||
|
|
||||
|
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv |
||||
|
|
||||
|
#extension GL_NV_gpu_shader5 : enable |
||||
|
#ifdef GL_NV_gpu_shader5 |
||||
|
#define HAS_EXTENDED_TYPES 1 |
||||
|
#else |
||||
|
#define HAS_EXTENDED_TYPES 0 |
||||
|
#endif |
||||
|
#define BEGIN_PUSH_CONSTANTS |
||||
|
#define END_PUSH_CONSTANTS |
||||
|
#define UNIFORM(n) layout(location = n) uniform |
||||
|
#define BINDING_INPUT_BUFFER 0 |
||||
|
#define BINDING_OUTPUT_IMAGE 0 |
||||
|
|
||||
|
#endif |
||||
|
|
||||
|
BEGIN_PUSH_CONSTANTS |
||||
|
UNIFORM(0) uint min_accumulation_base; |
||||
|
UNIFORM(1) uint max_accumulation_base; |
||||
|
UNIFORM(2) uint accumulation_limit; |
||||
|
UNIFORM(3) uint buffer_offset; |
||||
|
END_PUSH_CONSTANTS |
||||
|
|
||||
|
#define LOCAL_RESULTS 4 |
||||
|
#define QUERIES_PER_INVOC 2048 |
||||
|
|
||||
|
layout(local_size_x = QUERIES_PER_INVOC / LOCAL_RESULTS) in; |
||||
|
|
||||
|
layout(std430, binding = 0) readonly buffer block1 { |
||||
|
uvec2 input_data[gl_WorkGroupSize.x * LOCAL_RESULTS]; |
||||
|
}; |
||||
|
|
||||
|
layout(std430, binding = 1) writeonly coherent buffer block2 { |
||||
|
uvec2 output_data[gl_WorkGroupSize.x * LOCAL_RESULTS]; |
||||
|
}; |
||||
|
|
||||
|
layout(std430, binding = 2) coherent buffer block3 { |
||||
|
uvec2 accumulated_data; |
||||
|
}; |
||||
|
|
||||
|
shared uvec2 shared_data[gl_WorkGroupSize.x * LOCAL_RESULTS]; |
||||
|
|
||||
|
// 64-bit unsigned addition on values packed into a uvec2 (x holds the low word, y holds
// the high word).
uvec2 AddUint64(uvec2 value_1, uvec2 value_2) {
    // uaddCarry yields the wrapped low-word sum and reports the overflow bit in `overflow`.
    uint overflow = 0;
    const uint low_word = uaddCarry(value_1.x, value_2.x, overflow);
    const uint high_word = value_1.y + value_2.y + overflow;
    return uvec2(low_word, high_word);
}
||||
|
|
||||
|
// Entry point: work-group wide inclusive prefix sum over 64-bit query values, done in
// shared memory (variant for GPUs without subgroup operations).
//
// Every invocation owns LOCAL_RESULTS consecutive elements. Elements whose index
// (buffer_offset + local index) is below min_accumulation_base additionally get the
// previously accumulated value added. Invocation 0 finally publishes the new accumulated
// value taken from element `accumulation_limit`.
void main(void) {
    const uint id = gl_LocalInvocationID.x;
    const uint work_size = gl_WorkGroupSize.x;
    const uint first_element = id * LOCAL_RESULTS;
    // Each step of the scan combines one (read, write) pair for every two elements.
    const uint pairs_per_step = (work_size * LOCAL_RESULTS) / 2;

    // Snapshot the accumulation once, before invocation 0 overwrites it at the end.
    const uvec2 accum = accumulated_data;
    uvec2 base_value[LOCAL_RESULTS];
    for (uint i = 0; i < LOCAL_RESULTS; i++) {
        base_value[i] =
            (buffer_offset + first_element + i) < min_accumulation_base ? accum : uvec2(0);
    }

    // Each invocation is responsible for the content of LOCAL_RESULTS elements of the
    // shared array.
    for (uint i = 0; i < LOCAL_RESULTS; i++) {
        shared_data[first_element + i] = input_data[buffer_offset + first_element + i];
    }
    // Synchronize to make sure that everyone has initialized their elements of
    // shared_data[] with data loaded from the input array.
    barrier();
    memoryBarrierShared();

    // The number of steps is the log base 2 of the element count
    // (work group size * LOCAL_RESULTS), both of which should be powers of 2.
    const uint steps = uint(log2(work_size)) + uint(log2(LOCAL_RESULTS));
    for (uint level = 0; level < steps; level++) {
        const uint mask = (1u << level) - 1u;
        // The scan needs pairs_per_step additions per step, which is more than there are
        // invocations when LOCAL_RESULTS > 2, so every invocation handles several pairs.
        // Within one step the read slots (last element of a left half-block) are never
        // written and all write slots are distinct, so no extra barrier is needed here.
        for (uint pair = id; pair < pairs_per_step; pair += work_size) {
            // Calculate the read and write index in the shared array.
            const uint rd_id = ((pair >> level) << (level + 1)) + mask;
            const uint wr_id = rd_id + 1 + (pair & mask);
            // Accumulate the read data into our element.
            shared_data[wr_id] = AddUint64(shared_data[rd_id], shared_data[wr_id]);
        }
        // Synchronize again to make sure that everyone has caught up with us.
        barrier();
        memoryBarrierShared();
    }

    // Add the accumulation.
    for (uint i = 0; i < LOCAL_RESULTS; i++) {
        shared_data[first_element + i] =
            AddUint64(shared_data[first_element + i], base_value[i]);
    }
    barrier();
    memoryBarrierShared();

    // Finally write our data back to the output buffer.
    for (uint i = 0; i < LOCAL_RESULTS; i++) {
        output_data[buffer_offset + first_element + i] = shared_data[first_element + i];
    }

    if (id == 0) {
        if (min_accumulation_base >= accumulation_limit + 1) {
            accumulated_data = shared_data[accumulation_limit];
            return;
        }
        // The accumulation was reset inside this batch: subtract the value reached at the
        // reset point from the final value.
        uvec2 reset_value = shared_data[max_accumulation_base - 1];
        const uvec2 final_value = shared_data[accumulation_limit];
        // Two's complement negation: -x == ~x + 1.
        reset_value = AddUint64(uvec2(1, 0), ~reset_value);
        accumulated_data = AddUint64(final_value, reset_value);
    }
}
||||
@ -0,0 +1,20 @@ |
|||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
||||
|
// SPDX-License-Identifier: GPL-3.0-or-later |
||||
|
|
||||
|
#version 450 |
||||
|
|
||||
|
layout(local_size_x = 1) in; |
||||
|
|
||||
|
layout(std430, binding = 0) buffer Query { |
||||
|
uvec2 initial; |
||||
|
uvec2 unknown; |
||||
|
uvec2 current; |
||||
|
}; |
||||
|
|
||||
|
layout(std430, binding = 1) buffer Result { |
||||
|
uint result; |
||||
|
}; |
||||
|
|
||||
|
// Writes 1 to `result` when the `initial` and `current` values of the query are equal in
// both components, and 0 otherwise.
void main() {
    const bool unchanged = initial == current;
    result = unchanged ? 1 : 0;
}
||||
@ -0,0 +1,104 @@ |
|||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
||||
|
// SPDX-License-Identifier: GPL-3.0-or-later |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include <atomic> |
||||
|
#include <deque> |
||||
|
#include <utility> |
||||
|
|
||||
|
#include "common/common_types.h" |
||||
|
|
||||
|
namespace VideoCommon { |
||||
|
|
||||
|
class BankBase { |
||||
|
protected: |
||||
|
const size_t base_bank_size{}; |
||||
|
size_t bank_size{}; |
||||
|
std::atomic<size_t> references{}; |
||||
|
size_t current_slot{}; |
||||
|
|
||||
|
public: |
||||
|
explicit BankBase(size_t bank_size_) : base_bank_size{bank_size_}, bank_size(bank_size_) {} |
||||
|
|
||||
|
virtual ~BankBase() = default; |
||||
|
|
||||
|
virtual std::pair<bool, size_t> Reserve() { |
||||
|
if (IsClosed()) { |
||||
|
return {false, bank_size}; |
||||
|
} |
||||
|
const size_t result = current_slot++; |
||||
|
return {true, result}; |
||||
|
} |
||||
|
|
||||
|
virtual void Reset() { |
||||
|
current_slot = 0; |
||||
|
references = 0; |
||||
|
bank_size = base_bank_size; |
||||
|
} |
||||
|
|
||||
|
size_t Size() const { |
||||
|
return bank_size; |
||||
|
} |
||||
|
|
||||
|
void AddReference(size_t how_many = 1) { |
||||
|
references.fetch_add(how_many, std::memory_order_relaxed); |
||||
|
} |
||||
|
|
||||
|
void CloseReference(size_t how_many = 1) { |
||||
|
if (how_many > references.load(std::memory_order_relaxed)) { |
||||
|
UNREACHABLE(); |
||||
|
} |
||||
|
references.fetch_sub(how_many, std::memory_order_relaxed); |
||||
|
} |
||||
|
|
||||
|
void Close() { |
||||
|
bank_size = current_slot; |
||||
|
} |
||||
|
|
||||
|
bool IsClosed() const { |
||||
|
return current_slot >= bank_size; |
||||
|
} |
||||
|
|
||||
|
bool IsDead() const { |
||||
|
return IsClosed() && references == 0; |
||||
|
} |
||||
|
}; |
||||
|
|
||||
|
/// Pool of banks addressed by stable indices.
///
/// Banks live in `bank_pool` and are never removed. `bank_indices` is the recycling queue:
/// its front is the bank that has been in use the longest and is therefore the first
/// candidate for reuse once it reports IsDead().
template <typename BankType>
class BankPool {
private:
    std::deque<BankType> bank_pool;
    std::deque<size_t> bank_indices;

public:
    BankPool() = default;
    ~BankPool() = default;

    /// Reserve a bank from the pool and return its index.
    ///
    /// If the bank at the front of the recycling queue is dead it is Reset() and reused;
    /// otherwise `builder(bank_pool, new_index)` is invoked and is expected to append a new
    /// bank to the deque it is given.
    /// @param builder Callable taking (std::deque<BankType>&, size_t new_index).
    /// @return Index of the reserved bank, usable with GetBank().
    template <typename Func>
    size_t ReserveBank(Func&& builder) {
        if (!bank_indices.empty() && bank_pool[bank_indices.front()].IsDead()) {
            const size_t recycled_index = bank_indices.front();
            bank_indices.pop_front();
            bank_pool[recycled_index].Reset();
            // Re-queue the bank behind all others. Without this it would drop out of the
            // recycling queue and could never be reused a second time.
            bank_indices.push_back(recycled_index);
            return recycled_index;
        }
        const size_t new_index = bank_pool.size();
        builder(bank_pool, new_index);
        bank_indices.push_back(new_index);
        return new_index;
    }

    /// Get a reference to a bank using its index.
    BankType& GetBank(size_t index) {
        return bank_pool[index];
    }

    /// Get the total number of banks in the pool.
    size_t BankCount() const {
        return bank_pool.size();
    }
};
||||
|
|
||||
|
} // namespace VideoCommon |
||||
@ -0,0 +1,70 @@ |
|||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
||||
|
// SPDX-License-Identifier: GPL-3.0-or-later |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include "common/common_funcs.h" |
||||
|
#include "common/common_types.h" |
||||
|
|
||||
|
namespace VideoCommon { |
||||
|
|
||||
|
enum class QueryFlagBits : u32 { |
||||
|
HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp. |
||||
|
IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host |
||||
|
IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host |
||||
|
IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. |
||||
|
IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query |
||||
|
IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query |
||||
|
IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified. |
||||
|
IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. |
||||
|
IsFence = 1 << 8, ///< Indicates the query is a fence. |
||||
|
}; |
||||
|
DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) |
||||
|
|
||||
|
class QueryBase { |
||||
|
public: |
||||
|
VAddr guest_address{}; |
||||
|
QueryFlagBits flags{}; |
||||
|
u64 value{}; |
||||
|
|
||||
|
protected: |
||||
|
// Default constructor |
||||
|
QueryBase() = default; |
||||
|
|
||||
|
// Parameterized constructor |
||||
|
QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) |
||||
|
: guest_address(address), flags(flags_), value{value_} {} |
||||
|
}; |
||||
|
|
||||
|
class GuestQuery : public QueryBase { |
||||
|
public: |
||||
|
// Parameterized constructor |
||||
|
GuestQuery(bool isLong, VAddr address, u64 queryValue) |
||||
|
: QueryBase(address, QueryFlagBits::IsFinalValueSynced, queryValue) { |
||||
|
if (isLong) { |
||||
|
flags |= QueryFlagBits::HasTimestamp; |
||||
|
} |
||||
|
} |
||||
|
}; |
||||
|
|
||||
|
class HostQueryBase : public QueryBase { |
||||
|
public: |
||||
|
// Default constructor |
||||
|
HostQueryBase() : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0) {} |
||||
|
|
||||
|
// Parameterized constructor |
||||
|
HostQueryBase(bool has_timestamp, VAddr address) |
||||
|
: QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{}, |
||||
|
start_slot{}, size_slots{} { |
||||
|
if (has_timestamp) { |
||||
|
flags |= QueryFlagBits::HasTimestamp; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
u32 start_bank_id{}; |
||||
|
u32 size_banks{}; |
||||
|
size_t start_slot{}; |
||||
|
size_t size_slots{}; |
||||
|
}; |
||||
|
|
||||
|
} // namespace VideoCommon |
||||
@ -0,0 +1,580 @@ |
|||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
||||
|
// SPDX-License-Identifier: GPL-3.0-or-later |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include <array> |
||||
|
#include <deque> |
||||
|
#include <memory> |
||||
|
#include <mutex> |
||||
|
#include <unordered_map> |
||||
|
#include <utility> |
||||
|
|
||||
|
#include "common/assert.h" |
||||
|
#include "common/common_types.h" |
||||
|
#include "common/logging/log.h" |
||||
|
#include "common/scope_exit.h" |
||||
|
#include "common/settings.h" |
||||
|
#include "core/memory.h" |
||||
|
#include "video_core/engines/maxwell_3d.h" |
||||
|
#include "video_core/gpu.h" |
||||
|
#include "video_core/memory_manager.h" |
||||
|
#include "video_core/query_cache/bank_base.h" |
||||
|
#include "video_core/query_cache/query_base.h" |
||||
|
#include "video_core/query_cache/query_cache_base.h" |
||||
|
#include "video_core/query_cache/query_stream.h" |
||||
|
#include "video_core/query_cache/types.h" |
||||
|
|
||||
|
namespace VideoCommon { |
||||
|
|
||||
|
using Maxwell = Tegra::Engines::Maxwell3D; |
||||
|
|
||||
|
/// One pending guest write, as collected by GuestStreamer::SyncWrites and handed to the
/// runtime's SyncValues call.
struct SyncValuesStruct { |
||||
|
/// Filled from the query's guest_address.
VAddr address; |
||||
|
/// Filled from the query's value.
u64 value; |
||||
|
/// Size of the write: GuestStreamer passes 8 for queries with a timestamp and 4 otherwise
/// (presumably in bytes; confirm against the runtime).
u64 size; |
||||
|
|
||||
|
/// Compile-time switch read by the consumer of this struct; NOTE(review): its exact effect
/// is not visible from here.
static constexpr bool GeneratesBaseBuffer = true; |
||||
|
}; |
||||
|
|
||||
|
template <typename Traits> |
||||
|
class GuestStreamer : public SimpleStreamer<GuestQuery> { |
||||
|
public: |
||||
|
using RuntimeType = typename Traits::RuntimeType; |
||||
|
|
||||
|
GuestStreamer(size_t id_, RuntimeType& runtime_) |
||||
|
: SimpleStreamer<GuestQuery>(id_), runtime{runtime_} {} |
||||
|
|
||||
|
virtual ~GuestStreamer() = default; |
||||
|
|
||||
|
size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, |
||||
|
std::optional<u32> subreport = std::nullopt) override { |
||||
|
auto new_id = BuildQuery(has_timestamp, address, static_cast<u64>(value)); |
||||
|
pending_sync.push_back(new_id); |
||||
|
return new_id; |
||||
|
} |
||||
|
|
||||
|
bool HasPendingSync() const override { |
||||
|
return !pending_sync.empty(); |
||||
|
} |
||||
|
|
||||
|
void SyncWrites() override { |
||||
|
if (pending_sync.empty()) { |
||||
|
return; |
||||
|
} |
||||
|
std::vector<SyncValuesStruct> sync_values; |
||||
|
sync_values.reserve(pending_sync.size()); |
||||
|
for (size_t pending_id : pending_sync) { |
||||
|
auto& query = slot_queries[pending_id]; |
||||
|
if (True(query.flags & QueryFlagBits::IsRewritten) || |
||||
|
True(query.flags & QueryFlagBits::IsInvalidated)) { |
||||
|
continue; |
||||
|
} |
||||
|
query.flags |= QueryFlagBits::IsHostSynced; |
||||
|
sync_values.emplace_back(SyncValuesStruct{ |
||||
|
.address = query.guest_address, |
||||
|
.value = query.value, |
||||
|
.size = static_cast<u64>(True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4)}); |
||||
|
} |
||||
|
pending_sync.clear(); |
||||
|
if (sync_values.size() > 0) { |
||||
|
runtime.template SyncValues<SyncValuesStruct>(sync_values); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
RuntimeType& runtime; |
||||
|
std::deque<size_t> pending_sync; |
||||
|
}; |
||||
|
|
||||
|
template <typename Traits> |
||||
|
class StubStreamer : public GuestStreamer<Traits> { |
||||
|
public: |
||||
|
using RuntimeType = typename Traits::RuntimeType; |
||||
|
|
||||
|
StubStreamer(size_t id_, RuntimeType& runtime_, u32 stub_value_) |
||||
|
: GuestStreamer<Traits>(id_, runtime_), stub_value{stub_value_} {} |
||||
|
|
||||
|
~StubStreamer() override = default; |
||||
|
|
||||
|
size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value, |
||||
|
std::optional<u32> subreport = std::nullopt) override { |
||||
|
size_t new_id = |
||||
|
GuestStreamer<Traits>::WriteCounter(address, has_timestamp, stub_value, subreport); |
||||
|
return new_id; |
||||
|
} |
||||
|
|
||||
|
private: |
||||
|
u32 stub_value; |
||||
|
}; |
||||
|
|
||||
|
template <typename Traits> |
||||
|
struct QueryCacheBase<Traits>::QueryCacheBaseImpl { |
||||
|
using RuntimeType = typename Traits::RuntimeType; |
||||
|
|
||||
|
QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_, |
||||
|
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_) |
||||
|
: owner{owner_}, rasterizer{rasterizer_}, |
||||
|
cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} { |
||||
|
streamer_mask = 0; |
||||
|
for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { |
||||
|
streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); |
||||
|
if (streamers[i]) { |
||||
|
streamer_mask |= 1ULL << streamers[i]->GetId(); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
template <typename Func> |
||||
|
void ForEachStreamerIn(u64 mask, Func&& func) { |
||||
|
static constexpr bool RETURNS_BOOL = |
||||
|
std::is_same_v<std::invoke_result<Func, StreamerInterface*>, bool>; |
||||
|
while (mask != 0) { |
||||
|
size_t position = std::countr_zero(mask); |
||||
|
mask &= ~(1ULL << position); |
||||
|
if constexpr (RETURNS_BOOL) { |
||||
|
if (func(streamers[position])) { |
||||
|
return; |
||||
|
} |
||||
|
} else { |
||||
|
func(streamers[position]); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
template <typename Func> |
||||
|
void ForEachStreamer(Func&& func) { |
||||
|
ForEachStreamerIn(streamer_mask, func); |
||||
|
} |
||||
|
|
||||
|
QueryBase* ObtainQuery(QueryCacheBase<Traits>::QueryLocation location) { |
||||
|
size_t which_stream = location.stream_id.Value(); |
||||
|
auto* streamer = streamers[which_stream]; |
||||
|
if (!streamer) { |
||||
|
return nullptr; |
||||
|
} |
||||
|
return streamer->GetQuery(location.query_id.Value()); |
||||
|
} |
||||
|
|
||||
|
QueryCacheBase<Traits>* owner; |
||||
|
VideoCore::RasterizerInterface& rasterizer; |
||||
|
Core::Memory::Memory& cpu_memory; |
||||
|
RuntimeType& runtime; |
||||
|
Tegra::GPU& gpu; |
||||
|
std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; |
||||
|
u64 streamer_mask; |
||||
|
std::mutex flush_guard; |
||||
|
std::deque<u64> flushes_pending; |
||||
|
std::vector<QueryCacheBase<Traits>::QueryLocation> pending_unregister; |
||||
|
}; |
||||
|
|
||||
|
template <typename Traits> |
||||
|
QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_, |
||||
|
VideoCore::RasterizerInterface& rasterizer_, |
||||
|
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_) |
||||
|
: cached_queries{} { |
||||
|
impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>( |
||||
|
this, rasterizer_, cpu_memory_, runtime_, gpu_); |
||||
|
} |
||||
|
|
||||
|
template <typename Traits> |
||||
|
QueryCacheBase<Traits>::~QueryCacheBase() = default; |
||||
|
|
||||
|
template <typename Traits> |
||||
|
void QueryCacheBase<Traits>::CounterEnable(QueryType counter_type, bool is_enabled) { |
||||
|
size_t index = static_cast<size_t>(counter_type); |
||||
|
StreamerInterface* streamer = impl->streamers[index]; |
||||
|
if (!streamer) [[unlikely]] { |
||||
|
UNREACHABLE(); |
||||
|
return; |
||||
|
} |
||||
|
if (is_enabled) { |
||||
|
streamer->StartCounter(); |
||||
|
} else { |
||||
|
streamer->PauseCounter(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
template <typename Traits> |
||||
|
void QueryCacheBase<Traits>::CounterClose(QueryType counter_type) { |
||||
|
size_t index = static_cast<size_t>(counter_type); |
||||
|
StreamerInterface* streamer = impl->streamers[index]; |
||||
|
if (!streamer) [[unlikely]] { |
||||
|
UNREACHABLE(); |
||||
|
return; |
||||
|
} |
||||
|
streamer->CloseCounter(); |
||||
|
} |
||||
|
|
||||
|
template <typename Traits> |
||||
|
void QueryCacheBase<Traits>::CounterReset(QueryType counter_type) { |
||||
|
size_t index = static_cast<size_t>(counter_type); |
||||
|
StreamerInterface* streamer = impl->streamers[index]; |
||||
|
if (!streamer) [[unlikely]] { |
||||
|
UNIMPLEMENTED(); |
||||
|
return; |
||||
|
} |
||||
|
streamer->ResetCounter(); |
||||
|
} |
||||
|
|
||||
|
template <typename Traits> |
||||
|
void QueryCacheBase<Traits>::BindToChannel(s32 id) { |
||||
|
VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>::BindToChannel(id); |
||||
|
impl->runtime.Bind3DEngine(maxwell3d); |
||||
|
} |
||||
|
|
||||
|
template <typename Traits> |
||||
|
void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type, |
||||
|
QueryPropertiesFlags flags, u32 payload, u32 subreport) { |
||||
|
const bool has_timestamp = True(flags & QueryPropertiesFlags::HasTimeout); |
||||
|
const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence); |
||||
|
size_t streamer_id = static_cast<size_t>(counter_type); |
||||
|
auto* streamer = impl->streamers[streamer_id]; |
||||
|
if (streamer == nullptr) [[unlikely]] { |
||||
|
counter_type = QueryType::Payload; |
||||
|
payload = 1U; |
||||
|
streamer_id = static_cast<size_t>(counter_type); |
||||
|
streamer = impl->streamers[streamer_id]; |
||||
|
} |
||||
|
auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr); |
||||
|
if (!cpu_addr_opt) [[unlikely]] { |
||||
|
return; |
||||
|
} |
||||
|
VAddr cpu_addr = *cpu_addr_opt; |
||||
|
const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport); |
||||
|
auto* query = streamer->GetQuery(new_query_id); |
||||
|
if (is_fence) { |
||||
|
query->flags |= QueryFlagBits::IsFence; |
||||
|
} |
||||
|
QueryLocation query_location{}; |
||||
|
query_location.stream_id.Assign(static_cast<u32>(streamer_id)); |
||||
|
query_location.query_id.Assign(static_cast<u32>(new_query_id)); |
||||
|
const auto gen_caching_indexing = [](VAddr cur_addr) { |
||||
|
return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, |
||||
|
static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); |
||||
|
}; |
||||
|
u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); |
||||
|
u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); |
||||
|
bool is_synced = !Settings::IsGPULevelHigh() && is_fence; |
||||
|
|
||||
|
std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, |
||||
|
pointer, pointer_timestamp] { |
||||
|
if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { |
||||
|
if (!is_synced) [[likely]] { |
||||
|
impl->pending_unregister.push_back(query_location); |
||||
|
} |
||||
|
return; |
||||
|
} |
||||
|
if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { |
||||
|
UNREACHABLE(); |
||||
|
return; |
||||
|
} |
||||
|
query_base->value += streamer->GetAmmendValue(); |
||||
|
streamer->SetAccumulationValue(query_base->value); |
||||
|
if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { |
||||
|
u64 timestamp = impl->gpu.GetTicks(); |
||||
|
std::memcpy(pointer_timestamp, ×tamp, sizeof(timestamp)); |
||||
|
std::memcpy(pointer, &query_base->value, sizeof(query_base->value)); |
||||
|
} else { |
||||
|
u32 value = static_cast<u32>(query_base->value); |
||||
|
std::memcpy(pointer, &value, sizeof(value)); |
||||
|
} |
||||
|
if (!is_synced) [[likely]] { |
||||
|
impl->pending_unregister.push_back(query_location); |
||||
|
} |
||||
|
}); |
||||
|
if (is_fence) { |
||||
|
impl->rasterizer.SignalFence(std::move(operation)); |
||||
|
} else { |
||||
|
if (!Settings::IsGPULevelHigh() && counter_type == QueryType::Payload) { |
||||
|
if (has_timestamp) { |
||||
|
u64 timestamp = impl->gpu.GetTicks(); |
||||
|
u64 value = static_cast<u64>(payload); |
||||
|
std::memcpy(pointer_timestamp, ×tamp, sizeof(timestamp)); |
||||
|
std::memcpy(pointer, &value, sizeof(value)); |
||||
|
} else { |
||||
|
std::memcpy(pointer, &payload, sizeof(payload)); |
||||
|
} |
||||
|
streamer->Free(new_query_id); |
||||
|
return; |
||||
|
} |
||||
|
impl->rasterizer.SyncOperation(std::move(operation)); |
||||
|
} |
||||
|
if (is_synced) { |
||||
|
streamer->Free(new_query_id); |
||||
|
return; |
||||
|
} |
||||
|
auto [cont_addr, base] = gen_caching_indexing(cpu_addr); |
||||
|
{ |
||||
|
std::scoped_lock lock(cache_mutex); |
||||
|
auto it1 = cached_queries.try_emplace(cont_addr); |
||||
|
auto& sub_container = it1.first->second; |
||||
|
auto it_current = sub_container.find(base); |
||||
|
if (it_current == sub_container.end()) { |
||||
|
sub_container.insert_or_assign(base, query_location); |
||||
|
return; |
||||
|
} |
||||
|
auto* old_query = impl->ObtainQuery(it_current->second); |
||||
|
old_query->flags |= QueryFlagBits::IsRewritten; |
||||
|
sub_container.insert_or_assign(base, query_location); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
template <typename Traits> |
||||
|
void QueryCacheBase<Traits>::UnregisterPending() { |
||||
|
const auto gen_caching_indexing = [](VAddr cur_addr) { |
||||
|
return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, |
||||
|
static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); |
||||
|
}; |
||||
|
std::scoped_lock lock(cache_mutex); |
||||
|
for (QueryLocation loc : impl->pending_unregister) { |
||||
|
const auto [streamer_id, query_id] = loc.unpack(); |
||||
|
auto* streamer = impl->streamers[streamer_id]; |
||||
|
if (!streamer) [[unlikely]] { |
||||
|
continue; |
||||
|
} |
||||
|
auto* query = streamer->GetQuery(query_id); |
||||
|
auto [cont_addr, base] = gen_caching_indexing(query->guest_address); |
||||
|
auto it1 = cached_queries.find(cont_addr); |
||||
|
if (it1 != cached_queries.end()) { |
||||
|
auto it2 = it1->second.find(base); |
||||
|
if (it2 != it1->second.end()) { |
||||
|
if (it2->second.raw == loc.raw) { |
||||
|
it1->second.erase(it2); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
streamer->Free(query_id); |
||||
|
} |
||||
|
impl->pending_unregister.clear(); |
||||
|
} |
||||
|
|
||||
|
template <typename Traits> |
||||
|
void QueryCacheBase<Traits>::NotifyWFI() { |
||||
|
bool should_sync = false; |
||||
|
impl->ForEachStreamer( |
||||
|
[&should_sync](StreamerInterface* streamer) { should_sync |= streamer->HasPendingSync(); }); |
||||
|
if (!should_sync) { |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->PresyncWrites(); }); |
||||
|
impl->runtime.Barriers(true); |
||||
|
impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->SyncWrites(); }); |
||||
|
impl->runtime.Barriers(false); |
||||
|
} |
||||
|
|
||||
|
template <typename Traits> |
||||
|
void QueryCacheBase<Traits>::NotifySegment(bool resume) { |
||||
|
if (resume) { |
||||
|
impl->runtime.ResumeHostConditionalRendering(); |
||||
|
} else { |
||||
|
CounterClose(VideoCommon::QueryType::ZPassPixelCount64); |
||||
|
CounterClose(VideoCommon::QueryType::StreamingByteCount); |
||||
|
impl->runtime.PauseHostConditionalRendering(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
template <typename Traits> |
||||
|
bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() { |
||||
|
bool qc_dirty = false; |
||||
|
const auto gen_lookup = [this, &qc_dirty](GPUVAddr address) -> VideoCommon::LookupData { |
||||
|
auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(address); |
||||
|
if (!cpu_addr_opt) [[unlikely]] { |
||||
|
return VideoCommon::LookupData{ |
||||
|
.address = 0, |
||||
|
.found_query = nullptr, |
||||
|
}; |
||||
|
} |
||||
|
VAddr cpu_addr = *cpu_addr_opt; |
||||
|
std::scoped_lock lock(cache_mutex); |
||||
|
auto it1 = cached_queries.find(cpu_addr >> Core::Memory::YUZU_PAGEBITS); |
||||
|
if (it1 == cached_queries.end()) { |
||||
|
return VideoCommon::LookupData{ |
||||
|
.address = cpu_addr, |
||||
|
.found_query = nullptr, |
||||
|
}; |
||||
|
} |
||||
|
auto& sub_container = it1->second; |
||||
|
auto it_current = sub_container.find(cpu_addr & Core::Memory::YUZU_PAGEMASK); |
||||
|
|
||||
|
if (it_current == sub_container.end()) { |
||||
|
auto it_current_2 = sub_container.find((cpu_addr & Core::Memory::YUZU_PAGEMASK) + 4); |
||||
|
if (it_current_2 == sub_container.end()) { |
||||
|
return VideoCommon::LookupData{ |
||||
|
.address = cpu_addr, |
||||
|
.found_query = nullptr, |
||||
|
}; |
||||
|
} |
||||
|
} |
||||
|
auto* query = impl->ObtainQuery(it_current->second); |
||||
|
qc_dirty |= True(query->flags & QueryFlagBits::IsHostManaged) && |
||||
|
False(query->flags & QueryFlagBits::IsGuestSynced); |
||||
|
return VideoCommon::LookupData{ |
||||
|
.address = cpu_addr, |
||||
|
.found_query = query, |
||||
|
}; |
||||
|
}; |
||||
|
|
||||
|
auto& regs = maxwell3d->regs; |
||||
|
if (regs.render_enable_override != Maxwell::Regs::RenderEnable::Override::UseRenderEnable) { |
||||
|
impl->runtime.EndHostConditionalRendering(); |
||||
|
return false; |
||||
|
} |
||||
|
const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode); |
||||
|
const GPUVAddr address = regs.render_enable.Address(); |
||||
|
switch (mode) { |
||||
|
case ComparisonMode::True: |
||||
|
impl->runtime.EndHostConditionalRendering(); |
||||
|
return false; |
||||
|
case ComparisonMode::False: |
||||
|
impl->runtime.EndHostConditionalRendering(); |
||||
|
return false; |
||||
|
case ComparisonMode::Conditional: { |
||||
|
VideoCommon::LookupData object_1{gen_lookup(address)}; |
||||
|
return impl->runtime.HostConditionalRenderingCompareValue(object_1, qc_dirty); |
||||
|
} |
||||
|
case ComparisonMode::IfEqual: { |
||||
|
VideoCommon::LookupData object_1{gen_lookup(address)}; |
||||
|
VideoCommon::LookupData object_2{gen_lookup(address + 16)}; |
||||
|
return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty, |
||||
|
true); |
||||
|
} |
||||
|
case ComparisonMode::IfNotEqual: { |
||||
|
VideoCommon::LookupData object_1{gen_lookup(address)}; |
||||
|
VideoCommon::LookupData object_2{gen_lookup(address + 16)}; |
||||
|
return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty, |
||||
|
false); |
||||
|
} |
||||
|
default: |
||||
|
return false; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Async downloads |
||||
|
template <typename Traits> |
||||
|
void QueryCacheBase<Traits>::CommitAsyncFlushes() { |
||||
|
// Make sure to have the results synced in Host. |
||||
|
NotifyWFI(); |
||||
|
|
||||
|
u64 mask{}; |
||||
|
{ |
||||
|
std::scoped_lock lk(impl->flush_guard); |
||||
|
impl->ForEachStreamer([&mask](StreamerInterface* streamer) { |
||||
|
bool local_result = streamer->HasUnsyncedQueries(); |
||||
|
if (local_result) { |
||||
|
mask |= 1ULL << streamer->GetId(); |
||||
|
} |
||||
|
}); |
||||
|
impl->flushes_pending.push_back(mask); |
||||
|
} |
||||
|
std::function<void()> func([this] { UnregisterPending(); }); |
||||
|
impl->rasterizer.SyncOperation(std::move(func)); |
||||
|
if (mask == 0) { |
||||
|
return; |
||||
|
} |
||||
|
u64 ran_mask = ~mask; |
||||
|
while (mask) { |
||||
|
impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) { |
||||
|
u64 dep_mask = streamer->GetDependentMask(); |
||||
|
if ((dep_mask & ~ran_mask) != 0) { |
||||
|
return; |
||||
|
} |
||||
|
u64 index = streamer->GetId(); |
||||
|
ran_mask |= (1ULL << index); |
||||
|
mask &= ~(1ULL << index); |
||||
|
streamer->PushUnsyncedQueries(); |
||||
|
}); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
template <typename Traits> |
||||
|
bool QueryCacheBase<Traits>::HasUncommittedFlushes() const { |
||||
|
bool result = false; |
||||
|
impl->ForEachStreamer([&result](StreamerInterface* streamer) { |
||||
|
result |= streamer->HasUnsyncedQueries(); |
||||
|
return result; |
||||
|
}); |
||||
|
return result; |
||||
|
} |
||||
|
|
||||
|
template <typename Traits> |
||||
|
bool QueryCacheBase<Traits>::ShouldWaitAsyncFlushes() { |
||||
|
std::scoped_lock lk(impl->flush_guard); |
||||
|
return !impl->flushes_pending.empty() && impl->flushes_pending.front() != 0ULL; |
||||
|
} |
||||
|
|
||||
|
template <typename Traits> |
||||
|
void QueryCacheBase<Traits>::PopAsyncFlushes() { |
||||
|
u64 mask; |
||||
|
{ |
||||
|
std::scoped_lock lk(impl->flush_guard); |
||||
|
mask = impl->flushes_pending.front(); |
||||
|
impl->flushes_pending.pop_front(); |
||||
|
} |
||||
|
if (mask == 0) { |
||||
|
return; |
||||
|
} |
||||
|
u64 ran_mask = ~mask; |
||||
|
while (mask) { |
||||
|
impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) { |
||||
|
u64 dep_mask = streamer->GetDependenceMask(); |
||||
|
if ((dep_mask & ~ran_mask) != 0) { |
||||
|
return; |
||||
|
} |
||||
|
u64 index = streamer->GetId(); |
||||
|
ran_mask |= (1ULL << index); |
||||
|
mask &= ~(1ULL << index); |
||||
|
streamer->PopUnsyncedQueries(); |
||||
|
}); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Invalidation |
||||
|
|
||||
|
template <typename Traits> |
||||
|
void QueryCacheBase<Traits>::InvalidateQuery(QueryCacheBase<Traits>::QueryLocation location) { |
||||
|
auto* query_base = impl->ObtainQuery(location); |
||||
|
if (!query_base) { |
||||
|
return; |
||||
|
} |
||||
|
query_base->flags |= QueryFlagBits::IsInvalidated; |
||||
|
} |
||||
|
|
||||
|
template <typename Traits> |
||||
|
bool QueryCacheBase<Traits>::IsQueryDirty(QueryCacheBase<Traits>::QueryLocation location) { |
||||
|
auto* query_base = impl->ObtainQuery(location); |
||||
|
if (!query_base) { |
||||
|
return false; |
||||
|
} |
||||
|
return True(query_base->flags & QueryFlagBits::IsHostManaged) && |
||||
|
False(query_base->flags & QueryFlagBits::IsGuestSynced); |
||||
|
} |
||||
|
|
||||
|
template <typename Traits> |
||||
|
bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLocation location) { |
||||
|
auto* query_base = impl->ObtainQuery(location); |
||||
|
if (!query_base) { |
||||
|
return false; |
||||
|
} |
||||
|
if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && |
||||
|
False(query_base->flags & QueryFlagBits::IsGuestSynced)) { |
||||
|
auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address); |
||||
|
if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { |
||||
|
std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); |
||||
|
return false; |
||||
|
} |
||||
|
u32 value_l = static_cast<u32>(query_base->value); |
||||
|
std::memcpy(ptr, &value_l, sizeof(value_l)); |
||||
|
return false; |
||||
|
} |
||||
|
return True(query_base->flags & QueryFlagBits::IsHostManaged) && |
||||
|
False(query_base->flags & QueryFlagBits::IsGuestSynced); |
||||
|
} |
||||
|
|
||||
|
template <typename Traits> |
||||
|
void QueryCacheBase<Traits>::RequestGuestHostSync() { |
||||
|
impl->rasterizer.ReleaseFences(); |
||||
|
} |
||||
|
|
||||
|
} // namespace VideoCommon |
||||
@ -0,0 +1,181 @@ |
|||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
||||
|
// SPDX-License-Identifier: GPL-3.0-or-later |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include <functional> |
||||
|
#include <mutex> |
||||
|
#include <optional> |
||||
|
#include <span> |
||||
|
#include <unordered_map> |
||||
|
#include <utility> |
||||
|
|
||||
|
#include "common/assert.h" |
||||
|
#include "common/bit_field.h" |
||||
|
#include "common/common_types.h" |
||||
|
#include "core/memory.h" |
||||
|
#include "video_core/control/channel_state_cache.h" |
||||
|
#include "video_core/query_cache/query_base.h" |
||||
|
#include "video_core/query_cache/types.h" |
||||
|
|
||||
|
namespace Core::Memory { |
||||
|
class Memory; |
||||
|
} |
||||
|
|
||||
|
namespace VideoCore { |
||||
|
class RasterizerInterface; |
||||
|
} |
||||
|
|
||||
|
namespace Tegra { |
||||
|
class GPU; |
||||
|
} |
||||
|
|
||||
|
namespace VideoCommon { |
||||
|
|
||||
|
struct LookupData { |
||||
|
VAddr address; |
||||
|
QueryBase* found_query; |
||||
|
}; |
||||
|
|
||||
|
template <typename Traits> |
||||
|
class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |
||||
|
using RuntimeType = typename Traits::RuntimeType; |
||||
|
|
||||
|
public: |
||||
|
union QueryLocation { |
||||
|
BitField<27, 5, u32> stream_id; |
||||
|
BitField<0, 27, u32> query_id; |
||||
|
u32 raw; |
||||
|
|
||||
|
std::pair<size_t, size_t> unpack() const { |
||||
|
return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())}; |
||||
|
} |
||||
|
}; |
||||
|
|
||||
|
explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_, |
||||
|
Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_); |
||||
|
|
||||
|
~QueryCacheBase(); |
||||
|
|
||||
|
void InvalidateRegion(VAddr addr, std::size_t size) { |
||||
|
IterateCache<true>(addr, size, |
||||
|
[this](QueryLocation location) { InvalidateQuery(location); }); |
||||
|
} |
||||
|
|
||||
|
void FlushRegion(VAddr addr, std::size_t size) { |
||||
|
bool result = false; |
||||
|
IterateCache<false>(addr, size, [this, &result](QueryLocation location) { |
||||
|
result |= SemiFlushQueryDirty(location); |
||||
|
return result; |
||||
|
}); |
||||
|
if (result) { |
||||
|
RequestGuestHostSync(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
static u64 BuildMask(std::span<const QueryType> types) { |
||||
|
u64 mask = 0; |
||||
|
for (auto query_type : types) { |
||||
|
mask |= 1ULL << (static_cast<u64>(query_type)); |
||||
|
} |
||||
|
return mask; |
||||
|
} |
||||
|
|
||||
|
/// Return true when a CPU region is modified from the GPU |
||||
|
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size) { |
||||
|
bool result = false; |
||||
|
IterateCache<false>(addr, size, [this, &result](QueryLocation location) { |
||||
|
result |= IsQueryDirty(location); |
||||
|
return result; |
||||
|
}); |
||||
|
return result; |
||||
|
} |
||||
|
|
||||
|
void CounterEnable(QueryType counter_type, bool is_enabled); |
||||
|
|
||||
|
void CounterReset(QueryType counter_type); |
||||
|
|
||||
|
void CounterClose(QueryType counter_type); |
||||
|
|
||||
|
void CounterReport(GPUVAddr addr, QueryType counter_type, QueryPropertiesFlags flags, |
||||
|
u32 payload, u32 subreport); |
||||
|
|
||||
|
void NotifyWFI(); |
||||
|
|
||||
|
bool AccelerateHostConditionalRendering(); |
||||
|
|
||||
|
// Async downloads |
||||
|
void CommitAsyncFlushes(); |
||||
|
|
||||
|
bool HasUncommittedFlushes() const; |
||||
|
|
||||
|
bool ShouldWaitAsyncFlushes(); |
||||
|
|
||||
|
void PopAsyncFlushes(); |
||||
|
|
||||
|
void NotifySegment(bool resume); |
||||
|
|
||||
|
void BindToChannel(s32 id) override; |
||||
|
|
||||
|
protected: |
||||
|
template <bool remove_from_cache, typename Func> |
||||
|
void IterateCache(VAddr addr, std::size_t size, Func&& func) { |
||||
|
static constexpr bool RETURNS_BOOL = |
||||
|
std::is_same_v<std::invoke_result<Func, QueryLocation>, bool>; |
||||
|
const u64 addr_begin = addr; |
||||
|
const u64 addr_end = addr_begin + size; |
||||
|
|
||||
|
const u64 page_end = addr_end >> Core::Memory::YUZU_PAGEBITS; |
||||
|
std::scoped_lock lock(cache_mutex); |
||||
|
for (u64 page = addr_begin >> Core::Memory::YUZU_PAGEBITS; page <= page_end; ++page) { |
||||
|
const u64 page_start = page << Core::Memory::YUZU_PAGEBITS; |
||||
|
const auto in_range = [page_start, addr_begin, addr_end](const u32 query_location) { |
||||
|
const u64 cache_begin = page_start + query_location; |
||||
|
const u64 cache_end = cache_begin + sizeof(u32); |
||||
|
return cache_begin < addr_end && addr_begin < cache_end; |
||||
|
}; |
||||
|
const auto& it = cached_queries.find(page); |
||||
|
if (it == std::end(cached_queries)) { |
||||
|
continue; |
||||
|
} |
||||
|
auto& contents = it->second; |
||||
|
for (auto& query : contents) { |
||||
|
if (!in_range(query.first)) { |
||||
|
continue; |
||||
|
} |
||||
|
if constexpr (RETURNS_BOOL) { |
||||
|
if (func(query.second)) { |
||||
|
return; |
||||
|
} |
||||
|
} else { |
||||
|
func(query.second); |
||||
|
} |
||||
|
} |
||||
|
if constexpr (remove_from_cache) { |
||||
|
const auto in_range2 = [&](const std::pair<u32, QueryLocation>& pair) { |
||||
|
return in_range(pair.first); |
||||
|
}; |
||||
|
std::erase_if(contents, in_range2); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
using ContentCache = std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>; |
||||
|
|
||||
|
void InvalidateQuery(QueryLocation location); |
||||
|
bool IsQueryDirty(QueryLocation location); |
||||
|
bool SemiFlushQueryDirty(QueryLocation location); |
||||
|
void RequestGuestHostSync(); |
||||
|
void UnregisterPending(); |
||||
|
|
||||
|
std::unordered_map<u64, std::unordered_map<u32, QueryLocation>> cached_queries; |
||||
|
std::mutex cache_mutex; |
||||
|
|
||||
|
struct QueryCacheBaseImpl; |
||||
|
friend struct QueryCacheBaseImpl; |
||||
|
friend RuntimeType; |
||||
|
|
||||
|
std::unique_ptr<QueryCacheBaseImpl> impl; |
||||
|
}; |
||||
|
|
||||
|
} // namespace VideoCommon |
||||
@ -0,0 +1,149 @@ |
|||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
||||
|
// SPDX-License-Identifier: GPL-3.0-or-later |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include <deque> |
||||
|
#include <optional> |
||||
|
#include <vector> |
||||
|
|
||||
|
#include "common/assert.h" |
||||
|
#include "common/common_types.h" |
||||
|
#include "video_core/query_cache/bank_base.h" |
||||
|
#include "video_core/query_cache/query_base.h" |
||||
|
|
||||
|
namespace VideoCommon { |
||||
|
|
||||
|
class StreamerInterface { |
||||
|
public: |
||||
|
explicit StreamerInterface(size_t id_) : id{id_}, dependence_mask{}, dependent_mask{} {} |
||||
|
virtual ~StreamerInterface() = default; |
||||
|
|
||||
|
virtual QueryBase* GetQuery(size_t id) = 0; |
||||
|
|
||||
|
virtual void StartCounter() { |
||||
|
/* Do Nothing */ |
||||
|
} |
||||
|
|
||||
|
virtual void PauseCounter() { |
||||
|
/* Do Nothing */ |
||||
|
} |
||||
|
|
||||
|
virtual void ResetCounter() { |
||||
|
/* Do Nothing */ |
||||
|
} |
||||
|
|
||||
|
virtual void CloseCounter() { |
||||
|
/* Do Nothing */ |
||||
|
} |
||||
|
|
||||
|
virtual bool HasPendingSync() const { |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
virtual void PresyncWrites() { |
||||
|
/* Do Nothing */ |
||||
|
} |
||||
|
|
||||
|
virtual void SyncWrites() { |
||||
|
/* Do Nothing */ |
||||
|
} |
||||
|
|
||||
|
virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, |
||||
|
std::optional<u32> subreport = std::nullopt) = 0; |
||||
|
|
||||
|
virtual bool HasUnsyncedQueries() const { |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
virtual void PushUnsyncedQueries() { |
||||
|
/* Do Nothing */ |
||||
|
} |
||||
|
|
||||
|
virtual void PopUnsyncedQueries() { |
||||
|
/* Do Nothing */ |
||||
|
} |
||||
|
|
||||
|
virtual void Free(size_t query_id) = 0; |
||||
|
|
||||
|
size_t GetId() const { |
||||
|
return id; |
||||
|
} |
||||
|
|
||||
|
u64 GetDependenceMask() const { |
||||
|
return dependence_mask; |
||||
|
} |
||||
|
|
||||
|
u64 GetDependentMask() const { |
||||
|
return dependence_mask; |
||||
|
} |
||||
|
|
||||
|
u64 GetAmmendValue() const { |
||||
|
return ammend_value; |
||||
|
} |
||||
|
|
||||
|
void SetAccumulationValue(u64 new_value) { |
||||
|
acumulation_value = new_value; |
||||
|
} |
||||
|
|
||||
|
protected: |
||||
|
void MakeDependent(StreamerInterface* depend_on) { |
||||
|
dependence_mask |= 1ULL << depend_on->id; |
||||
|
depend_on->dependent_mask |= 1ULL << id; |
||||
|
} |
||||
|
|
||||
|
const size_t id; |
||||
|
u64 dependence_mask; |
||||
|
u64 dependent_mask; |
||||
|
u64 ammend_value{}; |
||||
|
u64 acumulation_value{}; |
||||
|
}; |
||||
|
|
||||
|
template <typename QueryType> |
||||
|
class SimpleStreamer : public StreamerInterface { |
||||
|
public: |
||||
|
explicit SimpleStreamer(size_t id_) : StreamerInterface{id_} {} |
||||
|
virtual ~SimpleStreamer() = default; |
||||
|
|
||||
|
protected: |
||||
|
virtual QueryType* GetQuery(size_t query_id) override { |
||||
|
if (query_id < slot_queries.size()) { |
||||
|
return &slot_queries[query_id]; |
||||
|
} |
||||
|
return nullptr; |
||||
|
} |
||||
|
|
||||
|
virtual void Free(size_t query_id) override { |
||||
|
std::scoped_lock lk(guard); |
||||
|
ReleaseQuery(query_id); |
||||
|
} |
||||
|
|
||||
|
template <typename... Args, typename = decltype(QueryType(std::declval<Args>()...))> |
||||
|
size_t BuildQuery(Args&&... args) { |
||||
|
std::scoped_lock lk(guard); |
||||
|
if (!old_queries.empty()) { |
||||
|
size_t new_id = old_queries.front(); |
||||
|
old_queries.pop_front(); |
||||
|
new (&slot_queries[new_id]) QueryType(std::forward<Args>(args)...); |
||||
|
return new_id; |
||||
|
} |
||||
|
size_t new_id = slot_queries.size(); |
||||
|
slot_queries.emplace_back(std::forward<Args>(args)...); |
||||
|
return new_id; |
||||
|
} |
||||
|
|
||||
|
void ReleaseQuery(size_t query_id) { |
||||
|
|
||||
|
if (query_id < slot_queries.size()) { |
||||
|
old_queries.push_back(query_id); |
||||
|
return; |
||||
|
} |
||||
|
UNREACHABLE(); |
||||
|
} |
||||
|
|
||||
|
std::mutex guard; |
||||
|
std::deque<QueryType> slot_queries; |
||||
|
std::deque<size_t> old_queries; |
||||
|
}; |
||||
|
|
||||
|
} // namespace VideoCommon |
||||
@ -0,0 +1,74 @@ |
|||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
||||
|
// SPDX-License-Identifier: GPL-3.0-or-later |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include "common/common_funcs.h" |
||||
|
#include "common/common_types.h" |
||||
|
|
||||
|
namespace VideoCommon { |
||||
|
|
||||
|
enum class QueryPropertiesFlags : u32 { |
||||
|
HasTimeout = 1 << 0, |
||||
|
IsAFence = 1 << 1, |
||||
|
}; |
||||
|
DECLARE_ENUM_FLAG_OPERATORS(QueryPropertiesFlags) |
||||
|
|
||||
|
// This should always be equivalent to maxwell3d Report Semaphore Reports |
||||
|
enum class QueryType : u32 { |
||||
|
Payload = 0, // "None" in docs, but confirmed via hardware to return the payload |
||||
|
VerticesGenerated = 1, |
||||
|
ZPassPixelCount = 2, |
||||
|
PrimitivesGenerated = 3, |
||||
|
AlphaBetaClocks = 4, |
||||
|
VertexShaderInvocations = 5, |
||||
|
StreamingPrimitivesNeededMinusSucceeded = 6, |
||||
|
GeometryShaderInvocations = 7, |
||||
|
GeometryShaderPrimitivesGenerated = 9, |
||||
|
ZCullStats0 = 10, |
||||
|
StreamingPrimitivesSucceeded = 11, |
||||
|
ZCullStats1 = 12, |
||||
|
StreamingPrimitivesNeeded = 13, |
||||
|
ZCullStats2 = 14, |
||||
|
ClipperInvocations = 15, |
||||
|
ZCullStats3 = 16, |
||||
|
ClipperPrimitivesGenerated = 17, |
||||
|
VtgPrimitivesOut = 18, |
||||
|
PixelShaderInvocations = 19, |
||||
|
ZPassPixelCount64 = 21, |
||||
|
IEEECleanColorTarget = 24, |
||||
|
IEEECleanZetaTarget = 25, |
||||
|
StreamingByteCount = 26, |
||||
|
TessellationInitInvocations = 27, |
||||
|
BoundingRectangle = 28, |
||||
|
TessellationShaderInvocations = 29, |
||||
|
TotalStreamingPrimitivesNeededMinusSucceeded = 30, |
||||
|
TessellationShaderPrimitivesGenerated = 31, |
||||
|
// max. |
||||
|
MaxQueryTypes, |
||||
|
}; |
||||
|
|
||||
|
// Comparison modes for Host Conditional Rendering |
||||
|
enum class ComparisonMode : u32 { |
||||
|
False = 0, |
||||
|
True = 1, |
||||
|
Conditional = 2, |
||||
|
IfEqual = 3, |
||||
|
IfNotEqual = 4, |
||||
|
MaxComparisonMode, |
||||
|
}; |
||||
|
|
||||
|
// Reduction ops. |
||||
|
enum class ReductionOp : u32 { |
||||
|
RedAdd = 0, |
||||
|
RedMin = 1, |
||||
|
RedMax = 2, |
||||
|
RedInc = 3, |
||||
|
RedDec = 4, |
||||
|
RedAnd = 5, |
||||
|
RedOr = 6, |
||||
|
RedXor = 7, |
||||
|
MaxReductionOp, |
||||
|
}; |
||||
|
|
||||
|
} // namespace VideoCommon |
||||
1593
src/video_core/renderer_vulkan/vk_query_cache.cpp
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -1,101 +1,75 @@ |
|||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project |
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later |
|
||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
||||
|
// SPDX-License-Identifier: GPL-3.0-or-later |
||||
|
|
||||
#pragma once |
#pragma once |
||||
|
|
||||
#include <cstddef> |
|
||||
#include <memory> |
#include <memory> |
||||
#include <utility> |
|
||||
#include <vector> |
|
||||
|
|
||||
#include "common/common_types.h" |
|
||||
#include "video_core/query_cache.h" |
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h" |
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h" |
|
||||
|
#include "video_core/query_cache/query_cache_base.h" |
||||
|
#include "video_core/renderer_vulkan/vk_buffer_cache.h" |
||||
|
|
||||
namespace VideoCore { |
namespace VideoCore { |
||||
class RasterizerInterface; |
class RasterizerInterface; |
||||
} |
} |
||||
|
|
||||
|
namespace VideoCommon { |
||||
|
class StreamerInterface; |
||||
|
} |
||||
|
|
||||
namespace Vulkan { |
namespace Vulkan { |
||||
|
|
||||
class CachedQuery; |
|
||||
class Device; |
class Device; |
||||
class HostCounter; |
|
||||
class QueryCache; |
|
||||
class Scheduler; |
class Scheduler; |
||||
|
class StagingBufferPool; |
||||
|
|
||||
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; |
|
||||
|
struct QueryCacheRuntimeImpl; |
||||
|
|
||||
class QueryPool final : public ResourcePool { |
|
||||
|
class QueryCacheRuntime { |
||||
public: |
public: |
||||
explicit QueryPool(const Device& device, Scheduler& scheduler, VideoCore::QueryType type); |
|
||||
~QueryPool() override; |
|
||||
|
explicit QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, |
||||
|
Core::Memory::Memory& cpu_memory_, |
||||
|
Vulkan::BufferCache& buffer_cache_, const Device& device_, |
||||
|
const MemoryAllocator& memory_allocator_, Scheduler& scheduler_, |
||||
|
StagingBufferPool& staging_pool_, |
||||
|
ComputePassDescriptorQueue& compute_pass_descriptor_queue, |
||||
|
DescriptorPool& descriptor_pool); |
||||
|
~QueryCacheRuntime(); |
||||
|
|
||||
std::pair<VkQueryPool, u32> Commit(); |
|
||||
|
template <typename SyncValuesType> |
||||
|
void SyncValues(std::span<SyncValuesType> values, VkBuffer base_src_buffer = nullptr); |
||||
|
|
||||
void Reserve(std::pair<VkQueryPool, u32> query); |
|
||||
|
void Barriers(bool is_prebarrier); |
||||
|
|
||||
protected: |
|
||||
void Allocate(std::size_t begin, std::size_t end) override; |
|
||||
|
void EndHostConditionalRendering(); |
||||
|
|
||||
private: |
|
||||
static constexpr std::size_t GROW_STEP = 512; |
|
||||
|
void PauseHostConditionalRendering(); |
||||
|
|
||||
const Device& device; |
|
||||
const VideoCore::QueryType type; |
|
||||
|
void ResumeHostConditionalRendering(); |
||||
|
|
||||
std::vector<vk::QueryPool> pools; |
|
||||
std::vector<bool> usage; |
|
||||
}; |
|
||||
|
bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty); |
||||
|
|
||||
class QueryCache final |
|
||||
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { |
|
||||
public: |
|
||||
explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, |
|
||||
Core::Memory::Memory& cpu_memory_, const Device& device_, |
|
||||
Scheduler& scheduler_); |
|
||||
~QueryCache(); |
|
||||
|
|
||||
std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); |
|
||||
|
bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1, |
||||
|
VideoCommon::LookupData object_2, bool qc_dirty, |
||||
|
bool equal_check); |
||||
|
|
||||
void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query); |
|
||||
|
VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type); |
||||
|
|
||||
const Device& GetDevice() const noexcept { |
|
||||
return device; |
|
||||
} |
|
||||
|
void Bind3DEngine(Tegra::Engines::Maxwell3D* maxwell3d); |
||||
|
|
||||
Scheduler& GetScheduler() const noexcept { |
|
||||
return scheduler; |
|
||||
} |
|
||||
|
template <typename Func> |
||||
|
void View3DRegs(Func&& func); |
||||
|
|
||||
private: |
private: |
||||
const Device& device; |
|
||||
Scheduler& scheduler; |
|
||||
std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; |
|
||||
|
void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal); |
||||
|
void HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal); |
||||
|
friend struct QueryCacheRuntimeImpl; |
||||
|
std::unique_ptr<QueryCacheRuntimeImpl> impl; |
||||
}; |
}; |
||||
|
|
||||
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { |
|
||||
public: |
|
||||
explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_, |
|
||||
VideoCore::QueryType type_); |
|
||||
~HostCounter(); |
|
||||
|
|
||||
void EndQuery(); |
|
||||
|
|
||||
private: |
|
||||
u64 BlockingQuery(bool async = false) const override; |
|
||||
|
|
||||
QueryCache& cache; |
|
||||
const VideoCore::QueryType type; |
|
||||
const std::pair<VkQueryPool, u32> query; |
|
||||
const u64 tick; |
|
||||
|
struct QueryCacheParams { |
||||
|
using RuntimeType = typename Vulkan::QueryCacheRuntime; |
||||
}; |
}; |
||||
|
|
||||
class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> { |
|
||||
public: |
|
||||
explicit CachedQuery(QueryCache&, VideoCore::QueryType, VAddr cpu_addr_, u8* host_ptr_) |
|
||||
: CachedQueryBase{cpu_addr_, host_ptr_} {} |
|
||||
}; |
|
||||
|
using QueryCache = VideoCommon::QueryCacheBase<QueryCacheParams>; |
||||
|
|
||||
} // namespace Vulkan |
} // namespace Vulkan |
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue