Browse Source
[video_core] Implement GPU-accelerated texture unswizzling and optimize sparse texture handling (#3246)
[video_core] Implement GPU-accelerated texture unswizzling and optimize sparse texture handling (#3246)
- [Added] a new compute shader to handle block-linear unswizzling on the GPU, reducing CPU overhead during texture uploads - [Implemented] BlockLinearUnswizzle3DPass to take advantage of the new compute shader (not implemented for OpenGL) - [Implemented] a texture streaming and queue system for large sparse textures to prevent hitches - [Implemented] an aggressive garbage collection system that ejects large sparse textures to save memory (unused) - [Added] user settings to adjust the streaming unswizzle system for low-end machines - [Improved] the ASTC GPU decoding system slightly Co-authored-by: Caio Oliveira <caiooliveirafarias0@gmail.com> Co-authored-by: CamilleLaVey <camillelavey99@gmail.com> Co-authored-by: DraVee <dravee@eden-emu.dev> Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3246 Reviewed-by: Maufeat <sahyno1996@gmail.com> Reviewed-by: MaranBr <maranbr@eden-emu.dev> Reviewed-by: DraVee <dravee@eden-emu.dev> Reviewed-by: CamilleLaVey <camillelavey99@gmail.com> Co-authored-by: Forrest Keller <forrestmarkx@outlook.com> Co-committed-by: Forrest Keller <forrestmarkx@outlook.com> pull/3261/head
committed by
crueter
No known key found for this signature in database
GPG Key ID: 425ACD2D4830EBC6
20 changed files with 1076 additions and 83 deletions
-
3src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt
-
27src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt
-
3src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt
-
48src/android/app/src/main/res/values/arrays.xml
-
28src/android/app/src/main/res/values/strings.xml
-
18src/common/settings.h
-
3src/common/settings_enums.h
-
40src/qt_common/config/shared_translation.cpp
-
1src/video_core/host_shaders/CMakeLists.txt
-
96src/video_core/host_shaders/astc_decoder.comp
-
160src/video_core/host_shaders/block_linear_unswizzle_3d_bcn.comp
-
5src/video_core/renderer_opengl/gl_texture_cache.cpp
-
10src/video_core/renderer_opengl/gl_texture_cache.h
-
290src/video_core/renderer_vulkan/vk_compute_pass.cpp
-
34src/video_core/renderer_vulkan/vk_compute_pass.h
-
10src/video_core/renderer_vulkan/vk_scheduler.cpp
-
97src/video_core/renderer_vulkan/vk_texture_cache.cpp
-
22src/video_core/renderer_vulkan/vk_texture_cache.h
-
243src/video_core/texture_cache/texture_cache.h
-
21src/video_core/texture_cache/texture_cache_base.h
@ -0,0 +1,160 @@ |
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project |
|||
// SPDX-License-Identifier: GPL-3.0-or-later |
|||
|
|||
#version 430 |
|||
|
|||
// Backend configuration: required extensions, the HAS_EXTENDED_TYPES switch
// (1 when uint8_t/uint16_t buffer members are usable), the push-constant /
// uniform helper macros, and the storage-buffer binding slots.
#ifdef VULKAN

#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_8bit_storage : require
#define HAS_EXTENDED_TYPES 1
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
#define END_PUSH_CONSTANTS };
#define UNIFORM(n)
#define BINDING_SWIZZLE_BUFFER 0
#define BINDING_INPUT_BUFFER 1
#define BINDING_OUTPUT_BUFFER 2

#else // OpenGL

#extension GL_NV_gpu_shader5 : enable
#ifdef GL_NV_gpu_shader5
#define HAS_EXTENDED_TYPES 1
#else
#define HAS_EXTENDED_TYPES 0
#endif
#define BEGIN_PUSH_CONSTANTS
#define END_PUSH_CONSTANTS
#define UNIFORM(n) layout(location = n) uniform
#define BINDING_SWIZZLE_BUFFER 0
#define BINDING_INPUT_BUFFER 1
// The output is a storage buffer, so it shares the binding namespace with the
// swizzle table and the input buffer. It must not reuse slot 0, otherwise the
// output block would alias the swizzle table.
#define BINDING_OUTPUT_BUFFER 2

#endif
|||
|
|||
// --- Push Constants / Uniforms --- |
|||
// Dispatch parameters. Both backends expose them through an instance named
// `pc`, so the rest of the shader can use `pc.<member>` unconditionally.
#ifdef VULKAN

// Offsets below follow std430 rules (the default for push-constant blocks):
// three-component vectors are aligned to 16 bytes.
layout(push_constant) uniform PushConstants {
    uvec3 blocks_dim;          // Offset 0
    uint bytes_per_block_log2; // Offset 12

    uvec3 origin;    // Offset 16
    uint slice_size; // Offset 28

    uint block_size;        // Offset 32
    uint x_shift;           // Offset 36
    uint block_height;      // Offset 40
    uint block_height_mask; // Offset 44

    uint block_depth;      // Offset 48
    uint block_depth_mask; // Offset 52
    int _pad;              // Offset 56

    // ivec3 has a 16-byte base alignment, so this member starts at offset 64;
    // bytes 60..63 are implicit padding. The host-side structure has to use
    // the same offset.
    ivec3 destination; // Offset 64
} pc;

#else // OpenGL

// A struct uniform with an explicit base location assigns consecutive
// locations to its members in declaration order. The member order therefore
// reproduces the per-uniform locations 0..10, while `pc.<member>` remains
// valid GLSL (an empty `#define pc` would expand to `.member`, which does not
// compile).
struct UnswizzleParams {
    uvec3 origin;              // location 0
    ivec3 destination;         // location 1
    uint bytes_per_block_log2; // location 2
    uint slice_size;           // location 3
    uint block_size;           // location 4
    uint x_shift;              // location 5
    uint block_height;         // location 6
    uint block_height_mask;    // location 7
    uint block_depth;          // location 8
    uint block_depth_mask;     // location 9
    uvec3 blocks_dim;          // location 10
};
layout(location = 0) uniform UnswizzleParams pc;

#endif
|||
|
|||
// --- Buffers --- |
|||
// Lookup table indexed by SwizzleOffset().
layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
    uint swizzle_table[];
};

// The input buffer is declared several times on the same binding so that the
// same memory can be indexed with 8-, 16-, 32-, 64- and 128-bit elements.
#if HAS_EXTENDED_TYPES
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 {
    uint8_t u8data[];
};
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 {
    uint16_t u16data[];
};
#endif
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 {
    uint u32data[];
};
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 {
    uvec2 u64data[];
};
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 {
    uvec4 u128data[];
};

// Destination of the unswizzled texels, addressed in 32-bit words.
layout(binding = BINDING_OUTPUT_BUFFER, std430) writeonly buffer OutputBuffer {
    uint out_u32[];
};
|||
|
|||
// --- Constants --- |
|||
// Work-group shape: 8 x 8 x 4 invocations.
layout(local_size_x = 8, local_size_y = 8, local_size_z = 4) in;

// log2 of the GOB extent along each axis, and of the whole GOB.
const uint GOB_SIZE_X_SHIFT = 6u;
const uint GOB_SIZE_Y_SHIFT = 3u;
const uint GOB_SIZE_Z_SHIFT = 0u;
const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;

// GOB extents derived from the shifts above.
const uint GOB_SIZE_X = 1u << GOB_SIZE_X_SHIFT; // 64
const uint GOB_SIZE_Y = 1u << GOB_SIZE_Y_SHIFT; // 8
const uint GOB_SIZE_Z = 1u << GOB_SIZE_Z_SHIFT; // 1
const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; // 512

// Keeps only the position bits that lie inside one GOB in x and y.
const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X, GOB_SIZE_Y) - uvec2(1u);
|||
|
|||
// --- Helpers --- |
|||
// Looks up the swizzle table entry for `pos`.
// Only the low 6 bits of pos.x and the low 3 bits of pos.y take part in the
// lookup; the table is addressed as rows of 64 entries.
uint SwizzleOffset(uvec2 pos) {
    uvec2 in_gob = pos & SWIZZLE_MASK;
    uint table_index = in_gob.y * GOB_SIZE_X + in_gob.x;
    return swizzle_table[table_index];
}
|||
|
|||
// Reads one texel of (1 << pc.bytes_per_block_log2) bytes from the input
// buffer at byte position `offset`.
// The value is returned in the low components of a uvec4, with the unused
// components set to zero. Sizes other than 1, 2, 4, 8 or 16 bytes yield
// uvec4(0).
uvec4 ReadTexel(uint offset) {
    uvec4 texel = uvec4(0u);
    switch (pc.bytes_per_block_log2) {
    case 0u:
#if HAS_EXTENDED_TYPES
        texel.x = uint(u8data[offset]);
#else
        // Pick the addressed byte out of its containing 32-bit word.
        texel.x = bitfieldExtract(u32data[offset >> 2u], int((offset & 3u) << 3u), 8);
#endif
        break;
    case 1u:
#if HAS_EXTENDED_TYPES
        texel.x = uint(u16data[offset >> 1u]);
#else
        // Pick the addressed half-word out of its containing 32-bit word.
        texel.x = bitfieldExtract(u32data[offset >> 2u], int((offset & 2u) << 3u), 16);
#endif
        break;
    case 2u:
        texel.x = u32data[offset >> 2u];
        break;
    case 3u:
        texel.xy = u64data[offset >> 3u];
        break;
    case 4u:
        texel = u128data[offset >> 4u];
        break;
    default:
        break;
    }
    return texel;
}
|||
|
|||
// Entry point: each invocation copies one block from its block-linear position
// in the input buffer to its linear position in the output buffer.
//
// gl_GlobalInvocationID is the block coordinate; invocations outside
// pc.blocks_dim do nothing. Texels are stored as whole 32-bit words, so only
// 4-, 8- and 16-byte texels are written.
void main() {
    uvec3 block_coord = gl_GlobalInvocationID;
    if (any(greaterThanEqual(block_coord, pc.blocks_dim))) {
        return;
    }

    uint bpl2 = pc.bytes_per_block_log2;
    if (bpl2 < 2u) {
        // Texels smaller than one dword cannot be stored through the uint view
        // of the output buffer: the word stride below would be zero and every
        // invocation would write the same two words.
        return;
    }
    uint bytes_per_block = 1u << bpl2;

    // Origin is in pixels, divide by 4 for block-space (e.g. BCn formats).
    // pos.x is a byte position within the row; pos.y and pos.z count blocks.
    uvec3 pos;
    pos.x = (block_coord.x + (pc.origin.x >> 2u)) * bytes_per_block;
    pos.y = block_coord.y + (pc.origin.y >> 2u);
    pos.z = block_coord.z + pc.origin.z;

    uint swizzle = SwizzleOffset(pos.xy);
    uint block_y = pos.y >> GOB_SIZE_Y_SHIFT;

    // Apply block-linear offsets
    uint offset = 0u;
    offset += (pos.z >> pc.block_depth) * pc.slice_size;
    offset += (pos.z & pc.block_depth_mask) << (GOB_SIZE_SHIFT + pc.block_height);
    offset += (block_y >> pc.block_height) * pc.block_size;
    offset += (block_y & pc.block_height_mask) << GOB_SIZE_SHIFT;
    offset += (pos.x >> GOB_SIZE_X_SHIFT) << pc.x_shift;
    offset += swizzle;

    uvec4 texel = ReadTexel(offset);

    // Calculate linear output index (in 32-bit words)
    uint block_index = block_coord.x +
                       (block_coord.y * pc.blocks_dim.x) +
                       (block_coord.z * pc.blocks_dim.x * pc.blocks_dim.y);
    uint out_idx = block_index * (bytes_per_block >> 2u);

    out_u32[out_idx] = texel.x;
    if (bpl2 >= 3u) {
        // Only texels of at least 8 bytes own a second word. Writing it for
        // 4-byte texels would overwrite the neighbouring invocation's slot.
        out_u32[out_idx + 1u] = texel.y;
    }
    if (bpl2 == 4u) {
        out_u32[out_idx + 2u] = texel.z;
        out_u32[out_idx + 3u] = texel.w;
    }
}
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue