3 changed files with 249 additions and 0 deletions
-
2src/video_core/host_shaders/CMakeLists.txt
-
122src/video_core/host_shaders/block_linear_unswizzle_2d.comp
-
125src/video_core/host_shaders/block_linear_unswizzle_3d.comp
@ -0,0 +1,122 @@ |
|||||
|
// Copyright 2020 yuzu Emulator Project |
||||
|
// Licensed under GPLv2 or any later version |
||||
|
// Refer to the license.txt file included. |
||||
|
|
||||
|
#version 430 |
||||
|
|
||||
|
#ifdef VULKAN |
||||
|
|
||||
|
#extension GL_EXT_shader_16bit_storage : require |
||||
|
#extension GL_EXT_shader_8bit_storage : require |
||||
|
#define HAS_EXTENDED_TYPES 1 |
||||
|
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { |
||||
|
#define END_PUSH_CONSTANTS }; |
||||
|
#define UNIFORM(n) |
||||
|
#define BINDING_SWIZZLE_BUFFER 0 |
||||
|
#define BINDING_INPUT_BUFFER 1 |
||||
|
#define BINDING_OUTPUT_IMAGE 2 |
||||
|
|
||||
|
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv |
||||
|
|
||||
|
#extension GL_NV_gpu_shader5 : enable |
||||
|
#ifdef GL_NV_gpu_shader5 |
||||
|
#define HAS_EXTENDED_TYPES 1 |
||||
|
#else |
||||
|
#define HAS_EXTENDED_TYPES 0 |
||||
|
#endif |
||||
|
#define BEGIN_PUSH_CONSTANTS |
||||
|
#define END_PUSH_CONSTANTS |
||||
|
#define UNIFORM(n) layout (location = n) uniform |
||||
|
#define BINDING_SWIZZLE_BUFFER 0 |
||||
|
#define BINDING_INPUT_BUFFER 1 |
||||
|
#define BINDING_OUTPUT_IMAGE 0 |
||||
|
|
||||
|
#endif |
||||
|
|
||||
|
BEGIN_PUSH_CONSTANTS |
||||
|
UNIFORM(0) uvec3 origin; |
||||
|
UNIFORM(1) ivec3 destination; |
||||
|
UNIFORM(2) uint bytes_per_block_log2; |
||||
|
UNIFORM(3) uint layer_stride; |
||||
|
UNIFORM(4) uint block_size; |
||||
|
UNIFORM(5) uint x_shift; |
||||
|
UNIFORM(6) uint block_height; |
||||
|
UNIFORM(7) uint block_height_mask; |
||||
|
END_PUSH_CONSTANTS |
||||
|
|
||||
|
layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { |
||||
|
uint swizzle_table[]; |
||||
|
}; |
||||
|
|
||||
|
#if HAS_EXTENDED_TYPES |
||||
|
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; }; |
||||
|
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; }; |
||||
|
#endif |
||||
|
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; }; |
||||
|
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; }; |
||||
|
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; }; |
||||
|
|
||||
|
layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage2DArray output_image; |
||||
|
|
||||
|
layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; |
||||
|
|
||||
|
const uint GOB_SIZE_X = 64; |
||||
|
const uint GOB_SIZE_Y = 8; |
||||
|
const uint GOB_SIZE_Z = 1; |
||||
|
const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; |
||||
|
|
||||
|
const uint GOB_SIZE_X_SHIFT = 6; |
||||
|
const uint GOB_SIZE_Y_SHIFT = 3; |
||||
|
const uint GOB_SIZE_Z_SHIFT = 0; |
||||
|
const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; |
||||
|
|
||||
|
const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); |
||||
|
|
||||
|
uint SwizzleOffset(uvec2 pos) { |
||||
|
pos = pos & SWIZZLE_MASK; |
||||
|
return swizzle_table[pos.y * 64 + pos.x]; |
||||
|
} |
||||
|
|
||||
|
uvec4 ReadTexel(uint offset) { |
||||
|
switch (bytes_per_block_log2) { |
||||
|
#if HAS_EXTENDED_TYPES |
||||
|
case 0: |
||||
|
return uvec4(u8data[offset], 0, 0, 0); |
||||
|
case 1: |
||||
|
return uvec4(u16data[offset / 2], 0, 0, 0); |
||||
|
#else |
||||
|
case 0: |
||||
|
return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); |
||||
|
case 1: |
||||
|
return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); |
||||
|
#endif |
||||
|
case 2: |
||||
|
return uvec4(u32data[offset / 4], 0, 0, 0); |
||||
|
case 3: |
||||
|
return uvec4(u64data[offset / 8], 0, 0); |
||||
|
case 4: |
||||
|
return u128data[offset / 16]; |
||||
|
} |
||||
|
return uvec4(0); |
||||
|
} |
||||
|
|
||||
|
void main() { |
||||
|
uvec3 pos = gl_GlobalInvocationID + origin; |
||||
|
pos.x <<= bytes_per_block_log2; |
||||
|
|
||||
|
// Read as soon as possible due to its latency |
||||
|
const uint swizzle = SwizzleOffset(pos.xy); |
||||
|
|
||||
|
const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; |
||||
|
|
||||
|
uint offset = 0; |
||||
|
offset += pos.z * layer_stride; |
||||
|
offset += (block_y >> block_height) * block_size; |
||||
|
offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; |
||||
|
offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; |
||||
|
offset += swizzle; |
||||
|
|
||||
|
const uvec4 texel = ReadTexel(offset); |
||||
|
const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination; |
||||
|
imageStore(output_image, coord, texel); |
||||
|
} |
||||
@ -0,0 +1,125 @@ |
|||||
|
// Copyright 2020 yuzu Emulator Project |
||||
|
// Licensed under GPLv2 or any later version |
||||
|
// Refer to the license.txt file included. |
||||
|
|
||||
|
#version 430 |
||||
|
|
||||
|
#ifdef VULKAN |
||||
|
|
||||
|
#extension GL_EXT_shader_16bit_storage : require |
||||
|
#extension GL_EXT_shader_8bit_storage : require |
||||
|
#define HAS_EXTENDED_TYPES 1 |
||||
|
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { |
||||
|
#define END_PUSH_CONSTANTS }; |
||||
|
#define UNIFORM(n) |
||||
|
#define BINDING_SWIZZLE_BUFFER 0 |
||||
|
#define BINDING_INPUT_BUFFER 1 |
||||
|
#define BINDING_OUTPUT_IMAGE 2 |
||||
|
|
||||
|
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv |
||||
|
|
||||
|
#extension GL_NV_gpu_shader5 : enable |
||||
|
#ifdef GL_NV_gpu_shader5 |
||||
|
#define HAS_EXTENDED_TYPES 1 |
||||
|
#else |
||||
|
#define HAS_EXTENDED_TYPES 0 |
||||
|
#endif |
||||
|
#define BEGIN_PUSH_CONSTANTS |
||||
|
#define END_PUSH_CONSTANTS |
||||
|
#define UNIFORM(n) layout (location = n) uniform |
||||
|
#define BINDING_SWIZZLE_BUFFER 0 |
||||
|
#define BINDING_INPUT_BUFFER 1 |
||||
|
#define BINDING_OUTPUT_IMAGE 0 |
||||
|
|
||||
|
#endif |
||||
|
|
||||
|
BEGIN_PUSH_CONSTANTS |
||||
|
UNIFORM(0) uvec3 origin; |
||||
|
UNIFORM(1) ivec3 destination; |
||||
|
UNIFORM(2) uint bytes_per_block_log2; |
||||
|
UNIFORM(3) uint slice_size; |
||||
|
UNIFORM(4) uint block_size; |
||||
|
UNIFORM(5) uint x_shift; |
||||
|
UNIFORM(6) uint block_height; |
||||
|
UNIFORM(7) uint block_height_mask; |
||||
|
UNIFORM(8) uint block_depth; |
||||
|
UNIFORM(9) uint block_depth_mask; |
||||
|
END_PUSH_CONSTANTS |
||||
|
|
||||
|
layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { |
||||
|
uint swizzle_table[]; |
||||
|
}; |
||||
|
|
||||
|
#if HAS_EXTENDED_TYPES |
||||
|
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; }; |
||||
|
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; }; |
||||
|
#endif |
||||
|
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; }; |
||||
|
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; }; |
||||
|
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; }; |
||||
|
|
||||
|
layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage3D output_image; |
||||
|
|
||||
|
layout(local_size_x = 16, local_size_y = 8, local_size_z = 8) in; |
||||
|
|
||||
|
const uint GOB_SIZE_X = 64; |
||||
|
const uint GOB_SIZE_Y = 8; |
||||
|
const uint GOB_SIZE_Z = 1; |
||||
|
const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; |
||||
|
|
||||
|
const uint GOB_SIZE_X_SHIFT = 6; |
||||
|
const uint GOB_SIZE_Y_SHIFT = 3; |
||||
|
const uint GOB_SIZE_Z_SHIFT = 0; |
||||
|
const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; |
||||
|
|
||||
|
const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); |
||||
|
|
||||
|
uint SwizzleOffset(uvec2 pos) { |
||||
|
pos = pos & SWIZZLE_MASK; |
||||
|
return swizzle_table[pos.y * 64 + pos.x]; |
||||
|
} |
||||
|
|
||||
|
uvec4 ReadTexel(uint offset) { |
||||
|
switch (bytes_per_block_log2) { |
||||
|
#if HAS_EXTENDED_TYPES |
||||
|
case 0: |
||||
|
return uvec4(u8data[offset], 0, 0, 0); |
||||
|
case 1: |
||||
|
return uvec4(u16data[offset / 2], 0, 0, 0); |
||||
|
#else |
||||
|
case 0: |
||||
|
return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0); |
||||
|
case 1: |
||||
|
return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0); |
||||
|
#endif |
||||
|
case 2: |
||||
|
return uvec4(u32data[offset / 4], 0, 0, 0); |
||||
|
case 3: |
||||
|
return uvec4(u64data[offset / 8], 0, 0); |
||||
|
case 4: |
||||
|
return u128data[offset / 16]; |
||||
|
} |
||||
|
return uvec4(0); |
||||
|
} |
||||
|
|
||||
|
void main() { |
||||
|
uvec3 pos = gl_GlobalInvocationID + origin; |
||||
|
pos.x <<= bytes_per_block_log2; |
||||
|
|
||||
|
// Read as soon as possible due to its latency |
||||
|
const uint swizzle = SwizzleOffset(pos.xy); |
||||
|
|
||||
|
const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; |
||||
|
|
||||
|
uint offset = 0; |
||||
|
offset += (pos.z >> block_depth) * slice_size; |
||||
|
offset += (pos.z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height); |
||||
|
offset += (block_y >> block_height) * block_size; |
||||
|
offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; |
||||
|
offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; |
||||
|
offset += swizzle; |
||||
|
|
||||
|
const uvec4 texel = ReadTexel(offset); |
||||
|
const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination; |
||||
|
imageStore(output_image, coord, texel); |
||||
|
} |
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue