Browse Source
Merge pull request #4359 from ReinUsesLisp/clamp-shared
renderer_{opengl,vulkan}: Clamp shared memory to host's limit
pull/15/merge
Rodrigo Locatti
6 years ago
committed by
GitHub
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with
42 additions and
9 deletions
-
src/video_core/renderer_opengl/gl_arb_decompiler.cpp
-
src/video_core/renderer_opengl/gl_device.cpp
-
src/video_core/renderer_opengl/gl_device.h
-
src/video_core/renderer_opengl/gl_shader_decompiler.cpp
-
src/video_core/renderer_vulkan/vk_device.h
-
src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
|
|
|
@ -913,11 +913,19 @@ void ARBDecompiler::DeclareCompute() { |
|
|
|
const ComputeInfo& info = registry.GetComputeInfo(); |
|
|
|
AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], |
|
|
|
info.workgroup_size[2]); |
|
|
|
if (info.shared_memory_size_in_words > 0) { |
|
|
|
const u32 size_in_bytes = info.shared_memory_size_in_words * 4; |
|
|
|
AddLine("SHARED_MEMORY {};", size_in_bytes); |
|
|
|
AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); |
|
|
|
if (info.shared_memory_size_in_words == 0) { |
|
|
|
return; |
|
|
|
} |
|
|
|
const u32 limit = device.GetMaxComputeSharedMemorySize(); |
|
|
|
u32 size_in_bytes = info.shared_memory_size_in_words * 4; |
|
|
|
if (size_in_bytes > limit) { |
|
|
|
LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", |
|
|
|
size_in_bytes, limit); |
|
|
|
size_in_bytes = limit; |
|
|
|
} |
|
|
|
|
|
|
|
AddLine("SHARED_MEMORY {};", size_in_bytes); |
|
|
|
AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); |
|
|
|
} |
|
|
|
|
|
|
|
void ARBDecompiler::DeclareInputAttributes() { |
|
|
|
|
|
|
|
@ -212,6 +212,7 @@ Device::Device() |
|
|
|
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |
|
|
|
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
|
|
|
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); |
|
|
|
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); |
|
|
|
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && |
|
|
|
GLAD_GL_NV_shader_thread_shuffle; |
|
|
|
has_shader_ballot = GLAD_GL_ARB_shader_ballot; |
|
|
|
@ -250,6 +251,7 @@ Device::Device(std::nullptr_t) { |
|
|
|
shader_storage_alignment = 4; |
|
|
|
max_vertex_attributes = 16; |
|
|
|
max_varyings = 15; |
|
|
|
max_compute_shared_memory_size = 0x10000; |
|
|
|
has_warp_intrinsics = true; |
|
|
|
has_shader_ballot = true; |
|
|
|
has_vertex_viewport_layer = true; |
|
|
|
|
|
|
|
@ -52,6 +52,10 @@ public: |
|
|
|
return max_varyings; |
|
|
|
} |
|
|
|
|
|
|
|
u32 GetMaxComputeSharedMemorySize() const { |
|
|
|
return max_compute_shared_memory_size; |
|
|
|
} |
|
|
|
|
|
|
|
bool HasWarpIntrinsics() const { |
|
|
|
return has_warp_intrinsics; |
|
|
|
} |
|
|
|
@ -118,6 +122,7 @@ private: |
|
|
|
std::size_t shader_storage_alignment{}; |
|
|
|
u32 max_vertex_attributes{}; |
|
|
|
u32 max_varyings{}; |
|
|
|
u32 max_compute_shared_memory_size{}; |
|
|
|
bool has_warp_intrinsics{}; |
|
|
|
bool has_shader_ballot{}; |
|
|
|
bool has_vertex_viewport_layer{}; |
|
|
|
|
|
|
|
@ -602,8 +602,15 @@ private: |
|
|
|
return; |
|
|
|
} |
|
|
|
const auto& info = registry.GetComputeInfo(); |
|
|
|
if (const u32 size = info.shared_memory_size_in_words; size > 0) { |
|
|
|
code.AddLine("shared uint smem[{}];", size); |
|
|
|
if (u32 size = info.shared_memory_size_in_words * 4; size > 0) { |
|
|
|
const u32 limit = device.GetMaxComputeSharedMemorySize(); |
|
|
|
if (size > limit) { |
|
|
|
LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", |
|
|
|
size, limit); |
|
|
|
size = limit; |
|
|
|
} |
|
|
|
|
|
|
|
code.AddLine("shared uint smem[{}];", size / 4); |
|
|
|
code.AddNewLine(); |
|
|
|
} |
|
|
|
code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", |
|
|
|
|
|
|
|
@ -122,6 +122,11 @@ public: |
|
|
|
return properties.limits.maxPushConstantsSize; |
|
|
|
} |
|
|
|
|
|
|
|
/// Returns the maximum size for shared memory. |
|
|
|
u32 GetMaxComputeSharedMemorySize() const { |
|
|
|
return properties.limits.maxComputeSharedMemorySize; |
|
|
|
} |
|
|
|
|
|
|
|
/// Returns true if ASTC is natively supported. |
|
|
|
bool IsOptimalAstcSupported() const { |
|
|
|
return is_optimal_astc_supported; |
|
|
|
|
|
|
|
@ -685,13 +685,19 @@ private: |
|
|
|
} |
|
|
|
t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); |
|
|
|
|
|
|
|
const u32 smem_size = specialization.shared_memory_size; |
|
|
|
u32 smem_size = specialization.shared_memory_size * 4; |
|
|
|
if (smem_size == 0) { |
|
|
|
// Avoid declaring an empty array.
|
|
|
|
return; |
|
|
|
} |
|
|
|
const auto element_count = static_cast<u32>(Common::AlignUp(smem_size, 4) / 4); |
|
|
|
const Id type_array = TypeArray(t_uint, Constant(t_uint, element_count)); |
|
|
|
const u32 limit = device.GetMaxComputeSharedMemorySize(); |
|
|
|
if (smem_size > limit) { |
|
|
|
LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}", |
|
|
|
smem_size, limit); |
|
|
|
smem_size = limit; |
|
|
|
} |
|
|
|
|
|
|
|
const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4)); |
|
|
|
const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); |
|
|
|
Name(type_pointer, "SharedMemory"); |
|
|
|
|
|
|
|
|