From 731965ac8f89904fda3706338fee850e9236f84d Mon Sep 17 00:00:00 2001
From: CamilleLaVey
Date: Thu, 4 Dec 2025 06:40:14 -0400
Subject: [PATCH] [vk] SL Sample Count Clamp

---
Review notes (this section is ignored by git-am):
 * Device::SampleLocationGridSizeFor() and Device::PopulateSampleLocationGrids()
   used to be inserted in the middle of Device::RemoveUnsuitableExtensions()
   (between the maintenance4 and maintenance5 handling), which cannot compile.
   They are now added right after Device::GetSuitability(). The trailing
   context of that hunk assumes RemoveUnsuitableExtensions() directly follows
   GetSuitability(); confirm against the tree.
 * The table-size static_assert named the private member
   Device::sample_location_table_size from namespace scope. It now lives
   inside PopulateSampleLocationGrids(), where the member is accessible.
 * This copy of the patch had lost its line breaks, its indentation and every
   angle-bracketed token. Indentation is reconstructed, and the header names
   on the context lines of the vulkan_device.h include hunk could not be
   recovered; regenerate the patch against the tree before applying.

 .../renderer_vulkan/vk_graphics_pipeline.cpp  | 15 ++--
 .../renderer_vulkan/vk_rasterizer.cpp         | 34 +++++---
 .../vulkan_common/vulkan_device.cpp           | 82 +++++++++++++++++++
 src/video_core/vulkan_common/vulkan_device.h  | 10 +++
 .../vulkan_common/vulkan_wrapper.cpp          |  1 +
 src/video_core/vulkan_common/vulkan_wrapper.h |  1 +
 6 files changed, 126 insertions(+), 17 deletions(-)

diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 170fba1897..88937ef0ec 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -862,14 +862,17 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
     const bool alpha_to_one_supported = device.SupportsAlphaToOne();
     const auto msaa_mode = key.state.msaa_mode.Value();
     const VkSampleCountFlagBits vk_samples = MaxwellToVK::MsaaMode(msaa_mode);
-    const auto [grid_width, grid_height] = VideoCommon::SampleLocationGridSize(msaa_mode);
+    const auto [guest_grid_width, guest_grid_height] = VideoCommon::SampleLocationGridSize(msaa_mode);
     const auto& sample_location_props = device.SampleLocationProperties();
-    const bool grid_within_limits = grid_width <= sample_location_props.maxSampleLocationGridSize.width &&
-                                    grid_height <= sample_location_props.maxSampleLocationGridSize.height;
+    const VkExtent2D host_grid_limit = device.SampleLocationGridSizeFor(vk_samples);
+    const VkExtent2D grid_size{
+        .width = (std::max)(1u, (std::min)(guest_grid_width, host_grid_limit.width)),
+        .height = (std::max)(1u, (std::min)(guest_grid_height, host_grid_limit.height)),
+    };
     const bool supports_sample_locations = device.IsExtSampleLocationsSupported() &&
                                            device.SupportsSampleLocationsFor(vk_samples) &&
                                            sample_location_props.variableSampleLocations == VK_TRUE &&
-                                           grid_within_limits;
+                                           grid_size.width > 0 && grid_size.height > 0;
 
     VkPipelineMultisampleStateCreateInfo multisample_ci{
         .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
@@ -895,13 +898,13 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
         sample_locations_chain.emplace();
         auto& chain = *sample_locations_chain;
         const u32 samples_per_pixel = static_cast<u32>(VideoCommon::NumSamples(msaa_mode));
-        const u32 sample_locations_count = grid_width * grid_height * samples_per_pixel;
+        const u32 sample_locations_count = grid_size.width * grid_size.height * samples_per_pixel;
         chain.locations.fill(VkSampleLocationEXT{0.5f, 0.5f});
         chain.info = VkSampleLocationsInfoEXT{
             .sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT,
             .pNext = nullptr,
             .sampleLocationsPerPixel = vk_samples,
-            .sampleLocationGridSize = VkExtent2D{grid_width, grid_height},
+            .sampleLocationGridSize = grid_size,
             .sampleLocationsCount = sample_locations_count,
             .pSampleLocations = chain.locations.data(),
         };
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index c24ef9f0c7..f338d6f22c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -1723,16 +1723,20 @@ void RasterizerVulkan::UpdateSampleLocations(Maxwell& regs) {
         return;
     }
 
-    const auto [grid_width_u32, grid_height_u32] = VideoCommon::SampleLocationGridSize(msaa_mode);
-    const u32 grid_width = grid_width_u32;
-    const u32 grid_height = grid_height_u32;
-    if (grid_width > sample_props.maxSampleLocationGridSize.width ||
-        grid_height > sample_props.maxSampleLocationGridSize.height) {
-        LOG_WARNING(Render_Vulkan,
-                    "Sample location grid {}x{} exceeds device limit {}x{}, falling back to fixed pattern",
-                    grid_width, grid_height, sample_props.maxSampleLocationGridSize.width,
-                    sample_props.maxSampleLocationGridSize.height);
-        return;
+    const auto [guest_grid_width, guest_grid_height] = VideoCommon::SampleLocationGridSize(msaa_mode);
+    const VkExtent2D host_grid_limit = device.SampleLocationGridSizeFor(vk_samples);
+    const u32 grid_width = (std::max)(1u, (std::min)(guest_grid_width, host_grid_limit.width));
+    const u32 grid_height = (std::max)(1u, (std::min)(guest_grid_height, host_grid_limit.height));
+    const bool grid_clamped = grid_width != guest_grid_width || grid_height != guest_grid_height;
+    if (grid_clamped) {
+        static bool logged_clamp = false;
+        if (!logged_clamp) {
+            LOG_WARNING(Render_Vulkan,
+                        "Host supports sample grid up to {}x{} (requested {}x{}); clamping",
+                        host_grid_limit.width, host_grid_limit.height, guest_grid_width,
+                        guest_grid_height);
+            logged_clamp = true;
+        }
     }
     const u32 samples_per_pixel = static_cast<u32>(VideoCommon::NumSamples(msaa_mode));
     const u32 grid_cells = grid_width * grid_height;
@@ -1758,8 +1762,16 @@ void RasterizerVulkan::UpdateSampleLocations(Maxwell& regs) {
         const u32 slot_base = cell * samples_per_pixel;
         const u32 cell_x = cell % grid_width;
         const u32 cell_y = cell / grid_width;
+        const u32 guest_cell_x = guest_grid_width == grid_width
+                                     ? cell_x
+                                     : (cell_x * guest_grid_width) / grid_width;
+        const u32 guest_cell_y = guest_grid_height == grid_height
+                                     ? cell_y
+                                     : (cell_y * guest_grid_height) / grid_height;
+        const u32 guest_cell = guest_cell_y * guest_grid_width + guest_cell_x;
+        const u32 guest_slot_base = guest_cell * samples_per_pixel;
         for (u32 sample = 0; sample < samples_per_pixel; ++sample) {
-            const VkSampleLocationEXT raw = raw_locations[slot_base + sample];
+            const VkSampleLocationEXT raw = raw_locations[guest_slot_base + sample];
             const float sample_x = static_cast<float>(cell_x) + raw.x;
             const float sample_y = static_cast<float>(cell_y) + raw.y;
             resolved[slot_base + sample] = VkSampleLocationEXT{
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index a921dc7c9c..1388aba363 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -110,6 +110,24 @@ constexpr std::array R16G16B16A16_UNORM{
 
 } // namespace Alternatives
 
+/// Sample counts probed for per-count sample location grid limits, lowest count first.
+constexpr std::array sample_location_query_counts{
+    VK_SAMPLE_COUNT_1_BIT,  VK_SAMPLE_COUNT_2_BIT,  VK_SAMPLE_COUNT_4_BIT,
+    VK_SAMPLE_COUNT_8_BIT,  VK_SAMPLE_COUNT_16_BIT, VK_SAMPLE_COUNT_32_BIT,
+    VK_SAMPLE_COUNT_64_BIT,
+};
+
+/// Returns the position of `samples` in sample_location_query_counts, or the size of that table
+/// when the sample count is not listed in it.
+constexpr size_t SampleCountIndex(VkSampleCountFlagBits samples) {
+    for (size_t index = 0; index < sample_location_query_counts.size(); ++index) {
+        if (sample_location_query_counts[index] == samples) {
+            return index;
+        }
+    }
+    return sample_location_query_counts.size();
+}
+
 [[maybe_unused]] constexpr VkShaderStageFlags GraphicsStageMask =
     VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
     VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT |
@@ -1312,8 +1330,72 @@ bool Device::GetSuitability(bool requires_swapchain) {
         features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable = false;
     }
 
+    PopulateSampleLocationGrids();
+
     // Return whether we were suitable.
     return suitable;
 }
+
+VkExtent2D Device::SampleLocationGridSizeFor(VkSampleCountFlagBits samples) const {
+    const auto sanitize = [](VkExtent2D grid) {
+        if (grid.width == 0 || grid.height == 0) {
+            return VkExtent2D{1, 1};
+        }
+        return grid;
+    };
+    const VkExtent2D fallback = sanitize(properties.sample_locations.maxSampleLocationGridSize);
+    if (!extensions.sample_locations) {
+        return fallback;
+    }
+    const size_t index = SampleCountIndex(samples);
+    if (index >= sample_location_grids.size()) {
+        return fallback;
+    }
+    const VkExtent2D grid = sample_location_grids[index];
+    return grid.width == 0 || grid.height == 0 ? fallback : grid;
+}
+
+void Device::PopulateSampleLocationGrids() {
+    // Checked inside a member function: sample_location_table_size is private to Device.
+    static_assert(sample_location_query_counts.size() == sample_location_table_size);
+    for (auto& grid : sample_location_grids) {
+        grid = VkExtent2D{1, 1};
+    }
+    if (!extensions.sample_locations) {
+        return;
+    }
+    const auto sanitize = [](VkExtent2D grid) {
+        if (grid.width == 0 || grid.height == 0) {
+            return VkExtent2D{1, 1};
+        }
+        return grid;
+    };
+    const VkExtent2D fallback = sanitize(properties.sample_locations.maxSampleLocationGridSize);
+    const VkSampleCountFlags supported_counts =
+        properties.sample_locations.sampleLocationSampleCounts;
+    if (supported_counts == 0) {
+        return;
+    }
+    const bool can_query = dld.vkGetPhysicalDeviceMultisamplePropertiesEXT != nullptr;
+    for (size_t index = 0; index < sample_location_grids.size(); ++index) {
+        const VkSampleCountFlagBits bit = sample_location_query_counts[index];
+        if ((supported_counts & bit) == 0) {
+            continue;
+        }
+        VkExtent2D grid = fallback;
+        if (can_query) {
+            VkMultisamplePropertiesEXT props{
+                .sType = VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT,
+                .pNext = nullptr,
+            };
+            dld.vkGetPhysicalDeviceMultisamplePropertiesEXT(physical, bit, &props);
+            if (props.maxSampleLocationGridSize.width != 0 &&
+                props.maxSampleLocationGridSize.height != 0) {
+                grid = props.maxSampleLocationGridSize;
+            }
+        }
+        sample_location_grids[index] = grid;
+    }
+}
 
 void Device::RemoveUnsuitableExtensions() {
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 811fe8357a..0f1807c710 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -7,6 +7,7 @@
 #pragma once
 
 #include
+#include <array>
 #include
 #include
 #include
@@ -350,6 +351,9 @@ public:
         return properties.sample_locations;
     }
 
+    /// Returns the host-supported sample location grid for the requested sample count.
+    VkExtent2D SampleLocationGridSizeFor(VkSampleCountFlagBits samples) const;
+
     /// Returns true if ASTC is natively supported.
     bool IsOptimalAstcSupported() const {
         return features.features.textureCompressionASTC_LDR;
@@ -976,6 +980,8 @@ public:
     }
 
 private:
+    static constexpr size_t sample_location_table_size = 7;
+
     /// Checks if the physical device is suitable and configures the object state
     /// with all necessary info about its properties.
     bool GetSuitability(bool requires_swapchain);
@@ -983,6 +989,8 @@ private:
     // Remove extensions which have incomplete feature support.
     void RemoveUnsuitableExtensions();
 
+    void PopulateSampleLocationGrids();
+
     void RemoveExtension(bool& extension, const std::string& extension_name);
     void RemoveExtensionIfUnsuitable(bool& extension, const std::string& extension_name);
 
@@ -1077,6 +1085,8 @@ private:
     VkPhysicalDeviceFeatures2 features2{};
     VkPhysicalDeviceProperties2 properties2{};
 
+    std::array<VkExtent2D, sample_location_table_size> sample_location_grids{};
+
     // Misc features
     bool is_optimal_astc_supported{};          ///< Support for all guest ASTC formats.
     bool is_blit_depth24_stencil8_supported{}; ///< Support for blitting from and to D24S8.
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index d0acc3ee2e..72d10f2a53 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -294,6 +294,7 @@ bool Load(VkInstance instance, InstanceDispatch& dld) noexcept {
     X(vkDestroySurfaceKHR);
     X(vkGetPhysicalDeviceFeatures2);
     X(vkGetPhysicalDeviceProperties2);
+    X(vkGetPhysicalDeviceMultisamplePropertiesEXT);
     X(vkGetPhysicalDeviceSurfaceCapabilitiesKHR);
     X(vkGetPhysicalDeviceSurfaceFormatsKHR);
     X(vkGetPhysicalDeviceSurfacePresentModesKHR);
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index ae013d5214..da82be6aa9 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -170,6 +170,7 @@ struct InstanceDispatch {
     PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties{};
     PFN_vkGetPhysicalDeviceProperties2 vkGetPhysicalDeviceProperties2{};
     PFN_vkGetPhysicalDeviceToolProperties vkGetPhysicalDeviceToolProperties{};
+    PFN_vkGetPhysicalDeviceMultisamplePropertiesEXT vkGetPhysicalDeviceMultisamplePropertiesEXT{};
     PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties{};
     PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR{};
     PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR{};