Browse Source

[vk] SL Sample Count Clamp

eds-true-adreno-fixes
CamilleLaVey 3 weeks ago
committed by Caio Oliveira
parent
commit
731965ac8f
No known key found for this signature in database GPG Key ID: AAAE6C7FD4186B0C
  1. 15
      src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
  2. 32
      src/video_core/renderer_vulkan/vk_rasterizer.cpp
  3. 80
      src/video_core/vulkan_common/vulkan_device.cpp
  4. 10
      src/video_core/vulkan_common/vulkan_device.h
  5. 1
      src/video_core/vulkan_common/vulkan_wrapper.cpp
  6. 1
      src/video_core/vulkan_common/vulkan_wrapper.h

15
src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp

@ -862,14 +862,17 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
const bool alpha_to_one_supported = device.SupportsAlphaToOne();
const auto msaa_mode = key.state.msaa_mode.Value();
const VkSampleCountFlagBits vk_samples = MaxwellToVK::MsaaMode(msaa_mode);
const auto [grid_width, grid_height] = VideoCommon::SampleLocationGridSize(msaa_mode);
const auto [guest_grid_width, guest_grid_height] = VideoCommon::SampleLocationGridSize(msaa_mode);
const auto& sample_location_props = device.SampleLocationProperties();
const bool grid_within_limits = grid_width <= sample_location_props.maxSampleLocationGridSize.width &&
grid_height <= sample_location_props.maxSampleLocationGridSize.height;
const VkExtent2D host_grid_limit = device.SampleLocationGridSizeFor(vk_samples);
const VkExtent2D grid_size{
.width = (std::max)(1u, (std::min)(guest_grid_width, host_grid_limit.width)),
.height = (std::max)(1u, (std::min)(guest_grid_height, host_grid_limit.height)),
};
const bool supports_sample_locations = device.IsExtSampleLocationsSupported() &&
device.SupportsSampleLocationsFor(vk_samples) &&
sample_location_props.variableSampleLocations == VK_TRUE &&
grid_within_limits;
grid_size.width > 0 && grid_size.height > 0;
VkPipelineMultisampleStateCreateInfo multisample_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
@ -895,13 +898,13 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
sample_locations_chain.emplace();
auto& chain = *sample_locations_chain;
const u32 samples_per_pixel = static_cast<u32>(VideoCommon::NumSamples(msaa_mode));
const u32 sample_locations_count = grid_width * grid_height * samples_per_pixel;
const u32 sample_locations_count = grid_size.width * grid_size.height * samples_per_pixel;
chain.locations.fill(VkSampleLocationEXT{0.5f, 0.5f});
chain.info = VkSampleLocationsInfoEXT{
.sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT,
.pNext = nullptr,
.sampleLocationsPerPixel = vk_samples,
.sampleLocationGridSize = VkExtent2D{grid_width, grid_height},
.sampleLocationGridSize = grid_size,
.sampleLocationsCount = sample_locations_count,
.pSampleLocations = chain.locations.data(),
};

32
src/video_core/renderer_vulkan/vk_rasterizer.cpp

@ -1723,16 +1723,20 @@ void RasterizerVulkan::UpdateSampleLocations(Maxwell& regs) {
return;
}
const auto [grid_width_u32, grid_height_u32] = VideoCommon::SampleLocationGridSize(msaa_mode);
const u32 grid_width = grid_width_u32;
const u32 grid_height = grid_height_u32;
if (grid_width > sample_props.maxSampleLocationGridSize.width ||
grid_height > sample_props.maxSampleLocationGridSize.height) {
const auto [guest_grid_width, guest_grid_height] = VideoCommon::SampleLocationGridSize(msaa_mode);
const VkExtent2D host_grid_limit = device.SampleLocationGridSizeFor(vk_samples);
const u32 grid_width = (std::max)(1u, (std::min)(guest_grid_width, host_grid_limit.width));
const u32 grid_height = (std::max)(1u, (std::min)(guest_grid_height, host_grid_limit.height));
const bool grid_clamped = grid_width != guest_grid_width || grid_height != guest_grid_height;
if (grid_clamped) {
static bool logged_clamp = false;
if (!logged_clamp) {
LOG_WARNING(Render_Vulkan,
"Sample location grid {}x{} exceeds device limit {}x{}, falling back to fixed pattern",
grid_width, grid_height, sample_props.maxSampleLocationGridSize.width,
sample_props.maxSampleLocationGridSize.height);
return;
"Host supports sample grid up to {}x{} (requested {}x{}); clamping",
host_grid_limit.width, host_grid_limit.height, guest_grid_width,
guest_grid_height);
logged_clamp = true;
}
}
const u32 samples_per_pixel = static_cast<u32>(VideoCommon::NumSamples(msaa_mode));
const u32 grid_cells = grid_width * grid_height;
@ -1758,8 +1762,16 @@ void RasterizerVulkan::UpdateSampleLocations(Maxwell& regs) {
const u32 slot_base = cell * samples_per_pixel;
const u32 cell_x = cell % grid_width;
const u32 cell_y = cell / grid_width;
const u32 guest_cell_x = guest_grid_width == grid_width
? cell_x
: (cell_x * guest_grid_width) / grid_width;
const u32 guest_cell_y = guest_grid_height == grid_height
? cell_y
: (cell_y * guest_grid_height) / grid_height;
const u32 guest_cell = guest_cell_y * guest_grid_width + guest_cell_x;
const u32 guest_slot_base = guest_cell * samples_per_pixel;
for (u32 sample = 0; sample < samples_per_pixel; ++sample) {
const VkSampleLocationEXT raw = raw_locations[slot_base + sample];
const VkSampleLocationEXT raw = raw_locations[guest_slot_base + sample];
const float sample_x = static_cast<float>(cell_x) + raw.x;
const float sample_y = static_cast<float>(cell_y) + raw.y;
resolved[slot_base + sample] = VkSampleLocationEXT{

80
src/video_core/vulkan_common/vulkan_device.cpp

@ -110,6 +110,24 @@ constexpr std::array R16G16B16A16_UNORM{
} // namespace Alternatives
// Sample counts probed for per-count sample location grid limits, listed in
// ascending order. Entry i of this table describes the sample count whose grid
// limit is stored in entry i of Device::sample_location_grids.
//
// The element count is deliberately deduced from the initializer list: spelling
// the size out as Device::sample_location_table_size would make the
// static_assert below a tautology and would let a forgotten entry be silently
// zero-filled instead of rejected at compile time.
//
// NOTE(review): sample_location_table_size appears under `private:` in
// vulkan_device.h in this change - confirm it is accessible from this scope.
constexpr std::array sample_location_query_counts{
    VK_SAMPLE_COUNT_1_BIT,  VK_SAMPLE_COUNT_2_BIT,  VK_SAMPLE_COUNT_4_BIT,
    VK_SAMPLE_COUNT_8_BIT,  VK_SAMPLE_COUNT_16_BIT, VK_SAMPLE_COUNT_32_BIT,
    VK_SAMPLE_COUNT_64_BIT,
};
// Keeps this table and the per-device grid cache the same length.
static_assert(sample_location_query_counts.size() == Device::sample_location_table_size);
// Translates a single sample-count bit into its position within
// sample_location_query_counts. When the bit is not listed, the table size is
// returned, which callers can treat as an out-of-range index.
constexpr size_t SampleCountIndex(VkSampleCountFlagBits samples) {
    size_t position = 0;
    for (const VkSampleCountFlagBits candidate : sample_location_query_counts) {
        if (candidate == samples) {
            break;
        }
        ++position;
    }
    // Either the matching slot, or one past the last slot if nothing matched.
    return position;
}
[[maybe_unused]] constexpr VkShaderStageFlags GraphicsStageMask =
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT |
@ -1312,6 +1330,8 @@ bool Device::GetSuitability(bool requires_swapchain) {
features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable = false;
}
PopulateSampleLocationGrids();
// Return whether we were suitable.
return suitable;
}
@ -1572,6 +1592,66 @@ void Device::RemoveUnsuitableExtensions() {
RemoveExtensionFeatureIfUnsuitable(extensions.maintenance4, features.maintenance4,
VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
// Looks up the sample location grid size cached for `samples`.
//
// The device-wide maxSampleLocationGridSize (or 1x1 if either of its dimensions
// is zero) is returned instead when the sample locations extension is disabled,
// when `samples` is not one of the tabulated sample counts, or when the cached
// entry has a zero dimension.
VkExtent2D Device::SampleLocationGridSizeFor(VkSampleCountFlagBits samples) const {
    const auto has_zero_dimension = [](const VkExtent2D& extent) {
        return extent.width == 0 || extent.height == 0;
    };

    const VkExtent2D& device_limit = properties.sample_locations.maxSampleLocationGridSize;
    const VkExtent2D fallback =
        has_zero_dimension(device_limit) ? VkExtent2D{1, 1} : device_limit;

    const size_t slot = SampleCountIndex(samples);
    const bool has_cached_entry =
        extensions.sample_locations && slot < sample_location_grids.size();
    if (!has_cached_entry) {
        return fallback;
    }

    const VkExtent2D& cached = sample_location_grids[slot];
    if (has_zero_dimension(cached)) {
        return fallback;
    }
    return cached;
}
void Device::PopulateSampleLocationGrids() {
for (auto& grid : sample_location_grids) {
grid = VkExtent2D{1, 1};
}
if (!extensions.sample_locations) {
return;
}
const auto sanitize = [](VkExtent2D grid) {
if (grid.width == 0 || grid.height == 0) {
return VkExtent2D{1, 1};
}
return grid;
};
const VkExtent2D fallback = sanitize(properties.sample_locations.maxSampleLocationGridSize);
const VkSampleCountFlags supported_counts =
properties.sample_locations.sampleLocationSampleCounts;
if (supported_counts == 0) {
return;
}
const bool can_query = dld.vkGetPhysicalDeviceMultisamplePropertiesEXT != nullptr;
for (size_t index = 0; index < sample_location_grids.size(); ++index) {
const VkSampleCountFlagBits bit = sample_location_query_counts[index];
if ((supported_counts & bit) == 0) {
continue;
}
VkExtent2D grid = fallback;
if (can_query) {
VkMultisamplePropertiesEXT props{
.sType = VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT,
.pNext = nullptr,
};
dld.vkGetPhysicalDeviceMultisamplePropertiesEXT(physical, bit, &props);
if (props.maxSampleLocationGridSize.width != 0 &&
props.maxSampleLocationGridSize.height != 0) {
grid = props.maxSampleLocationGridSize;
}
}
sample_location_grids[index] = grid;
}
}
// VK_KHR_maintenance5
extensions.maintenance5 = features.maintenance5.maintenance5;

10
src/video_core/vulkan_common/vulkan_device.h

@ -7,6 +7,7 @@
#pragma once
#include <algorithm>
#include <array>
#include <optional>
#include <set>
#include <span>
@ -350,6 +351,9 @@ public:
return properties.sample_locations;
}
/// Returns the host-supported sample location grid for the requested sample count.
VkExtent2D SampleLocationGridSizeFor(VkSampleCountFlagBits samples) const;
/// Returns true if ASTC is natively supported.
bool IsOptimalAstcSupported() const {
return features.features.textureCompressionASTC_LDR;
@ -976,6 +980,8 @@ public:
}
private:
static constexpr size_t sample_location_table_size = 7;
/// Checks if the physical device is suitable and configures the object state
/// with all necessary info about its properties.
bool GetSuitability(bool requires_swapchain);
@ -983,6 +989,8 @@ private:
// Remove extensions which have incomplete feature support.
void RemoveUnsuitableExtensions();
void PopulateSampleLocationGrids();
void RemoveExtension(bool& extension, const std::string& extension_name);
void RemoveExtensionIfUnsuitable(bool& extension, const std::string& extension_name);
@ -1077,6 +1085,8 @@ private:
VkPhysicalDeviceFeatures2 features2{};
VkPhysicalDeviceProperties2 properties2{};
std::array<VkExtent2D, sample_location_table_size> sample_location_grids{};
// Misc features
bool is_optimal_astc_supported{}; ///< Support for all guest ASTC formats.
bool is_blit_depth24_stencil8_supported{}; ///< Support for blitting from and to D24S8.

1
src/video_core/vulkan_common/vulkan_wrapper.cpp

@ -294,6 +294,7 @@ bool Load(VkInstance instance, InstanceDispatch& dld) noexcept {
X(vkDestroySurfaceKHR);
X(vkGetPhysicalDeviceFeatures2);
X(vkGetPhysicalDeviceProperties2);
X(vkGetPhysicalDeviceMultisamplePropertiesEXT);
X(vkGetPhysicalDeviceSurfaceCapabilitiesKHR);
X(vkGetPhysicalDeviceSurfaceFormatsKHR);
X(vkGetPhysicalDeviceSurfacePresentModesKHR);

1
src/video_core/vulkan_common/vulkan_wrapper.h

@ -170,6 +170,7 @@ struct InstanceDispatch {
PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties{};
PFN_vkGetPhysicalDeviceProperties2 vkGetPhysicalDeviceProperties2{};
PFN_vkGetPhysicalDeviceToolProperties vkGetPhysicalDeviceToolProperties{};
PFN_vkGetPhysicalDeviceMultisamplePropertiesEXT vkGetPhysicalDeviceMultisamplePropertiesEXT{};
PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties{};
PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR{};
PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR{};

Loading…
Cancel
Save