Browse Source

[vk] Re-introduction to MSAA - Sample Locations

eds-true-adreno-fixes
CamilleLaVey 3 weeks ago
committed by Caio Oliveira
parent
commit
6b0b72e034
No known key found for this signature in database GPG Key ID: AAAE6C7FD4186B0C
  1. 38
      src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
  2. 92
      src/video_core/renderer_vulkan/vk_rasterizer.cpp
  3. 13
      src/video_core/renderer_vulkan/vk_rasterizer.h
  4. 8
      src/video_core/renderer_vulkan/vk_state_tracker.cpp
  5. 5
      src/video_core/renderer_vulkan/vk_state_tracker.h

38
src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp

@ -862,6 +862,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
const bool alpha_to_one_supported = device.SupportsAlphaToOne();
const auto msaa_mode = key.state.msaa_mode.Value();
const VkSampleCountFlagBits vk_samples = MaxwellToVK::MsaaMode(msaa_mode);
const bool supports_sample_locations =
device.IsExtSampleLocationsSupported() && device.SupportsSampleLocationsFor(vk_samples);
VkPipelineMultisampleStateCreateInfo multisample_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pNext = nullptr,
@ -876,6 +879,37 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
key.state.alpha_to_one_enabled != 0 ? VK_TRUE : VK_FALSE,
};
// Backing storage for the VK_EXT_sample_locations structures chained into multisample_ci.
// The members are grouped because they refer to each other: `info` points at `locations`,
// and `create` embeds a copy of `info`.
struct SampleLocationsChain {
std::array<VkSampleLocationEXT, VideoCommon::MaxSampleLocationSlots> locations;
VkSampleLocationsInfoEXT info;
VkPipelineSampleLocationsStateCreateInfoEXT create;
};
// Stays empty when custom sample locations cannot be used for this sample count, in which
// case multisample_ci.pNext is left untouched.
std::optional<SampleLocationsChain> sample_locations_chain;
if (supports_sample_locations) {
sample_locations_chain.emplace();
auto& chain = *sample_locations_chain;
const auto [grid_width, grid_height] = VideoCommon::SampleLocationGridSize(msaa_mode);
const u32 samples_per_pixel = static_cast<u32>(VideoCommon::NumSamples(msaa_mode));
// One entry per sample of every pixel in the grid.
const u32 sample_locations_count = grid_width * grid_height * samples_per_pixel;
// Every slot starts at the pixel centre.
// NOTE(review): the same condition also adds VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT to
// dynamic_states further down, so these values look like placeholders - confirm.
chain.locations.fill(VkSampleLocationEXT{0.5f, 0.5f});
chain.info = VkSampleLocationsInfoEXT{
.sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT,
.pNext = nullptr,
.sampleLocationsPerPixel = vk_samples,
.sampleLocationGridSize = VkExtent2D{grid_width, grid_height},
.sampleLocationsCount = sample_locations_count,
.pSampleLocations = chain.locations.data(),
};
chain.create = VkPipelineSampleLocationsStateCreateInfoEXT{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
.pNext = nullptr,
.sampleLocationsEnable = VK_TRUE,
// Copied by value; the copy carries the pointer to chain.locations with it.
.sampleLocationsInfo = chain.info,
};
// Re-assigns the pointer already carried over by the copy of chain.info above.
chain.create.sampleLocationsInfo.pSampleLocations = chain.locations.data();
// Prepend `create` to multisample_ci's pNext chain: it takes over whatever was chained
// before and becomes the new head.
chain.create.pNext = std::exchange(multisample_ci.pNext, &chain.create);
}
const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pNext = nullptr,
@ -961,6 +995,10 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
VK_DYNAMIC_STATE_LINE_WIDTH,
};
if (supports_sample_locations) {
dynamic_states.push_back(VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT);
}
if (device.UsesAdvancedCoreDynamicState()) {
static constexpr std::array core_dynamic_states{
VK_DYNAMIC_STATE_BLEND_CONSTANTS,

92
src/video_core/renderer_vulkan/vk_rasterizer.cpp

@ -51,6 +51,24 @@ using VideoCommon::ImageViewType;
namespace {
constexpr float SAMPLE_LOCATION_SCALE = 1.0f / 16.0f;
std::array<VkSampleLocationEXT, VideoCommon::MaxSampleLocationSlots>
DecodeSampleLocationRegisters(const Maxwell& regs) {
std::array<VkSampleLocationEXT, VideoCommon::MaxSampleLocationSlots> decoded{};
size_t index = 0;
for (const auto& packed : regs.multisample_sample_locations) {
for (int slot = 0; slot < 4 && index < decoded.size(); ++slot, ++index) {
const auto [raw_x, raw_y] = packed.Location(slot);
decoded[index] = VkSampleLocationEXT{
.x = static_cast<float>(raw_x) * SAMPLE_LOCATION_SCALE,
.y = static_cast<float>(raw_y) * SAMPLE_LOCATION_SCALE,
};
}
}
return decoded;
}
struct DrawParams {
u32 base_instance;
u32 num_instances;
@ -1015,6 +1033,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateDepthBounds(regs);
UpdateStencilFaces(regs);
UpdateLineWidth(regs);
UpdateSampleLocations(regs);
// EDS1: CullMode, DepthCompare, FrontFace, StencilOp, DepthBoundsTest, DepthTest, DepthWrite, StencilTest
if (device.IsExtExtendedDynamicStateSupported()) {
@ -1663,6 +1682,79 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
}
}
void RasterizerVulkan::UpdateSampleLocations(Maxwell::Regs& regs) {
if (!device.IsExtSampleLocationsSupported()) {
return;
}
const auto msaa_mode = regs.anti_alias_samples_mode;
const VkSampleCountFlagBits vk_samples = MaxwellToVK::MsaaMode(msaa_mode);
if (!device.SupportsSampleLocationsFor(vk_samples)) {
return;
}
const auto [grid_width_u32, grid_height_u32] = VideoCommon::SampleLocationGridSize(msaa_mode);
const u32 grid_width = grid_width_u32;
const u32 grid_height = grid_height_u32;
const u32 samples_per_pixel = static_cast<u32>(VideoCommon::NumSamples(msaa_mode));
const u32 grid_cells = grid_width * grid_height;
const u32 sample_locations_count = grid_cells * samples_per_pixel;
ASSERT(sample_locations_count <= VideoCommon::MaxSampleLocationSlots);
const auto raw_locations = DecodeSampleLocationRegisters(regs);
std::array<VkSampleLocationEXT, VideoCommon::MaxSampleLocationSlots> resolved{};
for (u32 cell = 0; cell < grid_cells; ++cell) {
const u32 slot_base = cell * samples_per_pixel;
const u32 cell_x = cell % grid_width;
const u32 cell_y = cell / grid_width;
for (u32 sample = 0; sample < samples_per_pixel; ++sample) {
const VkSampleLocationEXT raw = raw_locations[slot_base + sample];
resolved[slot_base + sample] = VkSampleLocationEXT{
.x = static_cast<float>(cell_x) + raw.x,
.y = static_cast<float>(cell_y) + raw.y,
};
}
}
const VkExtent2D grid_size{
.width = grid_width,
.height = grid_height,
};
const bool pattern_changed = !sample_location_state.initialized ||
sample_location_state.msaa_mode != msaa_mode ||
sample_location_state.grid_size.width != grid_size.width ||
sample_location_state.grid_size.height != grid_size.height ||
sample_location_state.samples_per_pixel != vk_samples ||
sample_location_state.locations_count != sample_locations_count ||
sample_location_state.locations != resolved;
const bool dirty = state_tracker.TouchSampleLocations() || pattern_changed;
if (!dirty) {
return;
}
scheduler.Record([resolved, grid_size, vk_samples, sample_locations_count](
vk::CommandBuffer cmdbuf) {
VkSampleLocationsInfoEXT info{
.sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT,
.pNext = nullptr,
.sampleLocationsPerPixel = vk_samples,
.sampleLocationGridSize = grid_size,
.sampleLocationsCount = sample_locations_count,
.pSampleLocations = resolved.data(),
};
cmdbuf.SetSampleLocationsEXT(info);
});
sample_location_state.msaa_mode = msaa_mode;
sample_location_state.grid_size = grid_size;
sample_location_state.samples_per_pixel = vk_samples;
sample_location_state.locations_count = sample_locations_count;
sample_location_state.locations = resolved;
sample_location_state.initialized = true;
}
void RasterizerVulkan::UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchLogicOp()) {
return;

13
src/video_core/renderer_vulkan/vk_rasterizer.h

@ -25,6 +25,7 @@
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@ -168,6 +169,7 @@ private:
void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateSampleLocations(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
@ -193,6 +195,17 @@ private:
void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
/// Snapshot of the sample-location pattern most recently submitted by UpdateSampleLocations,
/// used to detect whether a new submission is required.
struct SampleLocationState {
    /// Guest MSAA mode the pattern was built for.
    Tegra::Texture::MsaaMode msaa_mode{Tegra::Texture::MsaaMode::Msaa1x1};
    /// Size, in pixels, of the grid the pattern spans.
    VkExtent2D grid_size{.width = 1u, .height = 1u};
    /// Vulkan sample count matching `msaa_mode`.
    VkSampleCountFlagBits samples_per_pixel{VK_SAMPLE_COUNT_1_BIT};
    /// Number of valid entries at the front of `locations`.
    u32 locations_count{VideoCommon::MaxSampleLocationSlots};
    /// Submitted positions; entries past `locations_count` are zero.
    std::array<VkSampleLocationEXT, VideoCommon::MaxSampleLocationSlots> locations{};
    /// False until the first pattern has been submitted; the other members are only
    /// meaningful afterwards.
    bool initialized{false};
};
SampleLocationState sample_location_state{};
Tegra::GPU& gpu;
Tegra::MaxwellDeviceMemoryManager& device_memory;

8
src/video_core/renderer_vulkan/vk_state_tracker.cpp

@ -40,6 +40,7 @@ Flags MakeInvalidationFlags() {
StencilWriteMask,
StencilCompare,
LineWidth,
SampleLocations,
CullMode,
DepthBoundsEnable,
DepthTestEnable,
@ -129,6 +130,12 @@ void SetupDirtyLineWidth(Tables& tables) {
tables[0][OFF(line_width_aliased)] = LineWidth;
}
/// Marks the registers that feed the sample-location dynamic state: the MSAA mode register
/// and the whole multisample_sample_locations range map to the SampleLocations dirty flag.
void SetupDirtySampleLocations(Tables& tables) {
    auto& table = tables[0];
    table[OFF(anti_alias_samples_mode)] = SampleLocations;
    FillBlock(table, OFF(multisample_sample_locations), NUM(multisample_sample_locations),
              SampleLocations);
}
void SetupDirtyCullMode(Tables& tables) {
auto& table = tables[0];
table[OFF(gl_cull_face)] = CullMode;
@ -246,6 +253,7 @@ void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) {
SetupDirtyDepthBounds(tables);
SetupDirtyStencilProperties(tables);
SetupDirtyLineWidth(tables);
SetupDirtySampleLocations(tables);
SetupDirtyCullMode(tables);
SetupDirtyStateEnable(tables);
SetupDirtyDepthCompareOp(tables);

5
src/video_core/renderer_vulkan/vk_state_tracker.h

@ -42,6 +42,7 @@ enum : u8 {
StencilWriteMask,
StencilCompare,
LineWidth,
SampleLocations,
CullMode,
DepthBoundsEnable,
@ -185,6 +186,10 @@ public:
return Exchange(Dirty::LineWidth, false);
}
/// Consumes the SampleLocations dirty flag by exchanging it with false.
/// @return The result of the exchange - presumably the flag's previous value, i.e. whether
///         the sample-location state has to be submitted again.
bool TouchSampleLocations() const {
    const bool was_dirty = Exchange(Dirty::SampleLocations, false);
    return was_dirty;
}
bool TouchCullMode() {
return Exchange(Dirty::CullMode, false);
}

Loading…
Cancel
Save