diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index dca263b322..d89a992281 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -862,6 +862,10 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
     const bool alpha_to_one_supported = device.SupportsAlphaToOne();
     const auto msaa_mode = key.state.msaa_mode.Value();
     const VkSampleCountFlagBits vk_samples = MaxwellToVK::MsaaMode(msaa_mode);
+    // Both device queries must pass before any sample-location state is attached below.
+    const bool supports_sample_locations =
+        device.IsExtSampleLocationsSupported() && device.SupportsSampleLocationsFor(vk_samples);
+
     VkPipelineMultisampleStateCreateInfo multisample_ci{
         .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
         .pNext = nullptr,
@@ -876,6 +880,44 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
             key.state.alpha_to_one_enabled != 0 ? VK_TRUE : VK_FALSE,
     };
 
+    // Storage for the sample-location structures chained onto multisample_ci. It lives in an
+    // optional declared at function scope so the pointers stored below remain valid after the
+    // if-block ends.
+    struct SampleLocationsChain {
+        std::array<VkSampleLocationEXT, VideoCommon::MaxSampleLocationSlots> locations;
+        VkSampleLocationsInfoEXT info;
+        VkPipelineSampleLocationsStateCreateInfoEXT create;
+    };
+    std::optional<SampleLocationsChain> sample_locations_chain;
+    if (supports_sample_locations) {
+        sample_locations_chain.emplace();
+        auto& chain = *sample_locations_chain;
+        const auto [grid_width, grid_height] = VideoCommon::SampleLocationGridSize(msaa_mode);
+        const u32 samples_per_pixel = static_cast<u32>(VideoCommon::NumSamples(msaa_mode));
+        const u32 sample_locations_count = grid_width * grid_height * samples_per_pixel;
+        // Placeholder pattern (pixel centers). This pipeline also declares
+        // VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT, so the locations actually used are the ones set
+        // dynamically at draw time.
+        chain.locations.fill(VkSampleLocationEXT{0.5f, 0.5f});
+        chain.info = VkSampleLocationsInfoEXT{
+            .sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT,
+            .pNext = nullptr,
+            .sampleLocationsPerPixel = vk_samples,
+            .sampleLocationGridSize = VkExtent2D{grid_width, grid_height},
+            .sampleLocationsCount = sample_locations_count,
+            .pSampleLocations = chain.locations.data(),
+        };
+        chain.create = VkPipelineSampleLocationsStateCreateInfoEXT{
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
+            .pNext = nullptr,
+            .sampleLocationsEnable = VK_TRUE,
+            .sampleLocationsInfo = chain.info,
+        };
+        chain.create.sampleLocationsInfo.pSampleLocations = chain.locations.data();
+        // Insert at the head of the multisample state's pNext chain.
+        chain.create.pNext = std::exchange(multisample_ci.pNext, &chain.create);
+    }
+
     const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
         .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
         .pNext = nullptr,
@@ -961,6 +1003,11 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
         VK_DYNAMIC_STATE_LINE_WIDTH,
     };
 
+    // Sample locations are supplied per draw instead of being baked into the pipeline.
+    if (supports_sample_locations) {
+        dynamic_states.push_back(VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT);
+    }
+
     if (device.UsesAdvancedCoreDynamicState()) {
         static constexpr std::array core_dynamic_states{
             VK_DYNAMIC_STATE_BLEND_CONSTANTS,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 1ebb36f707..91fc570cda 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -51,6 +51,28 @@ using VideoCommon::ImageViewType;
 
 namespace {
 
+// Scale applied to the raw register values (1/16; presumably 4-bit fixed point - confirm).
+constexpr float SAMPLE_LOCATION_SCALE = 1.0f / 16.0f;
+
+/// Unpacks every entry of regs.multisample_sample_locations (four slots per register) into
+/// float coordinates scaled by SAMPLE_LOCATION_SCALE. Slots past the end of the returned array
+/// are ignored and entries that are never written stay zero-initialized.
+std::array<VkSampleLocationEXT, VideoCommon::MaxSampleLocationSlots>
+DecodeSampleLocationRegisters(const Maxwell& regs) {
+    std::array<VkSampleLocationEXT, VideoCommon::MaxSampleLocationSlots> decoded{};
+    size_t index = 0;
+    for (const auto& packed : regs.multisample_sample_locations) {
+        for (int slot = 0; slot < 4 && index < decoded.size(); ++slot, ++index) {
+            const auto [raw_x, raw_y] = packed.Location(slot);
+            decoded[index] = VkSampleLocationEXT{
+                .x = static_cast<float>(raw_x) * SAMPLE_LOCATION_SCALE,
+                .y = static_cast<float>(raw_y) * SAMPLE_LOCATION_SCALE,
+            };
+        }
+    }
+    return decoded;
+}
+
 struct DrawParams {
     u32 base_instance;
     u32 num_instances;
@@ -1015,6 +1037,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
     UpdateDepthBounds(regs);
     UpdateStencilFaces(regs);
     UpdateLineWidth(regs);
+    UpdateSampleLocations(regs);
 
     // EDS1: CullMode, DepthCompare, FrontFace, StencilOp, DepthBoundsTest, DepthTest, DepthWrite, StencilTest
     if (device.IsExtExtendedDynamicStateSupported()) {
@@ -1663,6 +1686,65 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
     }
 }
 
+/// Records a SetSampleLocationsEXT command with the sample pattern decoded from regs.
+/// Does nothing when the device does not support sample locations for the current sample
+/// count, or when the tracked registers are clean and a pattern has already been recorded.
+void RasterizerVulkan::UpdateSampleLocations(Tegra::Engines::Maxwell3D::Regs& regs) {
+    if (!device.IsExtSampleLocationsSupported()) {
+        return;
+    }
+
+    const auto msaa_mode = regs.anti_alias_samples_mode;
+    const VkSampleCountFlagBits vk_samples = MaxwellToVK::MsaaMode(msaa_mode);
+    if (!device.SupportsSampleLocationsFor(vk_samples)) {
+        return;
+    }
+
+    // Every register read below is covered by the SampleLocations dirty flag, so a clean flag
+    // means the pattern recorded earlier is still current and no decoding is needed.
+    const bool registers_dirty = state_tracker.TouchSampleLocations();
+    if (!registers_dirty && sample_location_state.initialized) {
+        return;
+    }
+
+    const auto [grid_width_u32, grid_height_u32] = VideoCommon::SampleLocationGridSize(msaa_mode);
+    const u32 grid_width = grid_width_u32;
+    const u32 grid_height = grid_height_u32;
+    const u32 samples_per_pixel = static_cast<u32>(VideoCommon::NumSamples(msaa_mode));
+    const u32 sample_locations_count = grid_width * grid_height * samples_per_pixel;
+    ASSERT(sample_locations_count <= VideoCommon::MaxSampleLocationSlots);
+
+    // VkSampleLocationEXT coordinates are relative to the top-left corner of their own pixel,
+    // and the pixel within the grid is selected by the array index, so the decoded values are
+    // passed through as-is; adding the grid cell position would push them out of range.
+    const auto locations = DecodeSampleLocationRegisters(regs);
+    const VkExtent2D grid_size{
+        .width = grid_width,
+        .height = grid_height,
+    };
+
+    scheduler.Record([locations, grid_size, vk_samples, sample_locations_count](
+                         vk::CommandBuffer cmdbuf) {
+        VkSampleLocationsInfoEXT info{
+            .sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT,
+            .pNext = nullptr,
+            .sampleLocationsPerPixel = vk_samples,
+            .sampleLocationGridSize = grid_size,
+            .sampleLocationsCount = sample_locations_count,
+            .pSampleLocations = locations.data(),
+        };
+        cmdbuf.SetSampleLocationsEXT(info);
+    });
+
+    // Remember what was submitted; `initialized` gates the early-out above.
+    sample_location_state.msaa_mode = msaa_mode;
+    sample_location_state.grid_size = grid_size;
+    sample_location_state.samples_per_pixel = vk_samples;
+    sample_location_state.locations_count = sample_locations_count;
+    sample_location_state.locations = locations;
+    sample_location_state.initialized = true;
+}
+
 void RasterizerVulkan::UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs) {
     if (!state_tracker.TouchLogicOp()) {
         return;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index b689c6b660..9c9809c0da 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -25,6 +25,7 @@
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/texture_cache/samples_helper.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
@@ -168,6 +169,7 @@ private:
     void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
     void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
     void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs);
+    void UpdateSampleLocations(Tegra::Engines::Maxwell3D::Regs& regs);
 
     void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs);
     void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -193,6 +195,19 @@ private:
 
     void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
 
+    /// Sample-location pattern most recently recorded by UpdateSampleLocations.
+    struct SampleLocationState {
+        Tegra::Texture::MsaaMode msaa_mode{Tegra::Texture::MsaaMode::Msaa1x1};
+        VkExtent2D grid_size{1u, 1u};
+        VkSampleCountFlagBits samples_per_pixel{VK_SAMPLE_COUNT_1_BIT};
+        u32 locations_count{VideoCommon::MaxSampleLocationSlots};
+        std::array<VkSampleLocationEXT, VideoCommon::MaxSampleLocationSlots> locations{};
+        /// False until the first pattern has been recorded.
+        bool initialized = false;
+    };
+
+    SampleLocationState sample_location_state{};
+
     Tegra::GPU& gpu;
     Tegra::MaxwellDeviceMemoryManager& device_memory;
 
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 79967d540a..1610827598 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -40,6 +40,7 @@ Flags MakeInvalidationFlags() {
         StencilWriteMask,
         StencilCompare,
         LineWidth,
+        SampleLocations,
         CullMode,
         DepthBoundsEnable,
         DepthTestEnable,
@@ -129,6 +130,13 @@ void SetupDirtyLineWidth(Tables& tables) {
     tables[0][OFF(line_width_aliased)] = LineWidth;
 }
 
+/// Maps the sample-location registers and anti_alias_samples_mode to the SampleLocations flag.
+void SetupDirtySampleLocations(Tables& tables) {
+    FillBlock(tables[0], OFF(multisample_sample_locations),
+              NUM(multisample_sample_locations), SampleLocations);
+    tables[0][OFF(anti_alias_samples_mode)] = SampleLocations;
+}
+
 void SetupDirtyCullMode(Tables& tables) {
     auto& table = tables[0];
     table[OFF(gl_cull_face)] = CullMode;
@@ -246,6 +254,7 @@ void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) {
     SetupDirtyDepthBounds(tables);
     SetupDirtyStencilProperties(tables);
     SetupDirtyLineWidth(tables);
+    SetupDirtySampleLocations(tables);
     SetupDirtyCullMode(tables);
     SetupDirtyStateEnable(tables);
     SetupDirtyDepthCompareOp(tables);
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 74bae9e181..e1c54eb3fc 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -42,6 +42,7 @@ enum : u8 {
     StencilWriteMask,
     StencilCompare,
     LineWidth,
+    SampleLocations,
 
     CullMode,
     DepthBoundsEnable,
@@ -185,6 +186,10 @@ public:
         return Exchange(Dirty::LineWidth, false);
     }
 
+    bool TouchSampleLocations() {
+        return Exchange(Dirty::SampleLocations, false);
+    }
+
     bool TouchCullMode() {
         return Exchange(Dirty::CullMode, false);
     }