From a134ad3fbd359ebaab8e72d4080eda96fed78497 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 10 Mar 2026 04:58:11 -0400 Subject: [PATCH] [vulkan] Added Line loop + topology emulation accuracy increased by changing triangle assumption --- .../renderer_vulkan/maxwell_to_vk.cpp | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 229 ++++++++++++++++++ .../renderer_vulkan/vk_rasterizer.h | 6 + 3 files changed, 236 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 0538102e4a..8a77c8296c 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -323,7 +323,7 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device, case Maxwell::PrimitiveTopology::Lines: return VK_PRIMITIVE_TOPOLOGY_LINE_LIST; case Maxwell::PrimitiveTopology::LineLoop: - return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; case Maxwell::PrimitiveTopology::LineStrip: return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; case Maxwell::PrimitiveTopology::Triangles: diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 73d2d07c08..dde89f5574 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -6,6 +6,8 @@ #include #include +#include +#include #include #include @@ -24,6 +26,7 @@ #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/host1x/gpu_device_memory_manager.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" @@ -61,6 +64,58 @@ struct DrawParams { bool is_indexed; }; +[[nodiscard]] bool IsLineLoop(Maxwell::PrimitiveTopology topology) { + return topology == Maxwell::PrimitiveTopology::LineLoop; +} + +[[nodiscard]] u32 PrimitiveRestartIndex(Maxwell::IndexFormat format) { + switch (format) { + case Maxwell::IndexFormat::UnsignedByte: + return std::numeric_limits::max(); + case Maxwell::IndexFormat::UnsignedShort: + return std::numeric_limits::max(); + case Maxwell::IndexFormat::UnsignedInt: + return std::numeric_limits::max(); + } + ASSERT(false); + return std::numeric_limits::max(); +} + +template +bool ReadGuestObject(Tegra::MemoryManager* gpu_memory, GPUVAddr address, T& value) { + if (gpu_memory == nullptr) { + return false; + } + gpu_memory->ReadBlockUnsafe(address, &value, sizeof(T)); + return true; +} + +bool ReadGuestIndex(Tegra::MemoryManager* gpu_memory, GPUVAddr address, Maxwell::IndexFormat format, + u32& value) { + switch (format) { + case Maxwell::IndexFormat::UnsignedByte: { + u8 result{}; + if (!ReadGuestObject(gpu_memory, address, result)) { + return false; + } + value = result; + return true; + } + case Maxwell::IndexFormat::UnsignedShort: { + u16 result{}; + if (!ReadGuestObject(gpu_memory, address, result)) { + return false; + } + value = result; + return true; + } + case Maxwell::IndexFormat::UnsignedInt: + return ReadGuestObject(gpu_memory, address, value); + } + ASSERT(false); + return false; +} + VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index, float scale) { const auto& src = regs.viewport_transform[index]; const auto conv = [scale](float value) { @@ -343,6 +398,21 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { GPU::Logging::GPULogger::GetInstance().LogVulkanCall( is_indexed ? "vkCmdDrawIndexed" : "vkCmdDraw", params, VK_SUCCESS); } + + if (IsLineLoop(draw_state.topology) && draw_params.num_vertices >= 2) { + if (maxwell3d->regs.transform_feedback_enabled != 0) { + query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false); + } + scheduler.Record([](vk::CommandBuffer cmdbuf) { + cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_LIST); + }); + DrawLineLoopClosure(draw_state, draw_params.base_instance, draw_params.num_instances, + static_cast(draw_params.base_vertex), + draw_params.num_vertices, draw_params.is_indexed); + scheduler.Record([](vk::CommandBuffer cmdbuf) { + cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_STRIP); + }); + } }); } @@ -350,6 +420,7 @@ void RasterizerVulkan::DrawIndirect() { const auto& params = maxwell3d->draw_manager->GetIndirectParams(); buffer_cache.SetDrawIndirect(¶ms); PrepareDraw(params.is_indexed, [this, ¶ms] { + const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer(); const auto& buffer = indirect_buffer.first; const auto& offset = indirect_buffer.second; @@ -385,6 +456,9 @@ void RasterizerVulkan::DrawIndirect() { static_cast(params.stride)); } }); + if (IsLineLoop(draw_state.topology)) { + DrawIndirectLineLoopClosures(draw_state, params); + } return; } scheduler.Record([buffer_obj = buffer->Handle(), offset, params](vk::CommandBuffer cmdbuf) { @@ -407,10 +481,165 @@ void RasterizerVulkan::DrawIndirect() { params.is_indexed ? "vkCmdDrawIndexedIndirect" : "vkCmdDrawIndirect", log_params, VK_SUCCESS); } + + if (IsLineLoop(draw_state.topology)) { + DrawIndirectLineLoopClosures(draw_state, params); + } }); buffer_cache.SetDrawIndirect(nullptr); } +bool RasterizerVulkan::DrawLineLoopClosure(const MaxwellDrawState& draw_state, u32 base_instance, + u32 num_instances, s32 base_vertex, + u32 num_vertices, bool is_indexed) { + if (!IsLineLoop(draw_state.topology) || num_instances == 0 || num_vertices < 2) { + return false; + } + + std::array closure_indices{}; + if (!is_indexed) { + closure_indices = {num_vertices - 1, 0}; + } else if (!draw_state.inline_index_draw_indexes.empty()) { + const size_t last_offset = (static_cast(num_vertices) - 1) * sizeof(u32); + if (draw_state.inline_index_draw_indexes.size() < last_offset + sizeof(u32)) { + return false; + } + std::memcpy(&closure_indices[0], draw_state.inline_index_draw_indexes.data() + last_offset, + sizeof(u32)); + std::memcpy(&closure_indices[1], draw_state.inline_index_draw_indexes.data(), + sizeof(u32)); + } else { + const auto index_format = draw_state.index_buffer.format; + const size_t index_size = draw_state.index_buffer.FormatSizeInBytes(); + const GPUVAddr first_address = + draw_state.index_buffer.StartAddress() + + static_cast(draw_state.index_buffer.first) * index_size; + const GPUVAddr last_address = + first_address + static_cast(num_vertices - 1) * index_size; + if (!ReadGuestIndex(gpu_memory, last_address, index_format, closure_indices[0]) || + !ReadGuestIndex(gpu_memory, first_address, index_format, closure_indices[1])) { + return false; + } + if (maxwell3d->regs.primitive_restart.enabled != 0) { + const u32 restart_index = PrimitiveRestartIndex(index_format); + if (closure_indices[0] == restart_index || closure_indices[1] == restart_index) { + return false; + } + } + } + + const auto upload = staging_pool.Request(sizeof(closure_indices), MemoryUsage::Upload); + std::memcpy(upload.mapped_span.data(), closure_indices.data(), sizeof(closure_indices)); + + scheduler.Record([buffer = upload.buffer, offset = upload.offset](vk::CommandBuffer cmdbuf) { + cmdbuf.BindIndexBuffer(buffer, offset, VK_INDEX_TYPE_UINT32); + }); + scheduler.Record([base_instance, num_instances, base_vertex](vk::CommandBuffer cmdbuf) { + cmdbuf.DrawIndexed(2, num_instances, 0, base_vertex, base_instance); + }); + return true; +} + +void RasterizerVulkan::DrawIndirectLineLoopClosures( + const MaxwellDrawState& draw_state, const Tegra::Engines::DrawManager::IndirectParams& params) { + if (!IsLineLoop(draw_state.topology) || params.is_byte_count) { + return; + } + + u32 draw_count = static_cast(params.max_draw_counts); + if (params.include_count) { + gpu_memory->ReadBlockUnsafe(params.count_start_address, &draw_count, sizeof(draw_count)); + draw_count = std::min(draw_count, static_cast(params.max_draw_counts)); + } + if (draw_count == 0) { + return; + } + + bool emitted_closure = false; + if (params.is_indexed) { + const u32 command_stride = + params.stride != 0 ? static_cast(params.stride) : sizeof(VkDrawIndexedIndirectCommand); + for (u32 i = 0; i < draw_count; ++i) { + VkDrawIndexedIndirectCommand command{}; + gpu_memory->ReadBlockUnsafe(params.indirect_start_address + + static_cast(i) * command_stride, + &command, sizeof(command)); + if (command.indexCount < 2 || command.instanceCount == 0) { + continue; + } + + std::array closure_indices{}; + const auto index_format = draw_state.index_buffer.format; + const size_t index_size = draw_state.index_buffer.FormatSizeInBytes(); + const GPUVAddr first_address = draw_state.index_buffer.StartAddress() + + static_cast(command.firstIndex) * index_size; + const GPUVAddr last_address = + first_address + static_cast(command.indexCount - 1) * index_size; + if (!ReadGuestIndex(gpu_memory, last_address, index_format, closure_indices[0]) || + !ReadGuestIndex(gpu_memory, first_address, index_format, closure_indices[1])) { + continue; + } + if (maxwell3d->regs.primitive_restart.enabled != 0) { + const u32 restart_index = PrimitiveRestartIndex(index_format); + if (closure_indices[0] == restart_index || closure_indices[1] == restart_index) { + continue; + } + } + + if (!emitted_closure) { + if (maxwell3d->regs.transform_feedback_enabled != 0) { + query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false); + } + scheduler.Record([](vk::CommandBuffer cmdbuf) { + cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_LIST); + }); + emitted_closure = true; + } + + const auto upload = staging_pool.Request(sizeof(closure_indices), MemoryUsage::Upload); + std::memcpy(upload.mapped_span.data(), closure_indices.data(), sizeof(closure_indices)); + scheduler.Record( + [buffer = upload.buffer, offset = upload.offset](vk::CommandBuffer cmdbuf) { + cmdbuf.BindIndexBuffer(buffer, offset, VK_INDEX_TYPE_UINT32); + }); + scheduler.Record([command](vk::CommandBuffer cmdbuf) { + cmdbuf.DrawIndexed(2, command.instanceCount, 0, command.vertexOffset, + command.firstInstance); + }); + } + } else { + const u32 command_stride = + params.stride != 0 ? static_cast(params.stride) : sizeof(VkDrawIndirectCommand); + for (u32 i = 0; i < draw_count; ++i) { + VkDrawIndirectCommand command{}; + gpu_memory->ReadBlockUnsafe(params.indirect_start_address + + static_cast(i) * command_stride, + &command, sizeof(command)); + if (command.vertexCount < 2 || command.instanceCount == 0) { + continue; + } + if (!emitted_closure) { + if (maxwell3d->regs.transform_feedback_enabled != 0) { + query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false); + } + scheduler.Record([](vk::CommandBuffer cmdbuf) { + cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_LIST); + }); + emitted_closure = true; + } + DrawLineLoopClosure(draw_state, command.firstInstance, command.instanceCount, + static_cast(command.firstVertex), command.vertexCount, + false); + } + } + + if (emitted_closure) { + scheduler.Record([](vk::CommandBuffer cmdbuf) { + cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_STRIP); + }); + } +} + void RasterizerVulkan::DrawTexture() { SCOPE_EXIT { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index bdea6510c0..7de2039bb9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -155,6 +155,12 @@ private: template void PrepareDraw(bool is_indexed, Func&&); + bool DrawLineLoopClosure(const MaxwellDrawState& draw_state, u32 base_instance, + u32 num_instances, s32 base_vertex, u32 num_vertices, + bool is_indexed); + void DrawIndirectLineLoopClosures(const MaxwellDrawState& draw_state, + const Tegra::Engines::DrawManager::IndirectParams& params); + void FlushWork(); void UpdateDynamicStates();