From 33073f4893885e6ba937cd6cc5e6e1ec81ea43b9 Mon Sep 17 00:00:00 2001 From: crueter Date: Tue, 13 Jan 2026 14:45:14 -0500 Subject: [PATCH] optimize & fix markdown sanitizer - bullets and such - just use boost kek - also get rid of backticks and use better heuristics to get rid of stuff Signed-off-by: crueter --- .../hle/service/bcat/news/builtin_news.cpp | 94 ++++++------------- 1 file changed, 31 insertions(+), 63 deletions(-) diff --git a/src/core/hle/service/bcat/news/builtin_news.cpp b/src/core/hle/service/bcat/news/builtin_news.cpp index e57fab94c1..d86020cbe0 100644 --- a/src/core/hle/service/bcat/news/builtin_news.cpp +++ b/src/core/hle/service/bcat/news/builtin_news.cpp @@ -9,11 +9,15 @@ #include "common/fs/path_util.h" #include "common/logging/log.h" +#include +#include +#include #include #include #include #include +#include #include #include #include @@ -167,80 +171,44 @@ std::string SanitizeMarkdown(std::string_view markdown) { text = text.substr(0, pos); } - std::istringstream stream(text); - std::string line; - bool first_line = true; + // Fix line endings + boost::replace_all(text, "\r", ""); - while (std::getline(stream, line)) { - if (!line.empty() && line.back() == '\r') { - line.pop_back(); - } + // Remove backticks + boost::replace_all(text, "`", ""); - if (first_line && line.empty()) continue; + // Remove excessive newlines + static const boost::regex newlines(R"(\n\n\n+)"); + text = boost::regex_replace(text, newlines, "\n\n"); - // Remove markdown headers - size_t start = 0; - while (start < line.size() && line[start] == '#') start++; - if (start > 0 && start < line.size() && line[start] == ' ') { - line = line.substr(start + 1); - } + // Remove markdown headers + static const boost::regex headers(R"(^#+ )"); + text = boost::regex_replace(text, headers, ""); - // Remove bold/italic marker - std::string cleaned; - size_t i = 0; - while (i < line.size()) { - if (i + 1 < line.size() && line[i] == '*' && line[i + 1] == '*') { - i += 2; - } else if (line[i] == '*' || line[i] == '_') { - i++; - } else { - cleaned += line[i++]; - } - } - line = cleaned; - - // Remove links and convert it to text - std::string no_links; - i = 0; - while (i < line.size()) { - if (line[i] == '[') { - auto close = line.find(']', i); - if (close != std::string::npos && close + 1 < line.size() && line[close + 1] == '(') { - no_links += line.substr(i + 1, close - i - 1); - auto paren_close = line.find(')', close + 2); - i = (paren_close != std::string::npos) ? paren_close + 1 : close + 1; - } else { - no_links += line[i++]; - } - } else { - no_links += line[i++]; - } - } - line = no_links; + // Convert bullet points to something nicer + static const boost::regex list1(R"(^- )"); + text = boost::regex_replace(text, list1, "• "); - // Convert bullet points to something nicer - if (line.size() >= 2 && line[0] == '-' && line[1] == ' ') { - line = "• " + line.substr(2); - } + static const boost::regex list2(R"(^ \* )"); + text = boost::regex_replace(text, list2, " • "); - if (!first_line) { - result += '\n'; - } - result += line; - first_line = false; - } + // Convert bold/italic text into normal text + static const boost::regex bold(R"(\*\*(.*?)\*\*)"); + text = boost::regex_replace(text, bold, "$1"); - // Remove excessive newlines, we are heavy char limited... - while (result.find("\n\n\n") != std::string::npos) { - result.replace(result.find("\n\n\n"), 3, "\n\n"); - } + static const boost::regex italic(R"(\*(.*?)\*)"); + text = boost::regex_replace(text, italic, "$1"); + + // Remove links and convert to normal text + static const boost::regex link(R"([(.*?)]\(.*\))"); + text = boost::regex_replace(text, link, "$1"); // Trim trailing whitespace/newlines - while (!result.empty() && (result.back() == '\n' || result.back() == ' ')) { - result.pop_back(); + while (!text.empty() && (text.back() == '\n' || text.back() == ' ')) { + text.pop_back(); } - return result; + return text; } std::string FormatBody(const nlohmann::json& release, std::string_view title) {