tetratto/crates/shared/src/markdown.rs
2025-07-26 22:18:32 -04:00

225 lines
6.7 KiB
Rust

use ammonia::Builder;
use pulldown_cmark::{Parser, Options, html::push_html};
use std::collections::HashSet;
/// Convert markdown into HTML **without** sanitizing the result.
///
/// The input is first run through the local pre-processors, innermost first:
/// [`parse_backslash_breaks`], then [`parse_alignment`], then [`autolinks`].
/// The pre-processed text is handed to `pulldown_cmark` with strikethrough,
/// GFM, footnotes, tables, heading attributes and subscript enabled.
pub fn render_markdown_dirty(input: &str) -> String {
    // Pre-processing pipeline; each stage consumes the previous stage's output.
    let with_breaks = parse_backslash_breaks(input);
    let with_alignment = parse_alignment(&with_breaks);
    let preprocessed = autolinks(&with_alignment);

    let options = Options::ENABLE_STRIKETHROUGH
        | Options::ENABLE_GFM
        | Options::ENABLE_FOOTNOTES
        | Options::ENABLE_TABLES
        | Options::ENABLE_HEADING_ATTRIBUTES
        | Options::ENABLE_SUBSCRIPT;

    let mut html = String::new();
    push_html(&mut html, Parser::new_ext(&preprocessed, options));
    html
}
/// Sanitize an HTML string with an `ammonia` allow-list.
///
/// * `html` - the markup to clean. Every occurrence of `<video ` is rewritten
///   to `<video controls ` before it is passed to the sanitizer.
/// * `allowed_attributes` - installed as the builder's generic attributes.
///
/// On top of the builder defaults this adds a handful of extra tags
/// (including the custom `align` tag), removes `script`, `style`, `link` and
/// `canvas`, allows `href`/`target` on `a`, and adds the `atto` URL scheme.
pub fn clean_html(html: String, allowed_attributes: HashSet<&str>) -> String {
    let with_controls = html.replace("<video ", "<video controls ");

    let mut sanitizer = Builder::default();
    sanitizer
        .generic_attributes(allowed_attributes)
        .add_tags(&[
            "video", "source", "img", "b", "span", "p", "i", "strong", "em", "a", "align",
        ])
        .rm_tags(&["script", "style", "link", "canvas"])
        .add_tag_attributes("a", &["href", "target"])
        .add_url_schemes(&["atto"]);

    sanitizer.clean(&with_controls).to_string()
}
/// Render markdown input into sanitized HTML.
///
/// The markdown is rendered with [`render_markdown_dirty`] and then passed
/// through [`clean_html`] with a fixed set of allowed attributes.
///
/// When `proxy_images` is `true`, every `src="http` in the cleaned output is
/// rewritten to point at `/api/v1/util/proxy?url=http…` and gains a
/// `loading="lazy"` attribute. Otherwise the cleaned output is returned as-is.
pub fn render_markdown(input: &str, proxy_images: bool) -> String {
    let allowed_attributes: HashSet<&str> = HashSet::from([
        "id",
        "class",
        "ref",
        "aria-label",
        "lang",
        "title",
        "align",
        "src",
    ]);

    let sanitized = clean_html(render_markdown_dirty(input), allowed_attributes);
    if !proxy_images {
        return sanitized;
    }

    sanitized.replace(
        "src=\"http",
        "loading=\"lazy\" src=\"/api/v1/util/proxy?url=http",
    )
}
/// Rewrite the alignment markers found on a single line into `<align>` tags.
///
/// Recognized forms (both markers must be on the same line):
///
/// * `-> text <-` becomes `<align class="center"> text </align>`
/// * `-> text ->` becomes `<align class="right"> text </align>`
///
/// A backslash is always consumed; when it directly precedes `-`, `<` or `>`
/// that character is emitted literally instead of being treated as part of a
/// marker. An opening `->` with no closing marker on the same line is consumed
/// and the text after it is emitted unchanged.
///
/// * `line` - the line to process, without its trailing newline.
/// * `output` - receives the processed line followed by `\n`.
/// * `buffer` - scratch space for text not yet committed to `output`; it is
///   always left empty when this function returns.
/// * `is_in_pre` - when `true` the line is copied to `output` verbatim.
fn parse_alignment_line(line: &str, output: &mut String, buffer: &mut String, is_in_pre: bool) {
    if is_in_pre {
        // Lines inside a fenced code block are never rewritten.
        output.push_str(line);
        output.push('\n');
        return;
    }

    // `true` once an opening `->` has been seen and we are collecting its text.
    let mut is_alignment_waiting = false;
    // `true` when the previous char was an unescaped `<` that is still withheld.
    let mut alignment_center = false;
    // `true` when the previous char was an unescaped `-` that is still withheld.
    let mut has_dash = false;
    // `true` when the previous char was a backslash.
    let mut escape = false;

    for ch in line.chars() {
        if alignment_center && ch != '-' {
            // The withheld `<` was not followed by `-`, so it is ordinary text.
            alignment_center = false;
            buffer.push('<');
        }
        if has_dash && ch != '>' {
            // The withheld `-` was not followed by `>`, so it is ordinary text.
            has_dash = false;
            buffer.push('-');
        }

        match ch {
            '\\' => {
                escape = true;
                continue;
            }
            '-' => {
                if escape {
                    buffer.push(ch);
                    escape = false;
                    continue;
                }
                if alignment_center && is_alignment_waiting {
                    // `<-` while an alignment is open: close it as centered.
                    alignment_center = false;
                    is_alignment_waiting = false;
                    output.push_str("<align class=\"center\">");
                    output.push_str(buffer.as_str());
                    output.push_str("</align>");
                    buffer.clear();
                    continue;
                }
                has_dash = true;
                if !is_alignment_waiting {
                    // This dash may open an alignment; commit whatever came
                    // before it so it is not captured inside the aligned text.
                    output.push_str(buffer.as_str());
                    buffer.clear();
                }
            }
            '<' => {
                if escape {
                    buffer.push(ch);
                    escape = false;
                    continue;
                }
                // Withhold the `<` until we know whether a `-` follows.
                alignment_center = true;
                continue;
            }
            '>' => {
                if escape {
                    buffer.push(ch);
                    escape = false;
                    continue;
                }
                if has_dash {
                    has_dash = false;
                    if is_alignment_waiting {
                        // Second `->` on the line: close as right-aligned.
                        is_alignment_waiting = false;
                        output.push_str("<align class=\"right\">");
                        output.push_str(buffer.as_str());
                        output.push_str("</align>");
                        buffer.clear();
                        continue;
                    }
                    // First `->`: start collecting the aligned text.
                    is_alignment_waiting = true;
                    continue;
                } else {
                    buffer.push('>');
                }
            }
            _ => buffer.push(ch),
        }
        escape = false;
    }

    // A `<` or `-` that was the last character of the line is still withheld
    // at this point. Emit it (in the same order the loop would) so that lines
    // such as `---` or `a <` are not silently truncated.
    if alignment_center {
        buffer.push('<');
    }
    if has_dash {
        buffer.push('-');
    }

    output.push_str(buffer.as_str());
    output.push('\n');
    buffer.clear();
}
/// Apply alignment-marker rewriting to every line of `input`.
///
/// The input is split on `\n`. A line starting with three backticks toggles
/// the "inside a fenced block" state and is copied through unchanged; every
/// other line is handed to `parse_alignment_line` together with the current
/// fence state. Each processed line is followed by `\n` in the result.
pub fn parse_alignment(input: &str) -> String {
    let mut rendered = String::new();
    let mut scratch = String::new();
    let mut inside_fence = false;

    for current in input.split('\n') {
        if !current.starts_with("```") {
            parse_alignment_line(current, &mut rendered, &mut scratch, inside_fence);
            continue;
        }

        // Fence delimiter: flip the state and emit the line verbatim.
        inside_fence = !inside_fence;
        rendered.push_str(current);
        rendered.push('\n');
    }

    // Append anything still sitting in the scratch buffer.
    rendered.push_str(&scratch);
    rendered
}
/// Append `<br />` to every line that ends with a backslash.
///
/// The input is split on `\n` and each line is written back followed by
/// `\n`. A line starting with three backticks toggles the fenced-block state;
/// fence lines and lines inside a fence are copied unchanged. Any other line
/// whose content, ignoring trailing whitespace, ends in `\` gets `<br />`
/// appended after the full original line.
pub fn parse_backslash_breaks(input: &str) -> String {
    let mut rendered = String::new();
    let mut inside_fence = false;

    for line in input.split('\n') {
        let is_fence = line.starts_with("```");
        if is_fence {
            inside_fence = !inside_fence;
        }

        rendered.push_str(line);
        if !is_fence && !inside_fence && line.trim_end().ends_with('\\') {
            rendered.push_str("<br />");
        }
        rendered.push('\n');
    }

    rendered
}
/// Adapted from <https://git.cypr.io/oz/autolink-rust>.
///
/// The only real change here is that autolinks require a whitespace OR end the
/// end of the pattern to match here.
pub fn autolinks(input: &str) -> String {
if input.len() == 0 {
return String::new();
}
let pattern = regex::Regex::new(
r"(?ix)\b(([\w-]+://?|www[.])[^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|/)))(\s|$)",
)
.unwrap();
pattern
.replace_all(input, "<a href=\"$0\">$0</a> ")
.to_string()
}