tetratto/crates/shared/src/markdown.rs

202 lines
6.1 KiB
Rust
Raw Normal View History

use ammonia::Builder;
2025-07-20 15:28:44 -04:00
use pulldown_cmark::{Parser, Options, html::push_html};
use std::collections::HashSet;
2025-07-21 22:29:16 -04:00
pub fn render_markdown_dirty(input: &str) -> String {
2025-07-20 16:18:56 -04:00
let input = &autolinks(&parse_alignment(input));
2025-07-20 15:28:44 -04:00
let mut options = Options::empty();
options.insert(Options::ENABLE_STRIKETHROUGH);
options.insert(Options::ENABLE_GFM);
options.insert(Options::ENABLE_FOOTNOTES);
options.insert(Options::ENABLE_TABLES);
options.insert(Options::ENABLE_HEADING_ATTRIBUTES);
options.insert(Options::ENABLE_SUBSCRIPT);
options.insert(Options::ENABLE_SUPERSCRIPT);
let parser = Parser::new_ext(input, options);
let mut html = String::new();
push_html(&mut html, parser);
2025-07-21 22:29:16 -04:00
html
}
2025-07-21 22:29:16 -04:00
pub fn clean_html(html: String, allowed_attributes: HashSet<&str>) -> String {
Builder::default()
.generic_attributes(allowed_attributes)
2025-04-26 21:12:29 -04:00
.add_tags(&[
"video", "source", "img", "b", "span", "p", "i", "strong", "em", "a", "align",
2025-04-26 21:12:29 -04:00
])
.rm_tags(&["script", "style", "link", "canvas"])
.add_tag_attributes("a", &["href", "target"])
2025-07-08 13:35:23 -04:00
.add_url_schemes(&["atto"])
.clean(&html)
.to_string()
2025-07-21 22:29:16 -04:00
.replace("<video loading=", "<video controls loading=")
}
/// Render markdown input into sanitized HTML.
///
/// The markdown is rendered with [`render_markdown_dirty`] and cleaned with
/// [`clean_html`] using a fixed set of generic attributes.
///
/// When `proxy_images` is `true`, every `src="http` in the cleaned output is
/// rewritten to go through `/api/v1/util/proxy?url=` and gains
/// `loading="lazy"`. This is a text substitution over the whole output.
pub fn render_markdown(input: &str, proxy_images: bool) -> String {
    let allowed_attributes = HashSet::from([
        "id",
        "class",
        "ref",
        "aria-label",
        "lang",
        "title",
        "align",
        "src",
    ]);

    let output = clean_html(render_markdown_dirty(input), allowed_attributes);

    if !proxy_images {
        return output;
    }

    output.replace(
        "src=\"http",
        "loading=\"lazy\" src=\"/api/v1/util/proxy?url=http",
    )
}
/// Translate the alignment markers on a single line into `<align>` elements.
///
/// Recognized syntax (both markers must be on the same line):
///
/// * `-> text <-` becomes `<align class="center"> text </align>`
/// * `-> text ->` becomes `<align class="right"> text </align>`
///
/// A backslash directly before `-`, `<` or `>` keeps that character from being
/// treated as part of a marker; the backslash itself is dropped. Any other
/// backslash is passed through untouched so markdown escapes still work.
///
/// # Arguments
///
/// * `line` - the line to process, without its trailing newline.
/// * `output` - receives the processed line followed by `\n`.
/// * `buffer` - scratch space for text that has not been committed to `output`
///   yet; it is always left empty when this function returns.
/// * `is_in_pre` - when `true` the line is copied to `output` verbatim.
fn parse_alignment_line(line: &str, output: &mut String, buffer: &mut String, is_in_pre: bool) {
    /// Append `<align class="{class}">{content}</align>` to `output`.
    fn push_aligned(output: &mut String, class: &str, content: &str) {
        output.push_str("<align class=\"");
        output.push_str(class);
        output.push_str("\">");
        output.push_str(content);
        output.push_str("</align>");
    }

    if is_in_pre {
        output.push_str(line);
        output.push('\n');
        return;
    }

    // an opening `->` has been seen and we are collecting the aligned text
    let mut is_alignment_waiting = false;
    // the previous char was an unescaped `<` that has not been emitted yet
    let mut alignment_center = false;
    // the previous char was an unescaped `-` that has not been emitted yet
    let mut has_dash = false;
    // the previous char was an unescaped backslash
    let mut escape = false;

    for char in line.chars() {
        if alignment_center && !(char == '-' && is_alignment_waiting) {
            // the pending `<` can only become a closing `<-` while we are waiting
            // for one; in every other case emit it now so it keeps its position
            alignment_center = false;
            buffer.push('<');
        }

        if has_dash && char != '>' {
            // the pending `-` did not turn into `->`, so it is plain text
            has_dash = false;
            buffer.push('-');
        }

        match char {
            '\\' => {
                if escape {
                    // an escaped backslash: keep both so markdown sees `\\`
                    buffer.push_str("\\\\");
                    escape = false;
                } else {
                    escape = true;
                }

                continue;
            }
            '-' => {
                if escape {
                    buffer.push(char);
                    escape = false;
                    continue;
                }

                if alignment_center && is_alignment_waiting {
                    // the previous char was `<`, so this completes a closing `<-`
                    alignment_center = false;
                    is_alignment_waiting = false;
                    push_aligned(output, "center", buffer.as_str());
                    buffer.clear();
                    continue;
                }

                has_dash = true;

                if !is_alignment_waiting {
                    // commit everything before a potential opening `->` so it is
                    // not captured inside the alignment element
                    output.push_str(buffer.as_str());
                    buffer.clear();
                }
            }
            '<' => {
                if escape {
                    buffer.push(char);
                    escape = false;
                    continue;
                }

                alignment_center = true;
                continue;
            }
            '>' => {
                if escape {
                    buffer.push(char);
                    escape = false;
                    continue;
                }

                if has_dash {
                    has_dash = false;

                    if is_alignment_waiting {
                        // second `->` on the line: right alignment
                        is_alignment_waiting = false;
                        push_aligned(output, "right", buffer.as_str());
                        buffer.clear();
                        continue;
                    }

                    // first `->` on the line: start collecting aligned text
                    is_alignment_waiting = true;
                    continue;
                } else {
                    buffer.push('>');
                }
            }
            _ => {
                if escape {
                    // the backslash did not escape a marker char, so it belongs to
                    // the text (markdown escape, path separator, ...)
                    buffer.push('\\');
                }

                buffer.push(char);
            }
        }

        escape = false;
    }

    if is_alignment_waiting {
        // the opening `->` was never closed on this line; `output` currently ends
        // right where it stood, so put it back as literal text
        output.push_str("->");
    }

    // anything still pending at the end of the line is literal text
    if alignment_center {
        buffer.push('<');
    }

    if has_dash {
        buffer.push('-');
    }

    if escape {
        buffer.push('\\');
    }

    output.push_str(buffer.as_str());
    output.push('\n');
    buffer.clear();
}
pub fn parse_alignment(input: &str) -> String {
let lines = input.split("\n");
let mut is_in_pre: bool = false;
let mut output = String::new();
let mut buffer = String::new();
for line in lines {
2025-07-20 20:19:33 -04:00
if line.starts_with("```") {
is_in_pre = !is_in_pre;
output.push_str(&format!("{line}\n"));
} else {
parse_alignment_line(line, &mut output, &mut buffer, is_in_pre)
}
}
output.push_str(&buffer);
output
}
2025-07-20 16:18:56 -04:00
/// Adapted from <https://git.cypr.io/oz/autolink-rust>.
2025-07-20 16:41:50 -04:00
///
/// The only real change here is that autolinks require a whitespace OR end the
/// end of the pattern to match here.
2025-07-20 16:18:56 -04:00
pub fn autolinks(input: &str) -> String {
if input.len() == 0 {
return String::new();
}
let pattern = regex::Regex::new(
2025-07-20 16:41:50 -04:00
r"(?ix)\b(([\w-]+://?|www[.])[^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|/)))(\s|$)",
2025-07-20 16:18:56 -04:00
)
.unwrap();
pattern
2025-07-20 16:41:50 -04:00
.replace_all(input, "<a href=\"$0\">$0</a> ")
2025-07-20 16:18:56 -04:00
.to_string()
}