malachite/src/markdown.rs
2025-08-20 00:26:44 -04:00

1042 lines
30 KiB
Rust

use std::collections::HashSet;
/// Render `input`, written in this crate's extended markdown dialect, to
/// cleaned HTML.
///
/// The custom syntax passes run one after another on the text, the result is
/// given to `tetratto_shared::markdown::render_markdown_dirty`, and the HTML
/// is finally passed through `tetratto_shared::markdown::clean_html` with a
/// fixed attribute allow-list.
pub fn render_markdown(input: &str) -> String {
    // `[/]` is shorthand for an explicit line break
    let text = input.replace("[/]", "<br />");

    // custom syntax passes, in the order they must be applied
    let text = parse_comment(&text);
    let text = parse_markdown_element(&text);
    let text = parse_underline(&text);
    let text = parse_toc(&text);
    let text = parse_image_size(&text);
    let text = parse_image(&text);
    let text = parse_link(&text);
    let text = parse_highlight(&text);
    let text = parse_text_color(&text);
    let text = parse_details(&text);
    let text = parse_page(&text);

    // the image pass writes `$per20` in place of spaces; turn `$per` back
    // into a real percent sign once markdown rendering is done
    let html = tetratto_shared::markdown::render_markdown_dirty(&text).replace("$per", "%");

    let allowed_attributes = HashSet::from([
        "id",
        "class",
        "ref",
        "aria-label",
        "lang",
        "title",
        "align",
        "src",
        "style",
        "controls",
        "autoplay",
        "loop",
    ]);

    // `<style>` and `<audio>` tags are swapped for marked `<span>` tags before
    // cleaning and swapped back afterwards (presumably because the cleaner
    // would otherwise remove them; verify against `clean_html`)
    let disguised = html
        .replace("<style>", "<span>:temp_style")
        .replace("</style>", "</span>:temp_style")
        .replace("<audio", ":temp_audio<span")
        .replace("</audio>", "</span>:temp_audio");

    tetratto_shared::markdown::clean_html(disguised, allowed_attributes)
        .replace("<span>:temp_style", "<style>")
        .replace("</span>:temp_style", "</style>")
        .replace(":temp_audio<span", "<audio")
        .replace("</span>:temp_audio", "</audio>")
}
/// Check whether `value` is made up entirely of numeric characters.
///
/// Every character has to satisfy [`char::is_numeric`]; a single non-numeric
/// character anywhere in the string makes the result `false`. An empty string
/// is not considered numeric.
pub(crate) fn is_numeric(value: &str) -> bool {
    !value.is_empty() && value.chars().all(char::is_numeric)
}
/// Return the tail of `x` that begins at character index `range.start`.
///
/// The index counts `char`s rather than bytes, so multi-byte characters are
/// never split. A start index at or beyond the number of characters in `x`
/// produces an empty string instead of panicking.
pub(crate) fn slice(x: &str, range: core::ops::RangeFrom<usize>) -> String {
    x.chars().skip(range.start).collect()
}
/// Parse the text color syntax, `%color%text%%`, found in `line`.
///
/// Finished color blocks are appended to `output` as
/// `<span style="color: ..." class="color_block">` elements. Text that has not
/// been flushed yet is left in `buffer`; the caller is expected to append it to
/// the output after this function returns.
///
/// A `%` only opens a color block when a closing `%%` exists somewhere at or
/// after it. That look-ahead borrows the remainder of `line` directly instead
/// of copying it for every `%`.
fn parse_text_color_line(output: &mut String, buffer: &mut String, line: &str) {
    let mut in_color_buffer = false;
    let mut in_main_buffer = false;
    let mut color_buffer = String::new();
    let mut close_1 = false;

    // `char_indices` yields byte offsets that are always on a char boundary,
    // so `line[offset..]` below is a valid, allocation-free look-ahead
    for (offset, ch) in line.char_indices() {
        if close_1 && ch != '%' {
            // we expected a second percent sign to close the main buffer;
            // not getting one means the first was ordinary text
            buffer.push('%');
            in_main_buffer = false;
            close_1 = false;
        }

        match ch {
            '%' => {
                if in_color_buffer {
                    // end of the color name, start of the colored text
                    in_color_buffer = false;
                    in_main_buffer = true;
                    continue;
                }

                if in_main_buffer {
                    if !close_1 {
                        // first half of the closing `%%`
                        close_1 = true;
                        continue;
                    }

                    // by this point we have: %color_buffer%main_buffer%%
                    output.push_str(&format!(
                        "<span style=\"color: {color_buffer}\" class=\"color_block\">{buffer}</span>"
                    ));

                    color_buffer.clear();
                    buffer.clear();

                    in_main_buffer = false;
                    close_1 = false;
                    continue;
                }

                // potential start: scan ahead (including this character)
                if !line[offset..].contains("%%") {
                    // no closing sequence, so this is a literal percent sign
                    buffer.push(ch);
                    continue;
                }

                // flush pending text, then start reading the color name
                output.push_str(buffer.as_str());
                buffer.clear();
                in_color_buffer = true;
            }
            ' ' => {
                if in_color_buffer {
                    // whatever was collected as a color name so far is moved
                    // into the text buffer
                    buffer.push_str(&color_buffer);
                    color_buffer.clear();
                }

                buffer.push(ch);
            }
            _ => {
                if in_color_buffer {
                    color_buffer.push(ch)
                } else {
                    buffer.push(ch)
                }
            }
        }
    }
}
/// Parse the highlight syntax, `==text==`, found in `line`.
///
/// Completed highlights are appended to `output` as `<mark>` elements followed
/// by a newline. Text that has not been flushed yet stays in `buffer`; the
/// caller is expected to append it to the output afterwards.
fn parse_highlight_line(output: &mut String, buffer: &mut String, line: &str) {
    /// Position of the scanner relative to a `==...==` pair.
    enum Scan {
        /// Not inside a highlight.
        Outside,
        /// One `=` seen; a second one opens the highlight.
        Opening,
        /// Between the opening and closing markers.
        Inside,
        /// One `=` seen inside the highlight; a second one closes it.
        Closing,
    }

    let mut state = Scan::Outside;

    for ch in line.chars() {
        state = match (state, ch) {
            (Scan::Outside, '=') => {
                // flush whatever came before the possible marker
                output.push_str(buffer.as_str());
                buffer.clear();
                Scan::Opening
            }
            (Scan::Outside, _) => {
                buffer.push(ch);
                Scan::Outside
            }
            (Scan::Opening, '=') => Scan::Inside,
            (Scan::Opening, _) => {
                // a single `=` was just text; give it back
                buffer.push('=');
                buffer.push(ch);
                Scan::Outside
            }
            (Scan::Inside, '=') => Scan::Closing,
            (Scan::Inside, _) => {
                buffer.push(ch);
                Scan::Inside
            }
            (Scan::Closing, '=') => {
                output.push_str(&format!("<mark>{buffer}</mark>\n"));
                buffer.clear();
                Scan::Outside
            }
            (Scan::Closing, _) => {
                // a single `=` inside the highlight is part of its text
                buffer.push('=');
                buffer.push(ch);
                Scan::Inside
            }
        };
    }
}
/// Parse the underline syntax, `!~color;style;line;thickness;text~!`, found in
/// `line`.
///
/// Everything between `!~` and `~!` is split on `;`. The last field is the
/// text; the fields before it are, in order, the `text-decoration-color`,
/// `text-decoration-style`, `text-decoration-line` and
/// `text-decoration-thickness` (a number, suffixed with `px`). A field written
/// as `default`, or left out entirely, falls back to `currentColor`, `solid`,
/// `underline` and `1px` respectively (a missing color emits no color rule).
///
/// Finished underlines are appended to `output` as styled `<span>` elements.
/// Text that has not been flushed yet stays in `buffer`; the caller is
/// expected to append it to the output afterwards.
fn parse_underline_line(output: &mut String, buffer: &mut String, line: &str) {
    let mut open_1 = false;
    let mut is_open = false;
    let mut close_1 = false;

    for char in line.chars() {
        if open_1 && char != '~' {
            // the `!` we held back did not start an underline
            is_open = false;
            open_1 = false;

            if char == '[' {
                // image syntax needs the literal character
                buffer.push('!');
            } else {
                buffer.push_str("&excl;");
            }
        }

        if close_1 && char != '!' {
            // the `~` we held back did not finish the underline
            is_open = false;
            close_1 = false;
            buffer.push('~');
        }

        match char {
            '~' => {
                // NOTE(review): a `~` seen while no underline is open or
                // opening is not written anywhere; confirm this is intended
                if open_1 {
                    open_1 = false;
                    is_open = true;
                } else if is_open {
                    // possible start of the closing `~!`
                    close_1 = true;
                }
            }
            '!' => {
                if close_1 {
                    // close: build the span from the collected fields
                    let mut fields: Vec<&str> = buffer.split(";").collect();
                    let text = fields.pop().unwrap_or("").trim();

                    let mut style = String::new();
                    for (i, field) in fields.iter().enumerate() {
                        let is_default = *field == "default";
                        match i {
                            0 => {
                                let color = if is_default { "currentColor" } else { *field };
                                style.push_str(&format!("text-decoration-color: {color};"));
                            }
                            1 => {
                                let kind = if is_default { "solid" } else { *field };
                                style.push_str(&format!("text-decoration-style: {kind};"));
                            }
                            2 => {
                                let which = if is_default { "underline" } else { *field };
                                style.push_str(&format!("text-decoration-line: {which};"));
                            }
                            3 => {
                                // explicit values are bare numbers and get a
                                // `px` suffix; the default already has its unit
                                if is_default {
                                    style.push_str("text-decoration-thickness: 1px;");
                                } else {
                                    style.push_str(&format!(
                                        "text-decoration-thickness: {field}px;"
                                    ));
                                }
                            }
                            _ => {}
                        }
                    }

                    // defaults for fields that were left out
                    if fields.len() < 2 {
                        style.push_str("text-decoration-style: solid;");
                    }

                    if fields.len() < 3 {
                        style.push_str("text-decoration-line: underline;");
                    }

                    if fields.len() < 4 {
                        style.push_str("text-decoration-thickness: 1px;");
                    }

                    output.push_str(&format!("<span style=\"{style}\">{text}</span>"));
                    buffer.clear();

                    open_1 = false;
                    is_open = false;
                    close_1 = false;
                    continue;
                } else if is_open {
                    // exclamation mark inside the underline body
                    buffer.push(char);
                    continue;
                }

                // possible start of the opening `!~`
                open_1 = true;

                // flush buffer
                output.push_str(buffer.as_str());
                buffer.clear();
            }
            _ => buffer.push(char),
        }
    }
}
/// Copy `line` to `output` unless it is a comment line.
///
/// A line that begins with `[` and contains `]:` is treated as a comment and
/// produces no output. The exact line `[..]` is replaced with a single space.
/// The buffer argument is unused.
fn parse_comment_line(output: &mut String, _: &mut String, line: &str) {
    let is_comment = line.starts_with('[') && line.contains("]:");
    if is_comment {
        return;
    }

    output.push_str(if line == "[..]" { " " } else { line });
}
fn parse_image_size_line(output: &mut String, buffer: &mut String, line: &str) {
let mut image_possible = false;
let mut in_image = false;
let mut in_size = false;
let mut in_size_rhs = false;
let mut size_lhs = String::new();
let mut size_rhs = String::new();
if !line.contains("{") {
output.push_str(line);
return;
}
for char in line.chars() {
if image_possible && char != '[' {
image_possible = false;
output.push('!');
}
match char {
'[' => {
if image_possible {
in_image = true;
image_possible = false;
continue;
}
if in_image {
buffer.push(char);
} else {
output.push(char);
}
}
'{' => {
if in_image {
in_size = true;
continue;
}
if in_image {
buffer.push(char);
} else {
output.push(char);
}
}
':' => {
if in_size {
in_size_rhs = true;
continue;
}
if in_image {
buffer.push(char);
} else {
output.push(char);
}
}
'}' => {
if in_size && in_size_rhs {
// end
output.push_str(&format!(
"<span style=\"width: {}; height: {}; float: {}\" class=\"img_sizer\">![{buffer}</span>",
if is_numeric(&size_lhs) {
format!("{size_lhs}px")
} else {
size_lhs
},
if is_numeric(&size_rhs) {
format!("{size_rhs}px")
} else {
size_rhs
},
if buffer.ends_with("#left)") {
"left"
} else if buffer.ends_with("#right)") {
"right"
} else {
"unset"
}
));
size_lhs = String::new();
size_rhs = String::new();
in_image = false;
in_size = false;
in_size_rhs = false;
image_possible = false;
buffer.clear();
continue;
}
if in_image {
buffer.push(char);
} else {
output.push(char);
}
}
'!' => {
// flush buffer
output.push_str(&buffer);
buffer.clear();
// ...
image_possible = true
}
_ => {
if in_image {
if in_size {
if in_size_rhs {
size_rhs.push(char);
} else {
size_lhs.push(char);
}
} else {
buffer.push(char);
}
} else {
output.push(char)
}
}
}
}
}
/// Parse the image syntax, `![alt](src)`, found in `line`.
///
/// Each image is appended to `output` as a lazily loaded `<img>` element.
/// Spaces in the source are written as `$per20`, and the element floats `left`
/// or `right` when the source ends in `#left` / `#right` (otherwise `unset`).
/// All other text is copied to `output` as it is read; the source of an image
/// that is never closed stays in `buffer` for the caller to append.
fn parse_image_line(output: &mut String, buffer: &mut String, line: &str) {
    let mut bang_pending = false; // saw `!`, waiting to see whether `[` follows
    let mut inside_image = false;
    let mut reading_alt = false;
    let mut reading_src = false;
    let mut alt = String::new();

    for ch in line.chars() {
        if bang_pending && ch != '[' {
            // the `!` was ordinary text after all
            bang_pending = false;
            output.push('!');
        }

        match ch {
            '[' if bang_pending => {
                inside_image = true;
                bang_pending = false;
                reading_alt = true;
            }
            ']' if reading_alt => {
                reading_alt = false;
                reading_src = true;
            }
            // parentheses opening the source are not part of it
            '(' if reading_src => {}
            ')' if inside_image => {
                let float = if buffer.ends_with("#left") {
                    "left"
                } else if buffer.ends_with("#right") {
                    "right"
                } else {
                    "unset"
                };

                output.push_str(&format!(
                    "<img loading=\"lazy\" alt=\"{alt}\" src=\"{}\" style=\"float: {}\" />",
                    buffer.replace(" ", "$per20"),
                    float
                ));

                alt.clear();
                reading_alt = false;
                reading_src = false;
                inside_image = false;
                bang_pending = false;
                buffer.clear();
            }
            '!' => {
                // flush buffer, then wait for a possible `[`
                output.push_str(buffer.as_str());
                buffer.clear();
                bang_pending = true;
            }
            // openers with no special meaning at this point
            '[' | '(' => {
                if inside_image {
                    buffer.push(ch);
                } else {
                    output.push(ch);
                }
            }
            // closers with no special meaning at this point
            ']' | ')' => output.push(ch),
            _ => {
                if !inside_image {
                    output.push(ch);
                } else if reading_alt {
                    alt.push(ch);
                } else {
                    buffer.push(ch);
                }
            }
        }
    }
}
/// Parse the link syntax, `[text](href)`, found in `line`.
///
/// Each link is appended to `output` as an `<a>` element with
/// `rel="noopener noreferrer"`. All other text is copied to `output` as it is
/// read; the target of a link that is never closed stays in `buffer` for the
/// caller to append.
///
/// A `[` only starts a link when a `]` exists somewhere after it. That
/// look-ahead borrows the remainder of `line` directly instead of copying it
/// for every `[`.
fn parse_link_line(output: &mut String, buffer: &mut String, line: &str) {
    let mut in_link = false;
    let mut in_text = false;
    let mut in_src = false;
    let mut text = String::new();

    // `char_indices` yields byte offsets that are always on a char boundary,
    // so `line[offset..]` below is a valid, allocation-free look-ahead
    for (offset, ch) in line.char_indices() {
        match ch {
            '[' => {
                // flush buffer
                output.push_str(buffer.as_str());
                buffer.clear();

                // scan for a closing bracket, otherwise this is plain text
                if !line[offset..].contains(']') {
                    output.push('[');
                    continue;
                }

                in_link = true;
                in_text = true;
            }
            ']' => {
                if in_text {
                    // end of the link text, the target follows
                    in_text = false;
                    in_src = true;
                    continue;
                }

                output.push(ch);
            }
            '(' => {
                if in_src {
                    // parentheses opening the target are not part of it
                    continue;
                }

                if in_link {
                    buffer.push(ch);
                } else {
                    output.push(ch);
                }
            }
            ')' => {
                if in_link {
                    // end of the link
                    output.push_str(&format!(
                        "<a href=\"{buffer}\" rel=\"noopener noreferrer\">{text}</a>"
                    ));

                    text.clear();
                    in_text = false;
                    in_src = false;
                    in_link = false;
                    buffer.clear();
                    continue;
                }

                output.push(ch);
            }
            _ => {
                if !in_link {
                    output.push(ch);
                } else if in_text {
                    text.push(ch);
                } else {
                    buffer.push(ch);
                }
            }
        }
    }
}
/// Helper macro that runs a parser function over some input while keeping
/// fenced code blocks out of its reach.
///
/// `$body` is called as `$body(&mut output, &mut buffer, text)`; whatever it
/// leaves in `buffer` is appended to the output (followed by a newline) after
/// each call. The macro evaluates to the resulting `String`.
///
/// Two forms exist:
///
/// * `parser_ignores_pre!(body, input)` calls `body` once per line. Lines that
///   start with a triple-backtick fence, or that are exactly `<style>` or
///   `</style>`, toggle a verbatim region; those lines and everything inside
///   the region are copied through unchanged.
/// * `parser_ignores_pre!(body, input, id, ..)` calls `body` once with the
///   whole input. Every fence line is replaced by a `$pre:<id>.<n>` placeholder
///   line beforehand (the content following an opening fence is stored with
///   it), and the placeholders are swapped back afterwards. `id` keeps the
///   placeholders of different parsers apart.
macro_rules! parser_ignores_pre {
    ($body:ident, $input:ident) => {{
        let mut output = String::new();
        let mut buffer = String::new();
        let mut verbatim = false;

        for line in $input.split('\n') {
            let is_toggle =
                line.starts_with("```") || line == "<style>" || line == "</style>";

            if is_toggle {
                verbatim = !verbatim;
            }

            if is_toggle || verbatim {
                // copied through untouched
                output.push_str(line);
                output.push('\n');
                continue;
            }

            $body(&mut output, &mut buffer, line);
            output.push_str(&buffer);
            output.push('\n');
            buffer.clear();
        }

        output
    }};

    ($body:ident, $input:ident, $id:literal, ..) => {{
        let mut output = String::new();
        let mut buffer = String::new();
        let mut stripped = String::new(); // input with fences replaced by placeholders
        let mut fenced: Vec<String> = Vec::new(); // one entry per fence line
        let mut inside_fence = false;

        for line in $input.split('\n') {
            if line.starts_with("```") {
                inside_fence = !inside_fence;
                fenced.push(format!("{line}\n"));
                stripped.push_str(&format!("$pre:{}.{}\n", $id, fenced.len()));
                continue;
            }

            if inside_fence {
                // belongs to the most recently opened fence
                if let Some(current) = fenced.last_mut() {
                    current.push_str(line);
                    current.push('\n');
                }
                continue;
            }

            stripped.push_str(line);
            stripped.push('\n');
        }

        $body(&mut output, &mut buffer, &stripped);
        output.push_str(&buffer);
        output.push('\n');
        buffer.clear();

        for (index, block) in fenced.iter().enumerate() {
            let placeholder = format!("$pre:{}.{}", $id, index + 1);

            // an entry holding only a bare fence is a closing fence; the
            // terminator was already written together with its opening block
            let restored = if block == "```\n" {
                String::new()
            } else {
                format!("{block}```\n")
            };

            output = output.replacen(&placeholder, &restored, 1);
        }

        output
    }};
}
/// Apply [`parse_text_color_line`] to `input` and return the result.
///
/// Uses the whole-input form of `parser_ignores_pre!` with id `0`: fenced code
/// blocks are replaced by `$pre:0.N` placeholders, the remaining text is
/// processed in a single call, and the fences are put back afterwards.
pub fn parse_text_color(input: &str) -> String {
parser_ignores_pre!(parse_text_color_line, input, 0, ..)
}
/// Apply [`parse_highlight_line`] to `input` and return the result.
///
/// Uses the whole-input form of `parser_ignores_pre!` with id `1`: fenced code
/// blocks are replaced by `$pre:1.N` placeholders, the remaining text is
/// processed in a single call, and the fences are put back afterwards.
pub fn parse_highlight(input: &str) -> String {
parser_ignores_pre!(parse_highlight_line, input, 1, ..)
}
/// Apply [`parse_underline_line`] to `input` and return the result.
///
/// Uses the whole-input form of `parser_ignores_pre!` with id `2`: fenced code
/// blocks are replaced by `$pre:2.N` placeholders, the remaining text is
/// processed in a single call, and the fences are put back afterwards.
pub fn parse_underline(input: &str) -> String {
parser_ignores_pre!(parse_underline_line, input, 2, ..)
}
/// Apply [`parse_comment_line`] to every line of `input` and return the result.
///
/// Uses the per-line form of `parser_ignores_pre!`, so lines inside fenced code
/// blocks or between `<style>` / `</style>` lines are copied through unchanged.
pub fn parse_comment(input: &str) -> String {
parser_ignores_pre!(parse_comment_line, input)
}
/// Apply [`parse_image_size_line`] to every line of `input` and return the
/// result.
///
/// Uses the per-line form of `parser_ignores_pre!`, so lines inside fenced code
/// blocks or between `<style>` / `</style>` lines are copied through unchanged.
pub fn parse_image_size(input: &str) -> String {
parser_ignores_pre!(parse_image_size_line, input)
}
/// Apply [`parse_image_line`] to every line of `input` and return the result.
///
/// Uses the per-line form of `parser_ignores_pre!`, so lines inside fenced code
/// blocks or between `<style>` / `</style>` lines are copied through unchanged.
pub fn parse_image(input: &str) -> String {
parser_ignores_pre!(parse_image_line, input)
}
/// Apply [`parse_link_line`] to every line of `input` and return the result.
///
/// Uses the per-line form of `parser_ignores_pre!`, so lines inside fenced code
/// blocks or between `<style>` / `</style>` lines are copied through unchanged.
pub fn parse_link(input: &str) -> String {
parser_ignores_pre!(parse_link_line, input)
}
/// Turn page definitions into `<div>` sub-pages.
///
/// A page starts on a line made of two at symbols followed by the page name.
/// An optional second argument, separated from the name by a semicolon, may be
/// `visible` to mark the page as shown by default; any other page gets the
/// `hidden` class. A page ends on a line containing nothing but two at
/// symbols.
///
/// The content of each page is rendered with [`render_markdown`] and wrapped
/// in a `<div>` whose id is `#/` followed by the page name.
///
/// Content outside of pages (before, between or after them) is passed through
/// unchanged. This allows persistent content that is shared between every
/// page: only content within pages is hidden when navigating to another page,
/// so everything that is not part of a page remains throughout navigation.
///
/// Page markers inside fenced code blocks or `<style>` blocks are ignored.
///
/// # Example
/// ```md
/// @@ home; visible
/// this is the homepage which is shown by default!
/// @@
///
/// @@ about
/// this is the about page which is NOT shown by default! a link with an href of "#/about" will open this page
/// @@
/// ```
pub fn parse_page(input: &str) -> String {
    let mut output = String::new();
    let mut page_body = String::new();
    let mut page_id = String::new();
    let mut start_shown = false;
    let mut in_page = false;
    let mut in_pre = false;

    for line in input.split('\n') {
        let is_toggle = line.starts_with("```")
            || line.starts_with("<style>")
            || line.starts_with("</style>");

        if is_toggle {
            in_pre = !in_pre;
        } else if !in_pre {
            // page markers are only recognised outside of verbatim regions
            if in_page && line == "@@" {
                // closing marker: emit the finished page
                output.push_str(&format!(
                    "<div id=\"#/{page_id}\" class=\"{}subpage no_p_margin fadein\">\n{}\n</div>",
                    if start_shown { "" } else { "hidden " },
                    // the markdown renderer ignores the div content, so the
                    // page body has to be rendered here
                    render_markdown(&page_body)
                ));

                start_shown = false;
                in_page = false;
                page_body.clear();
                continue;
            }

            if !in_page && line != "@@" && line.starts_with("@@") {
                // opening marker: `@@ name[; visible]`
                in_page = true;

                let header = line.replace("@@", "");
                let mut fields = header.trim().split(';').map(str::trim);

                page_id = fields.next().unwrap_or("").to_string();
                if fields.next() == Some("visible") {
                    start_shown = true;
                }

                continue;
            }
        }

        // ordinary line: goes to the open page, or straight to the output
        let sink = if in_page { &mut page_body } else { &mut output };
        sink.push_str(line);
        sink.push('\n');
    }

    output
}
/// Turn the expandable block syntax into `<details>` elements.
///
/// Much like the page definitions handled by [`parse_page`], a details block
/// starts on a line made of two ampersand symbols followed by the summary text
/// and ends on a line containing exactly two ampersand symbols.
///
/// The content of each block is rendered with [`render_markdown`] and placed
/// inside `<div class="content">` within the `<details>` element. Markers
/// inside fenced code blocks or `<style>` blocks are ignored.
///
/// # Example
/// ```md
/// && other summary
/// this element starts closed, but can be expanded
/// &&
/// ```
pub fn parse_details(input: &str) -> String {
    let mut output = String::new();
    let mut body = String::new();
    let mut summary = String::new();
    let mut in_details = false;
    let mut in_pre = false;

    for line in input.split('\n') {
        let is_toggle = line.starts_with("```")
            || line.starts_with("<style>")
            || line.starts_with("</style>");

        if is_toggle {
            in_pre = !in_pre;
        } else if !in_pre {
            // markers are only recognised outside of verbatim regions
            if in_details && line == "&&" {
                // closing marker: emit the finished element
                output.push_str(&format!(
                    "<details><summary>{summary}</summary><div class=\"content\">{}</div></details>",
                    render_markdown(&body),
                ));

                in_details = false;
                body.clear();
                continue;
            }

            if !in_details && line != "&&" && line.starts_with("&&") {
                // opening marker: everything after the ampersands is the summary
                in_details = true;
                summary = line.replace("&&", "").trim().to_string();
                continue;
            }
        }

        // ordinary line: goes to the open block, or straight to the output
        let sink = if in_details { &mut body } else { &mut output };
        sink.push_str(line);
        sink.push('\n');
    }

    output
}
/// Replace every occurrence of each pattern in `chars` with an underscore.
///
/// The patterns are applied to `x` one after another, in slice order.
fn underscore_chars(x: String, chars: &[&str]) -> String {
    chars
        .iter()
        .fold(x, |text, pattern| text.replace(*pattern, "_"))
}
/// Get the list of headers needed for [`parse_toc`].
///
/// Returns `(toc, output)`:
///
/// * `toc` is a markdown list with one entry per heading, each linking to the
///   heading's id. Entries are indented by one space per heading level,
///   measured relative to the first heading in the document so that the first
///   entry is always at level one.
/// * `output` is `input` with every heading line (a line starting with `#`)
///   replaced by an `<hN id="...">` element whose content is rendered with
///   [`render_markdown`].
///
/// Heading lines inside fenced code blocks or `<style>` blocks are left alone.
///
/// A heading that is shallower than the first heading by more than its own
/// level (for example `#` after a leading `###`) is clamped to level zero
/// rather than underflowing.
pub fn get_toc_list(input: &str) -> (String, String) {
    let mut output = String::new();
    let mut toc = String::new();
    let mut in_pre = false;
    let mut hc_offset: Option<usize> = None;

    for line in input.split('\n') {
        if line.starts_with("```") || line.starts_with("<style>") || line.starts_with("</style>") {
            in_pre = !in_pre;
            output.push_str(&format!("{line}\n"));
            continue;
        }

        if in_pre {
            output.push_str(&format!("{line}\n"));
            continue;
        }

        // not in pre
        if line.starts_with('#') {
            // number of leading `#` characters; at least one here
            let real_hc = line.chars().take_while(|c| *c == '#').count();

            let hc = match hc_offset {
                None => {
                    // the first heading defines level one so the list
                    // renders properly
                    hc_offset = Some(real_hc - 1);
                    1
                }
                // saturate: a heading shallower than the first one must not
                // wrap around to an enormous indent
                Some(offset) => real_hc.saturating_sub(offset),
            };

            // `#` is a single byte, so the hashes can be sliced off directly
            let htext = line[real_hc..].trim();
            let id = underscore_chars(
                htext.to_lowercase(),
                &[" ", "(", ")", "[", "]", "{", "}", ":", "?", "#", "&"],
            );

            // add heading with id
            output.push_str(&format!(
                "<h{real_hc} id=\"{id}\">{}</h{real_hc}>\n\n",
                render_markdown(htext)
            ));

            // add heading to toc
            toc += &format!("{}- <a href=\"#{id}\">{htext}</a>\n", " ".repeat(hc));
            continue;
        }

        // otherwise
        output.push_str(&format!("{line}\n"));
    }

    (toc, output)
}
/// Expand the `[toc]` table-of-contents marker.
///
/// The input is first run through [`get_toc_list`], which rewrites headings
/// and produces the list of entries. Every line that consists solely of
/// `[toc]` (in any letter case) is then replaced by that list. Markers inside
/// fenced code blocks or `<style>` blocks are left alone.
pub fn parse_toc(input: &str) -> String {
    let (toc_list, new_input) = get_toc_list(input);

    let mut rendered = String::new();
    let mut verbatim = false;

    for line in new_input.split('\n') {
        let is_toggle = line.starts_with("```")
            || line.starts_with("<style>")
            || line.starts_with("</style>");

        if is_toggle {
            verbatim = !verbatim;
        }

        let is_marker =
            !is_toggle && !verbatim && line.len() == 5 && line.to_lowercase() == "[toc]";

        if is_marker {
            // the marker line is swapped for the list itself
            rendered.push('\n');
            rendered.push_str(&toc_list);
        } else {
            rendered.push_str(line);
            rendered.push('\n');
        }
    }

    rendered
}
/// Handle the `<markdown>` HTML element within a single line.
///
/// Text before an opening `<markdown>` tag is appended to `output` as is. Text
/// between the opening tag and a closing `</markdown>` tag is rendered with
/// [`render_markdown`] and appended to `output`. Whatever follows the last
/// handled tag stays in `buffer`; the caller is expected to append it to the
/// output afterwards.
///
/// The tag check runs after every character has been added to the buffer, so
/// a tag that is the very last thing on the line is recognised too.
fn parse_markdown_element_line(output: &mut String, buffer: &mut String, line: &str) {
    let mut in_markdown = false;

    for char in line.chars() {
        // push first, then inspect: the buffer state after the final
        // character of the line must be checked as well
        buffer.push(char);

        if buffer.ends_with("<markdown>") {
            in_markdown = true;
            output.push_str(&buffer.replace("<markdown>", ""));
            buffer.clear();
        } else if in_markdown && buffer.ends_with("</markdown>") {
            in_markdown = false;
            output.push_str(&render_markdown(&buffer.replace("</markdown>", "")));
            buffer.clear();
        }
    }
}
/// Apply [`parse_markdown_element_line`] to every line of `input` and return
/// the result.
///
/// Uses the per-line form of `parser_ignores_pre!`, so lines inside fenced code
/// blocks or between `<style>` / `</style>` lines are copied through unchanged.
pub fn parse_markdown_element(input: &str) -> String {
parser_ignores_pre!(parse_markdown_element_line, input)
}