bberry/src/core/parser.rs
2025-05-31 12:42:23 -04:00

242 lines
7 KiB
Rust

use super::element::Element;
/// Extract the first double-quoted string from `buf` and append its contents to `buffer`.
///
/// Everything before the opening `"` is discarded. Inside the string a backslash
/// escapes the next character (the backslash itself is dropped and the following
/// character is appended verbatim). Reading stops right after the closing `"`;
/// if the input ends first, whatever was collected so far stays in `buffer`.
///
/// `buffer` is appended to, never cleared.
pub fn string_parser(buf: impl Iterator<Item = char>, buffer: &mut String) {
    let mut source = buf;

    // Consume input up to and including the opening quote; bail out if there is none.
    if !source.any(|c| c == '"') {
        return;
    }

    let mut escaped = false;
    for c in source {
        match (escaped, c) {
            // previous char was a backslash: take this one literally
            (true, _) => {
                buffer.push(c);
                escaped = false;
            }
            (false, '\\') => escaped = true,
            // unescaped closing quote ends the string
            (false, '"') => return,
            (false, _) => buffer.push(c),
        }
    }
}
/// Parse an attribute block.
///
/// # Returns
/// `(key, value)`
pub fn attr_parser(buf: &str) -> (String, String) {
let mut key: String = String::new();
let mut value: String = String::new();
let mut buffer: String = String::new();
let mut finished_key: bool = false;
for _ in 0..2 {
string_parser(
if finished_key {
// skip quote + key + quote chars to get to AFTER the first string
buf.chars().skip(key.len() + 2)
} else {
// we haven't done the key yet, so let's just select from the start
buf.chars().skip(0)
},
&mut buffer,
);
// at this point, `buffer` should contain a string since `string_parser` is blocking
// until it fully parses a string
if finished_key {
value = buffer.clone();
} else {
key = buffer.clone();
finished_key = true;
}
buffer.clear();
}
(key, value)
}
/// Parse the inside of one element expression (the text between a pair of
/// parentheses, without the parentheses themselves).
///
/// The tag is every character up to the first space, newline or `"`
/// (any `(` met while reading the tag is ignored). What follows depends on the tag:
///
/// * `attr`, `:` or an empty tag: the rest is handed to [`attr_parser`] and an
///   element tagged `attr` holding that single key/value pair is returned. With an
///   empty tag (the `("key" "value")` shorthand) the opening quote was consumed as
///   the tag terminator, so it is put back before parsing.
/// * `text`: the first quoted string becomes the `content` attribute.
/// * a tag ending in `'` (e.g. `h1'`): apostrophes are removed from the tag and the
///   first quoted string becomes a child `text` element, so `(h1' "Hello")` can be
///   written instead of `(h1 [(text "Hello")])`.
/// * anything else: every nested `(...)` is parsed with [`element_parser`]; results
///   tagged `attr` are merged into this element's attributes, all others become
///   children.
///
/// All positions are tracked as byte offsets so input containing non-ASCII
/// characters is sliced on valid boundaries.
pub fn expr_parser(buf: &str) -> Element {
    let mut element: Element = Element::default();

    // Byte offset at which scanning resumes after a nested element was parsed.
    let mut resume_at: usize = 0;
    let mut finished_parsing_tag = false;

    for (i, ch) in buf.char_indices() {
        if i < resume_at {
            // still inside a child that `element_parser` already handled
            continue;
        }

        // the tag is always the first thing in an expression
        if !finished_parsing_tag {
            if matches!(ch, ' ' | '\n' | '"') {
                finished_parsing_tag = true;
            } else if ch != '(' {
                element.tag.push(ch);
            }
            continue;
        }

        let rest = &buf[i..];

        // attribute forms
        if element.tag.is_empty() || element.tag == "attr" || element.tag == ":" {
            let (k, v) = if element.tag.is_empty() {
                // restore the opening quote that ended the (empty) tag
                attr_parser(&format!("\"{rest}"))
            } else {
                attr_parser(rest)
            };
            element.attrs.insert(k, v);
            element.tag = "attr".to_string();
            return element;
        }

        // text forms
        if element.tag == "text" || element.tag.ends_with('\'') {
            let mut content = String::new();
            string_parser(rest.chars(), &mut content);

            if element.tag == "text" {
                element.attrs.insert("content".to_string(), content);
            } else {
                element.tag = element.tag.replace('\'', "");
                let mut text_element = Element::default();
                text_element.tag = "text".to_string();
                text_element.attrs.insert("content".to_string(), content);
                element.children.push(text_element);
            }
            return element;
        }

        // everything else: nested elements
        if ch == '(' {
            // parse from here to the end (including the paren)
            let (child, used) = element_parser(rest);

            // `used` is the byte length of the child's inner text, so resume on the
            // child's closing paren: opening paren + inner text.
            resume_at = i + ch.len_utf8() + used;

            if child.tag == "attr" {
                // the child only carries attributes: merge them into this element
                for (key, value) in child.attrs {
                    element.attrs.insert(key, value);
                }
            } else {
                element.children.push(child);
            }
        }
    }

    element
}
/// Parse the first parenthesised element found in `value`.
///
/// Input before the first `(` is ignored. From there the text up to the matching
/// `)` is collected (nested parentheses are balanced) and handed to
/// [`expr_parser`]. Parentheses that appear inside a double-quoted string do not
/// take part in the balancing; inside a string a backslash escapes the following
/// character, so `\"` does not end the string.
///
/// Parsing stops at the first complete element, which lets the caller continue
/// with whatever follows it.
///
/// # Returns
/// `(element, used)` where `used` is the length in bytes of the element's inner
/// text, i.e. excluding the enclosing parentheses and anything that preceded
/// them. If no complete element is found (no `(`, or it is never closed), the
/// default element and `0` are returned.
pub fn element_parser(value: &str) -> (Element, usize) {
    // Nesting depth; 0 means the opening paren has not been seen yet.
    let mut depth: usize = 0;
    // Raw text between the outer parentheses, parsed once the element is complete.
    let mut inner = String::new();
    let mut in_string = false;
    let mut escaped = false;

    for ch in value.chars() {
        if depth == 0 {
            // nothing matters until the element opens
            if ch == '(' {
                depth = 1;
            }
            continue;
        }

        if in_string {
            // string contents are copied verbatim; only track where the string ends
            inner.push(ch);
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                in_string = false;
            }
            continue;
        }

        match ch {
            '"' => in_string = true,
            '(' => depth += 1,
            ')' => {
                depth -= 1;
                if depth == 0 {
                    // back at the level we started on: the element is complete
                    // (the closing paren itself is not part of the inner text)
                    let used = inner.len();
                    return (expr_parser(&inner), used);
                }
            }
            _ => {}
        }
        inner.push(ch);
    }

    (Element::default(), 0)
}
/// Parse a whole document.
///
/// The source text is wrapped as the body of a synthetic `null?` root element,
/// `(null? [ ... ])`, and that root is parsed with [`element_parser`]; the
/// consumed-length part of its result is discarded.
pub fn document(value: &str) -> Element {
    let wrapped = format!("(null? [{value}])");
    let (root, _used) = element_parser(&wrapped);
    root
}