use super::element::Element;

/// Parse one double-quoted string literal from `buf`, appending its contents to `buffer`.
///
/// * Every character before the opening `"` is ignored.
/// * Inside the string a backslash escapes the next character: the backslash is dropped and
///   the following character is pushed verbatim (so `\"` yields `"` and `\\` yields `\`).
/// * Parsing stops right after the closing `"` has been consumed. If `buf` runs out first,
///   `buffer` keeps whatever was read up to that point.
///
/// `buffer` is modified IN PLACE and is never cleared by this function. Passing `&mut iter`
/// as `buf` leaves `iter` positioned just past the closing quote, which allows several
/// strings to be parsed back to back from the same iterator.
pub fn string_parser(buf: impl Iterator<Item = char>, buffer: &mut String) {
    let mut in_string = false;
    let mut escaped = false;

    for c in buf {
        if !in_string {
            // Nothing matters until the opening quote shows up.
            in_string = c == '"';
            continue;
        }

        if escaped {
            buffer.push(c);
            escaped = false;
        } else if c == '\\' {
            escaped = true;
        } else if c == '"' {
            // Closing quote: the string is complete.
            break;
        } else {
            buffer.push(c);
        }
    }
}

/// Parse an attribute block made of two consecutive string literals.
///
/// Both strings are read from a single shared iterator, so the value is located by where
/// the key actually ended rather than by an offset computed from the key's length. That
/// keeps the result correct when the key contains escapes or multi-byte characters, or
/// when the block starts with whitespace.
///
/// # Returns
/// `(key, value)`; a string that is missing from `buf` is returned empty.
pub fn attr_parser(buf: &str) -> (String, String) {
    let mut chars = buf.chars();
    let mut key = String::new();
    let mut value = String::new();

    // `string_parser` consumes through the closing quote, so the second call picks up
    // immediately after the key.
    string_parser(&mut chars, &mut key);
    string_parser(&mut chars, &mut value);

    (key, value)
}

/// A parser for parsing an element expression. The element parser **only** parses
/// element statements (stuff in parentheses).
pub fn expr_parser(buf: &str) -> Element { let mut element: Element = Element::default(); // state let mut skip_for: usize = 0; let mut finished_parsing_tag: bool = false; for (i, char) in buf.chars().enumerate() { if skip_for > 0 { // skip char skip_for -= 1; continue; } // parse tag first as it is the first thing if !finished_parsing_tag { if (char == ' ') | (char == '\n') | (char == '"') { finished_parsing_tag = true; continue; } if char != '(' { element.tag.push(char); } continue; } // special elements if element.tag == "#" { // parse as tuple // tuples can only contain strings let len = buf.matches("s\"").collect::>().len(); let mut values: Vec = Vec::new(); let mut last_len: usize = 0; for _ in 0..len { let mut buffer: String = String::new(); string_parser( buf.replace("# ", "") .replace("s\"", "\"") .chars() .skip(last_len), &mut buffer, ); last_len = buffer.len() + 2; values.push(buffer); } for (i, v) in values.iter().enumerate() { element.attrs.insert(i.to_string(), v.to_owned()); } return element; } else if (element.tag == "attr") | (element.tag == ":") | (element.tag.is_empty()) { let mut chars = (&buf[i..buf.len()]).to_string(); if element.tag.is_empty() { chars.insert_str(0, "\""); } // parse let (k, v) = attr_parser(&chars); element.attrs.insert(k, v); element.tag = "attr".to_string(); return element; } else if (element.tag == "text") | (element.tag.ends_with("'")) { let mut buffer: String = String::new(); string_parser((&buf[i..buf.len()]).chars(), &mut buffer); if element.tag != "text" { // allows us to write `(h1' "Hello, world!")` instead of `(h1 [(text "Hello, world!")])` element.tag = element.tag.replace("'", ""); let mut text_element = Element::default(); text_element.tag = "text".to_string(); text_element.attrs.insert("content".to_string(), buffer); element.children.push(text_element); } else { // just add attr element.attrs.insert("content".to_string(), buffer); } return element; } // everything else if char == '(' { // parse from here to the 
end (including the paren) let (element_, used) = element_parser(&buf[i..buf.len()]); skip_for = used; if element_.tag == "attr" { // element just holds attributes... merge with list for attr in element_.attrs { element.attrs.insert(attr.0, attr.1); } } else { // element is a child element.children.push(element_); } } } // ... element } /// Parse a single element. /// /// # Returns /// `(element, number of characters consumed)` pub fn element_parser(value: &str) -> (Element, usize) { let mut element: Element = Element::default(); let mut consumed: usize = 0; // parser state let mut scope: i32 = 0; // the scope tells us when we can stop parsing junk... // basically, we stop parsing junk when the scope reaches the same scope we started // with when we began parsing junk let mut return_to_scope: i32 = 0; let mut buffer: String = String::new(); let mut building_buffer_so_ignore_junk: bool = false; let mut comment: bool = false; for char in value.chars() { if comment { if char == '\n' { comment = false; } continue; } if building_buffer_so_ignore_junk { // everything here can be assumed to be junk to be parsed later if char == ')' { // if we're back at the scope we started when we first opened the block, // don't stop parsing as junk scope -= 1; if scope == return_to_scope { // building_buffer_so_ignore_junk = false; // parse actual stuff consumed += buffer.len(); element = expr_parser(&buffer); // reset the buffer and return first element IMMEDIATELY // this allows expr_parser to continue parsing the next // expression by skipping how many characters we parsed here buffer.clear(); return (element, consumed); } else { buffer.push(char); } } else { if char == '(' { scope += 1; } buffer.push(char); } continue; } // everything here happens ONLY IF we aren't parsing any junk yet if char == ' ' { continue; } if char == '(' { // tell the parser that we're building a buffer to recursively parse, // so everything can currently be ignored scope += 1; if 
!building_buffer_so_ignore_junk { return_to_scope = scope - 1; building_buffer_so_ignore_junk = true; } } } // ... (element, consumed) } /// Parse a full document. pub fn document(value: &str) -> (Element, usize) { element_parser(&format!("(null? [{value}])")) }