use crate::{ color::color, email::email, handle::handle, url::{url, Url}, }; use combine::{ attempt, between, choice, error::StreamError, look_ahead, many, many1, parser, satisfy, stream::StreamErrorFor, token, value, Parser, Stream, }; #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Tag { Right, Center, Color, Quote, Code, Codeblock, Pre, Mono, Sub, Sup, S, Spoiler, Bold, Strong, I, Em, U, Smcaps, IconText, Icon, Hr, Url, } #[derive(Clone, Debug)] pub(crate) enum Node { TagNode { tag: Tag, attr: Option, children: Vec, }, UrlNode { url: Url, }, HandleNode { handle: String, domain: String, }, EmailNode { email: String, }, CharNode { text: char, }, NewlineNode, } #[derive(Clone, Debug, thiserror::Error)] #[error("Invalid tag: {0}")] struct TagError(String); #[derive(Clone, Debug, thiserror::Error)] #[error("Backing out due to found closing tag")] struct ClosingTagBackout; impl Tag { fn needs_closing(&self) -> bool { !matches!(self, Tag::Hr) } fn with_attribute(&self, attribute: &Option) -> Option { let attr = attribute.as_deref()?; match self { Tag::Color => color().parse(attr).ok().map(|color| color.0), Tag::Url => url().parse(attr).ok().map(|url| url.0.to_string()), _ => None, } } } fn tag_string() -> impl Parser where Input: Stream, { many1(satisfy(|c| c != ']')) } fn tag() -> impl Parser)> where Input: Stream, { tag_string().and_then(|full_tag| { let mut iter = full_tag.split('='); let tag_name = iter .next() .ok_or_else(|| StreamErrorFor::::other(TagError(full_tag.clone())))?; let tag = match tag_name { "right" => Tag::Right, "center" => Tag::Center, "quote" => Tag::Quote, "color" => Tag::Color, "code" => Tag::Code, "codeblock" => Tag::Codeblock, "pre" => Tag::Pre, "mono" => Tag::Mono, "sub" => Tag::Sub, "sup" => Tag::Sup, "s" => Tag::S, "spoiler" => Tag::Spoiler, "bold" => Tag::Bold, "strong" => Tag::Strong, "i" => Tag::I, "em" => Tag::Em, "u" => Tag::U, "smcaps" => Tag::Smcaps, "icontext" => Tag::IconText, "icon" => Tag::Icon, "hr" => Tag::Hr, "url" => Tag::Url, _ => { return Err(StreamErrorFor::::other(TagError( tag_name.to_owned(), ))) } }; let attribute = iter.next().map(|s| s.to_owned()); Ok((tag, attribute)) }) } fn closing_tag(tag: Tag) -> impl Parser where Input: Stream, { between(token('['), token(']'), tag_string()).and_then(move |closing_tag| { if closing_tag == format!("/{}", tag) { Ok(()) } else { Err(StreamErrorFor::::other(TagError(closing_tag))) } }) } fn openening_tag() -> impl Parser)> where Input: Stream, { between(token('['), token(']'), tag()) } fn url_inner(attr: Option) -> impl Parser where Input: Stream, { let tag = Tag::Url; if let Some(attr) = tag.with_attribute(&attr) { tag_body(tag, Some(attr)).left() } else { url_node() .map(move |node| Node::TagNode { tag, attr: None, children: vec![node], }) .skip(closing_tag(tag)) .right() } } fn icon_text_inner() -> impl Parser where Input: Stream, { handle_node() .skip(closing_tag(Tag::IconText)) .map(|node| Node::TagNode { tag: Tag::IconText, attr: None, children: vec![node], }) } fn icon_inner() -> impl Parser where Input: Stream, { handle_node() .skip(closing_tag(Tag::Icon)) .map(|node| Node::TagNode { tag: Tag::Icon, attr: None, children: vec![node], }) } fn tag_body(tag: Tag, attr: Option) -> impl Parser where Input: Stream, { node_vec(Some(tag)) .skip(closing_tag(tag)) .map(move |children| Node::TagNode { tag, attr: tag.with_attribute(&attr), children, }) } fn singleton_tag(tag: Tag) -> impl Parser where Input: Stream, { value(Node::TagNode { tag, attr: None, children: vec![], }) } fn tag_node() -> impl Parser where Input: Stream, { openening_tag().then(|(tag, attr)| { if tag.needs_closing() { match tag { Tag::Url => url_inner(attr).left().left().left(), Tag::IconText => icon_text_inner().right().left().left(), Tag::Icon => icon_inner().left().right().left(), _ => tag_body(tag, attr).right().right().left(), } } else { singleton_tag(tag).right() } }) } fn handle_node() -> impl Parser where Input: Stream, { handle().map(|handle| Node::HandleNode { handle: handle.handle, domain: handle.domain, }) } fn email_node() -> impl Parser where Input: Stream, { email().map(|email| Node::EmailNode { email }) } fn url_node() -> impl Parser where Input: Stream, { url().map(|url| Node::UrlNode { url }) } fn valid_char() -> impl Parser where Input: Stream, { satisfy(|c| c != '\n') } fn char_node(closing: Option) -> impl Parser where Input: Stream, { if let Some(tag) = closing { look_ahead(closing_tag(tag)) .map(|_| None) .or(valid_char().map(Some)) .and_then(|text: Option| { if let Some(text) = text { Ok(Node::CharNode { text }) } else { Err(StreamErrorFor::::other(ClosingTagBackout)) } }) .left() } else { valid_char().map(|text| Node::CharNode { text }).right() } } fn newline_node() -> impl Parser where Input: Stream, { many1(combine::parser::char::char('\n')).map(|_: String| Node::NewlineNode) } fn single_node(closing: Option) -> impl Parser where Input: Stream, { choice(( attempt(tag_node()), attempt(handle_node()), attempt(email_node()), attempt(url_node()), char_node(closing), newline_node(), )) } fn node_vec_(closing: Option) -> impl Parser> where Input: Stream, { many(single_node(closing)) } parser! { pub(crate) fn node_vec[Input](closing: Option)(Input) -> Vec where [Input: Stream] { node_vec_(*closing) } } impl std::fmt::Display for Tag { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { let s = match self { Tag::Right => "right", Tag::Center => "center", Tag::Quote => "quote", Tag::Color => "color", Tag::Code => "code", Tag::Codeblock => "codeblock", Tag::Pre => "pre", Tag::Mono => "mono", Tag::Sub => "sub", Tag::Sup => "sup", Tag::S => "s", Tag::Spoiler => "spoiler", Tag::Bold => "bold", Tag::Strong => "strong", Tag::I => "i", Tag::Em => "em", Tag::U => "u", Tag::Smcaps => "smcaps", Tag::IconText => "icontext", Tag::Icon => "icon", Tag::Hr => "hr", Tag::Url => "url", }; write!(f, "{}", s) } } #[cfg(test)] mod tests { use super::*; use combine::EasyParser; #[test] fn parse_closing_tag() { let (_, rest) = closing_tag(Tag::Right).easy_parse("[/right]").unwrap(); assert_eq!(rest, ""); } #[test] fn parse_tag() { let (_, rest) = tag().easy_parse("right").unwrap(); assert_eq!(rest, ""); } #[test] fn parse_right_node() { let (_, rest) = tag_node().easy_parse("[right][/right]").unwrap(); assert_eq!(rest, ""); } #[test] fn parse_nested_node() { let (node, rest) = tag_node() .easy_parse("[center][bold][/bold][/center]") .unwrap(); assert_eq!(rest, ""); match node { Node::TagNode { tag, children, .. } => { assert_eq!(tag, Tag::Center); assert_eq!(children.len(), 1); } _ => panic!("Invalid node type"), } } #[test] fn parse_multiple_nodes() { let (vec, rest) = node_vec(None) .easy_parse("[center][/center][right][/right]") .unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), 2); } #[test] fn parse_plaintext() { let input = "this is a plaintext string"; let (vec, rest) = node_vec(None).easy_parse(input).unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), input.len()); } #[test] fn parse_text_with_bracket() { let input = "plaintext [ but with an open bracket"; let (vec, rest) = node_vec(None).easy_parse(input).unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), input.len()); } #[test] fn parse_text_with_bad_tag() { let input = "bad tag [here] is parsed fine"; let (vec, rest) = node_vec(None).easy_parse(input).unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), input.len()); } #[test] fn parse_url() { let (vec, rest) = node_vec(None) .easy_parse("https://example.com:80/path?query#fragment") .unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), 1); } #[test] fn parse_string_with_url() { let (vec, rest) = node_vec(None) .easy_parse("hello http://example.com world") .unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), 13); } #[test] fn parse_url_tag() { let (vec, rest) = node_vec(None) .easy_parse("[url=http://example.com]hey there[/url]") .unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), 1); } #[test] fn parse_url_tag_2() { let (vec, rest) = node_vec(None) .easy_parse("[url]http://example.com[/url]") .unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), 1); } #[test] fn parse_invalid_url_tag() { let input = "[url]not a url[/url]"; let (vec, rest) = node_vec(None).easy_parse(input).unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), input.len()); } #[test] fn parse_invalid_url_tag_2() { let input = "[url=bad]not a url[/url]"; let (vec, rest) = node_vec(None).easy_parse(input).unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), input.len()); } #[test] fn parse_text_with_color_name() { let input = "some [color=white]text[/color]"; let (vec, rest) = node_vec(None).easy_parse(input).unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), 6); } #[test] fn parse_text_with_color_hash() { let input = "some [color=#fff]text[/color]"; let (vec, rest) = node_vec(None).easy_parse(input).unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), 6); } #[test] fn parse_text_with_mixed_tags() { let (vec, rest) = node_vec(None) .easy_parse("[bold]bold text[/bold] with a [bad] tag and a [hr] good tag") .unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), 35); } #[test] fn parse_handle_node() { let (vec, rest) = node_vec(None).easy_parse("@one@two").unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), 1); } #[test] fn parse_handle_node_in_text() { let (vec, rest) = node_vec(None).easy_parse("before @han@dle after").unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), 14); } #[test] fn parse_icon_tag() { let (vec, rest) = node_vec(None).easy_parse("[icon]@han@dle[/icon]").unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), 1); } #[test] fn parse_invalid_icon_tag() { let input = "[icon]bad[/icon]"; let (vec, rest) = node_vec(None).easy_parse(input).unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), input.len()); } #[test] fn parse_icontext_tag() { let (vec, rest) = node_vec(None) .easy_parse("[icontext]@han@dle[/icontext]") .unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), 1); } #[test] fn parse_invalid_icontext_tag() { let input = "[icontext]bad[/icontext]"; let (vec, rest) = node_vec(None).easy_parse(input).unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), input.len()); } #[test] fn parse_email_node() { let (vec, rest) = node_vec(None).easy_parse("one.two@three.four").unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), 1); } #[test] fn parse_email_in_text() { let (vec, rest) = node_vec(None) .easy_parse("this is a string with.an@email") .unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), 18); } #[test] fn parse_newline() { let (vec, rest) = node_vec(None).easy_parse("\n").unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), 1); } #[test] fn parse_multiple_newlines() { let (vec, rest) = node_vec(None).easy_parse("\n\n\n").unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), 1); } #[test] fn parse_newlines_in_text() { let (vec, rest) = node_vec(None).easy_parse("hewwo\n\n\nmr\nobama\n").unwrap(); assert_eq!(rest, ""); assert_eq!(vec.len(), 15); } }