hyaenidae/content/src/bbcode.rs
asonix 0e1552eeaa Content: Use own bbcode impl
This gives us more control over things, like
automatic 'linkifying', and the ability to add
custom user tagging logic
2021-01-31 13:50:34 -06:00

595 lines
15 KiB
Rust

use crate::{
color::color,
email::email,
handle::handle,
url::{url, Url},
};
use combine::{
attempt, between, choice, error::StreamError, look_ahead, many, many1, parser, satisfy,
stream::StreamErrorFor, token, value, Parser, Stream,
};
/// Every bbcode tag the parser recognises.
///
/// Each variant corresponds to the bracketed name listed next to it (see the
/// name table in `tag()` and the `Display` impl). Variant order is significant
/// for the derived `Ord`/`PartialOrd`, so new variants should be appended with
/// that in mind.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Tag {
/// `[right]`
Right,
/// `[center]`
Center,
/// `[color=...]`; one of the two tags whose attribute is kept (after
/// validation by the `color` parser).
Color,
/// `[quote]`
Quote,
/// `[code]`
Code,
/// `[codeblock]`
Codeblock,
/// `[pre]`
Pre,
/// `[mono]`
Mono,
/// `[sub]`
Sub,
/// `[sup]`
Sup,
/// `[s]`
S,
/// `[spoiler]`
Spoiler,
/// `[bold]`
Bold,
/// `[strong]`
Strong,
/// `[i]`
I,
/// `[em]`
Em,
/// `[u]`
U,
/// `[smcaps]`
Smcaps,
/// `[icontext]`; its body must be a single handle.
IconText,
/// `[icon]`; its body must be a single handle.
Icon,
/// `[hr]`; the only tag that takes no closing tag.
Hr,
/// `[url]` / `[url=...]`; the target is either the attribute or the body.
Url,
}
/// One element of the parsed bbcode tree.
#[derive(Clone, Debug)]
pub(crate) enum Node {
/// A recognised tag together with everything nested inside it.
TagNode {
/// Which tag this is.
tag: Tag,
/// The validated attribute value, if the tag accepts one and it parsed.
attr: Option<String>,
/// Nodes found between the opening and closing tag (empty for `[hr]`).
children: Vec<Node>,
},
/// A URL recognised by the `url` parser.
UrlNode {
url: Url,
},
/// A user handle recognised by the `handle` parser, split into its two parts.
HandleNode {
handle: String,
domain: String,
},
/// An email address recognised by the `email` parser.
EmailNode {
email: String,
},
/// A single character of plain text; never `'\n'`.
CharNode {
text: char,
},
/// A run of one or more consecutive `'\n'` characters, collapsed into one node.
NewlineNode,
}
/// Error raised when bracketed text is not a known tag name, or is not the
/// closing tag that was expected. Carries the offending text.
#[derive(Clone, Debug, thiserror::Error)]
#[error("Invalid tag: {0}")]
struct TagError(String);
/// Error used by `char_node` to stop consuming plain characters once the
/// enclosing tag's closing tag is next in the input.
#[derive(Clone, Debug, thiserror::Error)]
#[error("Backing out due to found closing tag")]
struct ClosingTagBackout;
impl Tag {
    /// Returns `false` only for [`Tag::Hr`], the one tag written without a
    /// matching closing tag.
    fn needs_closing(&self) -> bool {
        match self {
            Tag::Hr => false,
            _ => true,
        }
    }

    /// Validates a raw attribute string for this tag.
    ///
    /// Returns `None` when no attribute was supplied, when this tag does not
    /// take an attribute, or when the attribute fails to parse. Only
    /// [`Tag::Color`] and [`Tag::Url`] accept attributes.
    ///
    /// The returned string is the parser's output rather than the raw text;
    /// any input left over after the parsed prefix is discarded.
    fn with_attribute(&self, attribute: &Option<String>) -> Option<String> {
        let raw = match attribute {
            Some(raw) => raw.as_str(),
            None => return None,
        };

        match self {
            Tag::Color => {
                // `parse` yields `(output, remaining_input)`; keep the output only.
                let (parsed, _rest) = color().parse(raw).ok()?;
                Some(parsed)
            }
            Tag::Url => {
                let (parsed, _rest) = url().parse(raw).ok()?;
                Some(parsed.to_string())
            }
            _ => None,
        }
    }
}
/// Reads one or more characters up to, but not including, the next `]`.
fn tag_string<Input>() -> impl Parser<Input, Output = String>
where
    Input: Stream<Token = char>,
{
    let not_closing_bracket = satisfy(|c: char| !matches!(c, ']'));
    many1(not_closing_bracket)
}
/// Parses the text found between `[` and `]` of an opening tag.
///
/// The text has the form `name` or `name=attribute`. It is split at the FIRST
/// `=` only, so attribute values that themselves contain `=` (for example a
/// URL with a query string) are kept intact instead of being truncated.
///
/// Yields the recognised [`Tag`] and the raw, not yet validated attribute.
/// Fails with [`TagError`] when `name` is not a known tag; names are matched
/// case-sensitively.
fn tag<Input>() -> impl Parser<Input, Output = (Tag, Option<String>)>
where
    Input: Stream<Token = char>,
{
    tag_string().and_then(|full_tag: String| {
        // `=` is a single byte in UTF-8, so `at + 1` is always a char boundary.
        let (tag_name, attribute) = match full_tag.find('=') {
            Some(at) => (&full_tag[..at], Some(&full_tag[at + 1..])),
            None => (full_tag.as_str(), None),
        };

        let tag = match tag_name {
            "right" => Tag::Right,
            "center" => Tag::Center,
            "quote" => Tag::Quote,
            "color" => Tag::Color,
            "code" => Tag::Code,
            "codeblock" => Tag::Codeblock,
            "pre" => Tag::Pre,
            "mono" => Tag::Mono,
            "sub" => Tag::Sub,
            "sup" => Tag::Sup,
            "s" => Tag::S,
            "spoiler" => Tag::Spoiler,
            "bold" => Tag::Bold,
            "strong" => Tag::Strong,
            "i" => Tag::I,
            "em" => Tag::Em,
            "u" => Tag::U,
            "smcaps" => Tag::Smcaps,
            "icontext" => Tag::IconText,
            "icon" => Tag::Icon,
            "hr" => Tag::Hr,
            "url" => Tag::Url,
            _ => {
                return Err(StreamErrorFor::<Input>::other(TagError(
                    tag_name.to_owned(),
                )))
            }
        };

        // Only allocate the attribute once the tag name is known to be valid.
        Ok((tag, attribute.map(str::to_owned)))
    })
}
/// Matches the closing form of `tag`, i.e. `[/name]` where `name` is the
/// tag's `Display` text.
///
/// Any other bracketed text fails with a [`TagError`] carrying the text that
/// was found between the brackets.
fn closing_tag<Input>(tag: Tag) -> impl Parser<Input, Output = ()>
where
    Input: Stream<Token = char>,
{
    (token('['), tag_string(), token(']'))
        .map(|(_, inner, _)| inner)
        .and_then(move |found: String| {
            let expected_name = tag.to_string();
            let is_expected = found
                .strip_prefix('/')
                .map_or(false, |name| name == expected_name);

            if is_expected {
                Ok(())
            } else {
                Err(StreamErrorFor::<Input>::other(TagError(found)))
            }
        })
}
/// Matches a bracketed opening tag such as `[bold]` or `[color=red]` and
/// yields the tag with its raw attribute.
///
/// The function name is misspelled ("openening"); it is left as is because
/// `tag_node` refers to it by this name.
fn openening_tag<Input>() -> impl Parser<Input, Output = (Tag, Option<String>)>
where
    Input: Stream<Token = char>,
{
    (token('['), tag(), token(']')).map(|(_, parsed, _)| parsed)
}
/// Parses everything after an opening `[url]` / `[url=...]` tag, including
/// the closing `[/url]`.
///
/// * With a valid URL attribute, the body is parsed like any other tag body
///   and the validated attribute becomes the node's `attr`.
/// * Otherwise (no attribute, or one that is not a URL) the body itself must
///   be a single URL, which becomes the only child; `attr` is `None`.
fn url_inner<Input>(attr: Option<String>) -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    match Tag::Url.with_attribute(&attr) {
        Some(target) => tag_body(Tag::Url, Some(target)).left(),
        None => (url_node(), closing_tag(Tag::Url))
            .map(|(link, ())| Node::TagNode {
                tag: Tag::Url,
                attr: None,
                children: vec![link],
            })
            .right(),
    }
}
/// Parses the remainder of an `[icontext]` tag: exactly one handle followed
/// by `[/icontext]`. The handle becomes the node's only child.
fn icon_text_inner<Input>() -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    (handle_node(), closing_tag(Tag::IconText)).map(|(mention, ())| Node::TagNode {
        tag: Tag::IconText,
        attr: None,
        children: vec![mention],
    })
}
/// Parses the remainder of an `[icon]` tag: exactly one handle followed by
/// `[/icon]`. The handle becomes the node's only child.
fn icon_inner<Input>() -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    let wrap_in_icon = |mention| Node::TagNode {
        tag: Tag::Icon,
        attr: None,
        children: vec![mention],
    };

    handle_node()
        .map(wrap_in_icon)
        .skip(closing_tag(Tag::Icon))
}
/// Parses the children of `tag` followed by its closing tag, producing the
/// finished [`Node::TagNode`].
///
/// `attr` is the raw attribute from the opening tag; it is run through
/// [`Tag::with_attribute`], so invalid or unsupported attributes end up as
/// `None` on the node.
fn tag_body<Input>(tag: Tag, attr: Option<String>) -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    (node_vec(Some(tag)), closing_tag(tag)).map(move |(children, ())| {
        let attr = tag.with_attribute(&attr);
        Node::TagNode {
            tag,
            attr,
            children,
        }
    })
}
/// Produces a childless, attribute-less node for a tag that has no closing
/// tag. Consumes no input.
fn singleton_tag<Input>(tag: Tag) -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    let empty_node = Node::TagNode {
        tag,
        attr: None,
        children: Vec::new(),
    };
    value(empty_node)
}
/// Parses one complete tag: the opening tag, then whatever that tag requires
/// after it.
///
/// Tags without a closing form yield an empty node immediately. `url`,
/// `icontext` and `icon` have dedicated body parsers; every other tag uses
/// the generic [`tag_body`]. The `left()`/`right()` chains only serve to give
/// all branches the same `Either` type.
fn tag_node<Input>() -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    openening_tag().then(|(tag, attr)| match (tag.needs_closing(), tag) {
        (false, _) => singleton_tag(tag).right(),
        (true, Tag::Url) => url_inner(attr).left().left().left(),
        (true, Tag::IconText) => icon_text_inner().right().left().left(),
        (true, Tag::Icon) => icon_inner().left().right().left(),
        (true, _) => tag_body(tag, attr).right().right().left(),
    })
}
/// Wraps the result of the `handle` parser in a [`Node::HandleNode`].
fn handle_node<Input>() -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    handle().map(|parsed| {
        let domain = parsed.domain;
        let handle = parsed.handle;
        Node::HandleNode { handle, domain }
    })
}
/// Wraps the result of the `email` parser in a [`Node::EmailNode`].
fn email_node<Input>() -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    email().map(|address| Node::EmailNode { email: address })
}
/// Wraps the result of the `url` parser in a [`Node::UrlNode`].
fn url_node<Input>() -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    url().map(|link| Node::UrlNode { url: link })
}
/// Accepts any single character except `'\n'`; newlines are handled
/// separately by `newline_node`.
fn valid_char<Input>() -> impl Parser<Input, Output = char>
where
    Input: Stream<Token = char>,
{
    satisfy(|c: char| !matches!(c, '\n'))
}
fn char_node<Input>(closing: Option<Tag>) -> impl Parser<Input, Output = Node>
where
Input: Stream<Token = char>,
{
if let Some(tag) = closing {
look_ahead(closing_tag(tag))
.map(|_| None)
.or(valid_char().map(Some))
.and_then(|text: Option<char>| {
if let Some(text) = text {
Ok(Node::CharNode { text })
} else {
Err(StreamErrorFor::<Input>::other(ClosingTagBackout))
}
})
.left()
} else {
valid_char().map(|text| Node::CharNode { text }).right()
}
}
/// Collapses a run of one or more `'\n'` characters into a single
/// [`Node::NewlineNode`].
///
/// The matched newlines are skipped rather than collected, so no temporary
/// `String` is built just to be thrown away.
fn newline_node<Input>() -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    combine::parser::repeat::skip_many1(combine::parser::char::char('\n'))
        .map(|_| Node::NewlineNode)
}
/// Parses exactly one node, trying the alternatives in priority order:
/// tag, handle, email, URL, plain character, newline run.
///
/// The first four are wrapped in `attempt` so that a partial match rewinds
/// and the next alternative can be tried. `closing` is forwarded to
/// `char_node` so plain-text parsing stops in front of the enclosing tag's
/// closing tag.
fn single_node<Input>(closing: Option<Tag>) -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    attempt(tag_node())
        .or(attempt(handle_node()))
        .or(attempt(email_node()))
        .or(attempt(url_node()))
        .or(char_node(closing))
        .or(newline_node())
}
/// Parses zero or more nodes in sequence. `closing` names the enclosing tag,
/// if any, and is passed through to every element parser.
fn node_vec_<Input>(closing: Option<Tag>) -> impl Parser<Input, Output = Vec<Node>>
where
    Input: Stream<Token = char>,
{
    let element = single_node(closing);
    many(element)
}
// Crate-visible entry point of the grammar: parses a sequence of nodes.
//
// `closing` is the tag whose body is being parsed (`None` at the top level).
// This is a thin wrapper around `node_vec_` declared through combine's
// `parser!` macro. The grammar is recursive (`tag_body` calls `node_vec`,
// which eventually reaches `tag_body` again), and the macro presumably exists
// here to give that recursion a nameable parser type.
// The macro hands arguments to the body by reference, hence `*closing`.
parser! {
pub(crate) fn node_vec[Input](closing: Option<Tag>)(Input) -> Vec<Node>
where [Input: Stream<Token = char>]
{
node_vec_(*closing)
}
}
/// Renders a tag as its bbcode name (the text written between the brackets).
/// `closing_tag` relies on this to build the expected `/name` text.
impl std::fmt::Display for Tag {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let name = match *self {
            Tag::Right => "right",
            Tag::Center => "center",
            Tag::Color => "color",
            Tag::Quote => "quote",
            Tag::Code => "code",
            Tag::Codeblock => "codeblock",
            Tag::Pre => "pre",
            Tag::Mono => "mono",
            Tag::Sub => "sub",
            Tag::Sup => "sup",
            Tag::S => "s",
            Tag::Spoiler => "spoiler",
            Tag::Bold => "bold",
            Tag::Strong => "strong",
            Tag::I => "i",
            Tag::Em => "em",
            Tag::U => "u",
            Tag::Smcaps => "smcaps",
            Tag::IconText => "icontext",
            Tag::Icon => "icon",
            Tag::Hr => "hr",
            Tag::Url => "url",
        };
        f.write_str(name)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use combine::EasyParser;

    /// Parses `input` as a top-level node list, asserting that the whole
    /// input was consumed, and returns the nodes.
    fn parse_nodes(input: &str) -> Vec<Node> {
        let (nodes, rest) = node_vec(None).easy_parse(input).unwrap();
        assert_eq!(rest, "");
        nodes
    }

    /// Asserts that `input` parses completely into exactly `expected` nodes.
    fn assert_node_count(input: &str, expected: usize) {
        let nodes = parse_nodes(input);
        assert_eq!(nodes.len(), expected);
    }

    /// Asserts that `input` falls back to one character node per byte, i.e.
    /// nothing in it was recognised as markup. Inputs must be ASCII.
    fn assert_all_plain(input: &str) {
        assert_node_count(input, input.len());
    }

    // ---- low-level parsers ----

    #[test]
    fn parse_closing_tag() {
        let rest = closing_tag(Tag::Right)
            .easy_parse("[/right]")
            .map(|(_, rest)| rest)
            .unwrap();
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_tag() {
        let rest = tag().easy_parse("right").map(|(_, rest)| rest).unwrap();
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_right_node() {
        let rest = tag_node()
            .easy_parse("[right][/right]")
            .map(|(_, rest)| rest)
            .unwrap();
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_nested_node() {
        let (node, rest) = tag_node()
            .easy_parse("[center][bold][/bold][/center]")
            .unwrap();
        assert_eq!(rest, "");

        if let Node::TagNode { tag, children, .. } = node {
            assert_eq!(tag, Tag::Center);
            assert_eq!(children.len(), 1);
        } else {
            panic!("Invalid node type");
        }
    }

    // ---- tags and plain text ----

    #[test]
    fn parse_multiple_nodes() {
        assert_node_count("[center][/center][right][/right]", 2);
    }

    #[test]
    fn parse_plaintext() {
        assert_all_plain("this is a plaintext string");
    }

    #[test]
    fn parse_text_with_bracket() {
        assert_all_plain("plaintext [ but with an open bracket");
    }

    #[test]
    fn parse_text_with_bad_tag() {
        assert_all_plain("bad tag [here] is parsed fine");
    }

    // ---- urls ----

    #[test]
    fn parse_url() {
        assert_node_count("https://example.com:80/path?query#fragment", 1);
    }

    #[test]
    fn parse_string_with_url() {
        assert_node_count("hello http://example.com world", 13);
    }

    #[test]
    fn parse_url_tag() {
        assert_node_count("[url=http://example.com]hey there[/url]", 1);
    }

    #[test]
    fn parse_url_tag_2() {
        assert_node_count("[url]http://example.com[/url]", 1);
    }

    #[test]
    fn parse_invalid_url_tag() {
        assert_all_plain("[url]not a url[/url]");
    }

    #[test]
    fn parse_invalid_url_tag_2() {
        assert_all_plain("[url=bad]not a url[/url]");
    }

    // ---- colors and mixed content ----

    #[test]
    fn parse_text_with_color_name() {
        assert_node_count("some [color=white]text[/color]", 6);
    }

    #[test]
    fn parse_text_with_color_hash() {
        assert_node_count("some [color=#fff]text[/color]", 6);
    }

    #[test]
    fn parse_text_with_mixed_tags() {
        assert_node_count(
            "[bold]bold text[/bold] with a [bad] tag and a [hr] good tag",
            35,
        );
    }

    // ---- handles and icons ----

    #[test]
    fn parse_handle_node() {
        assert_node_count("@one@two", 1);
    }

    #[test]
    fn parse_handle_node_in_text() {
        assert_node_count("before @han@dle after", 14);
    }

    #[test]
    fn parse_icon_tag() {
        assert_node_count("[icon]@han@dle[/icon]", 1);
    }

    #[test]
    fn parse_invalid_icon_tag() {
        assert_all_plain("[icon]bad[/icon]");
    }

    #[test]
    fn parse_icontext_tag() {
        assert_node_count("[icontext]@han@dle[/icontext]", 1);
    }

    #[test]
    fn parse_invalid_icontext_tag() {
        assert_all_plain("[icontext]bad[/icontext]");
    }

    // ---- emails ----

    #[test]
    fn parse_email_node() {
        assert_node_count("one.two@three.four", 1);
    }

    #[test]
    fn parse_email_in_text() {
        assert_node_count("this is a string with.an@email", 18);
    }

    // ---- newlines ----

    #[test]
    fn parse_newline() {
        assert_node_count("\n", 1);
    }

    #[test]
    fn parse_multiple_newlines() {
        assert_node_count("\n\n\n", 1);
    }

    #[test]
    fn parse_newlines_in_text() {
        assert_node_count("hewwo\n\n\nmr\nobama\n", 15);
    }
}