Content: Use own bbcode impl

This gives us more control over things, like
automatic 'linkifying' and the ability to add
custom user-tagging logic.
This commit is contained in:
asonix 2021-01-31 13:50:34 -06:00
parent 3778603df2
commit 0e1552eeaa
8 changed files with 1761 additions and 20 deletions

View file

@ -8,6 +8,7 @@ edition = "2018"
[dependencies]
ammonia = "3.1.0"
bbclash = "1.1.1"
combine = "4.5.2"
log = "0.4"
once_cell = "1.5.2"
thiserror = "1"

594
content/src/bbcode.rs Normal file
View file

@ -0,0 +1,594 @@
use crate::{
color::color,
email::email,
handle::handle,
url::{url, Url},
};
use combine::{
attempt, between, choice, error::StreamError, look_ahead, many, many1, parser, satisfy,
stream::StreamErrorFor, token, value, Parser, Stream,
};
/// Every BBCode tag the parser recognises.
///
/// The name written in source text is the variant name in lowercase (see the
/// `match` in `tag()` and the `Display` impl). The derived `Ord`/`PartialOrd`
/// follow declaration order, so reordering variants changes comparisons.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Tag {
Right,
Center,
/// Takes an attribute that must satisfy the `color` parser.
Color,
Quote,
Code,
Codeblock,
Pre,
Mono,
Sub,
Sup,
S,
Spoiler,
Bold,
Strong,
I,
Em,
U,
Smcaps,
/// Body must be a single handle (`@name@domain`), see `icon_text_inner`.
IconText,
/// Body must be a single handle (`@name@domain`), see `icon_inner`.
Icon,
/// The only tag without a closing counterpart (see `needs_closing`).
Hr,
/// Either carries a URL attribute, or its body must itself be a URL
/// (see `url_inner`).
Url,
}
/// Parse-tree node produced by the BBCode grammar in this module.
///
/// Plain text is represented one character per `CharNode` at this stage.
#[derive(Clone, Debug)]
pub(crate) enum Node {
/// A recognised tag, its validated attribute (if any) and its parsed body.
TagNode {
tag: Tag,
attr: Option<String>,
children: Vec<Node>,
},
/// A URL accepted by the `url` parser.
UrlNode {
url: Url,
},
/// A `@handle@domain` mention accepted by the `handle` parser.
HandleNode {
handle: String,
domain: String,
},
/// An address accepted by the `email` parser.
EmailNode {
email: String,
},
/// A single character that is not `'\n'`.
CharNode {
text: char,
},
/// One or more consecutive `'\n'` characters (see `newline_node`).
NewlineNode,
}
/// Parse error raised when bracketed text is not a known tag name, or when a
/// closing tag does not match the expected one. Carries the offending text.
#[derive(Clone, Debug, thiserror::Error)]
#[error("Invalid tag: {0}")]
struct TagError(String);
/// Parse error raised by `char_node` when the upcoming input is the closing
/// tag of the enclosing element, so that no character is produced there.
#[derive(Clone, Debug, thiserror::Error)]
#[error("Backing out due to found closing tag")]
struct ClosingTagBackout;
impl Tag {
    /// Reports whether this tag must be terminated by a matching `[/name]`.
    ///
    /// Only `Tag::Hr` stands alone; every other tag wraps a body.
    fn needs_closing(&self) -> bool {
        *self != Tag::Hr
    }

    /// Validates and normalises the `=value` part of an opening tag.
    ///
    /// Returns the parsed value for `Tag::Color` (via the `color` parser) and
    /// for `Tag::Url` (via the `url` parser, re-serialised with `to_string`).
    /// Yields `None` when no attribute was supplied, when the value does not
    /// parse, or for any tag that takes no attribute. Input left over after a
    /// successful parse is discarded.
    fn with_attribute(&self, attribute: &Option<String>) -> Option<String> {
        let raw = match attribute {
            Some(value) => value.as_str(),
            None => return None,
        };
        match self {
            Tag::Color => color().parse(raw).ok().map(|(parsed, _rest)| parsed),
            Tag::Url => url()
                .parse(raw)
                .ok()
                .map(|(parsed, _rest)| parsed.to_string()),
            _ => None,
        }
    }
}
/// Parses one or more characters up to (but not including) the next `]`.
fn tag_string<Input>() -> impl Parser<Input, Output = String>
where
    Input: Stream<Token = char>,
{
    let not_closing_bracket = satisfy(|c: char| c != ']');
    many1(not_closing_bracket)
}
/// Parses the text between the brackets of an opening tag into a `Tag` and
/// its raw (not yet validated) attribute.
///
/// The text has the form `name` or `name=value`. Unknown names fail with a
/// `TagError` carrying the name.
fn tag<Input>() -> impl Parser<Input, Output = (Tag, Option<String>)>
where
    Input: Stream<Token = char>,
{
    tag_string().and_then(|full_tag| {
        // Split on the first '=' only: attribute values such as
        // `http://example.com/?a=b` contain further '=' characters that
        // belong to the value and must not be cut off.
        let mut parts = full_tag.splitn(2, '=');
        let tag_name = parts
            .next()
            .ok_or_else(|| StreamErrorFor::<Input>::other(TagError(full_tag.clone())))?;
        let tag = match tag_name {
            "right" => Tag::Right,
            "center" => Tag::Center,
            "quote" => Tag::Quote,
            "color" => Tag::Color,
            "code" => Tag::Code,
            "codeblock" => Tag::Codeblock,
            "pre" => Tag::Pre,
            "mono" => Tag::Mono,
            "sub" => Tag::Sub,
            "sup" => Tag::Sup,
            "s" => Tag::S,
            "spoiler" => Tag::Spoiler,
            "bold" => Tag::Bold,
            "strong" => Tag::Strong,
            "i" => Tag::I,
            "em" => Tag::Em,
            "u" => Tag::U,
            "smcaps" => Tag::Smcaps,
            "icontext" => Tag::IconText,
            "icon" => Tag::Icon,
            "hr" => Tag::Hr,
            "url" => Tag::Url,
            _ => {
                return Err(StreamErrorFor::<Input>::other(TagError(
                    tag_name.to_owned(),
                )))
            }
        };
        let attribute = parts.next().map(|value| value.to_owned());
        Ok((tag, attribute))
    })
}
/// Parses `[/name]` where `name` is the textual form of `tag`.
///
/// Any other bracketed text fails with a `TagError` carrying that text.
fn closing_tag<Input>(tag: Tag) -> impl Parser<Input, Output = ()>
where
    Input: Stream<Token = char>,
{
    let bracketed = between(token('['), token(']'), tag_string());
    bracketed.and_then(move |found: String| {
        let expected = format!("/{}", tag);
        if found != expected {
            return Err(StreamErrorFor::<Input>::other(TagError(found)));
        }
        Ok(())
    })
}
/// Parses `[name]` or `[name=value]` and yields the tag with its raw attribute.
fn openening_tag<Input>() -> impl Parser<Input, Output = (Tag, Option<String>)>
where
    Input: Stream<Token = char>,
{
    let open = token('[');
    let close = token(']');
    between(open, close, tag())
}
/// Parses what follows an opening `[url]` / `[url=...]` tag.
///
/// With a valid URL attribute the body is arbitrary BBCode (`tag_body`).
/// Otherwise the body must itself be a URL, immediately followed by `[/url]`.
fn url_inner<Input>(attr: Option<String>) -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    let tag = Tag::Url;
    match tag.with_attribute(&attr) {
        Some(href) => tag_body(tag, Some(href)).left(),
        None => url_node()
            .map(move |link| Node::TagNode {
                tag,
                attr: None,
                children: vec![link],
            })
            .skip(closing_tag(tag))
            .right(),
    }
}
/// Parses the body of `[icontext]`: exactly one handle followed by
/// `[/icontext]`, wrapped in an `IconText` tag node.
fn icon_text_inner<Input>() -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    let wrap = |mention: Node| Node::TagNode {
        tag: Tag::IconText,
        attr: None,
        children: vec![mention],
    };
    handle_node().skip(closing_tag(Tag::IconText)).map(wrap)
}
/// Parses the body of `[icon]`: exactly one handle followed by `[/icon]`,
/// wrapped in an `Icon` tag node.
fn icon_inner<Input>() -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    let wrap = |mention: Node| Node::TagNode {
        tag: Tag::Icon,
        attr: None,
        children: vec![mention],
    };
    handle_node().skip(closing_tag(Tag::Icon)).map(wrap)
}
/// Parses the body of a generic tag up to and including its closing tag.
///
/// The raw attribute is validated through `Tag::with_attribute` when the
/// node is built.
fn tag_body<Input>(tag: Tag, attr: Option<String>) -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    let body = node_vec(Some(tag));
    let terminator = closing_tag(tag);
    body.skip(terminator).map(move |children| {
        let checked_attr = tag.with_attribute(&attr);
        Node::TagNode {
            tag,
            attr: checked_attr,
            children,
        }
    })
}
/// Produces a childless, attribute-less tag node without consuming input.
fn singleton_tag<Input>(tag: Tag) -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    let node = Node::TagNode {
        tag,
        attr: None,
        children: Vec::new(),
    };
    value(node)
}
/// Parses a complete tag: the opening tag followed by whatever that tag
/// requires (nothing for standalone tags, a specialised body for `url`,
/// `icontext` and `icon`, a generic body otherwise).
fn tag_node<Input>() -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    // The `.left()` / `.right()` chains only unify the five concrete parser
    // types into one nested `Either`; they carry no parsing logic.
    openening_tag().then(|(tag, attr)| match (tag.needs_closing(), tag) {
        (false, _) => singleton_tag(tag).right(),
        (true, Tag::Url) => url_inner(attr).left().left().left(),
        (true, Tag::IconText) => icon_text_inner().right().left().left(),
        (true, Tag::Icon) => icon_inner().left().right().left(),
        (true, _) => tag_body(tag, attr).right().right().left(),
    })
}
/// Parses a `@handle@domain` mention into a `Node::HandleNode`.
fn handle_node<Input>() -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    handle().map(|parsed| {
        let domain = parsed.domain;
        let handle = parsed.handle;
        Node::HandleNode { handle, domain }
    })
}
/// Parses an e-mail address into a `Node::EmailNode`.
fn email_node<Input>() -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    email().map(|address| Node::EmailNode { email: address })
}
/// Parses a URL into a `Node::UrlNode`.
fn url_node<Input>() -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    url().map(|parsed| Node::UrlNode { url: parsed })
}
/// Parses any single character except a line feed.
fn valid_char<Input>() -> impl Parser<Input, Output = char>
where
    Input: Stream<Token = char>,
{
    satisfy(|c: char| !matches!(c, '\n'))
}
/// Parses one non-newline character as a `Node::CharNode`.
///
/// When `closing` is `Some(tag)`, the parser first peeks for `[/tag]` with
/// `look_ahead`; if that closing tag is next, it fails with
/// `ClosingTagBackout` instead of yielding a character.
// NOTE(review): presumably this is what makes the enclosing `many` stop at the
// closing tag so that `tag_body` can consume it - confirm against combine's
// `look_ahead` / `or` commit semantics before restructuring this chain.
fn char_node<Input>(closing: Option<Tag>) -> impl Parser<Input, Output = Node>
where
Input: Stream<Token = char>,
{
if let Some(tag) = closing {
// `None` marks "closing tag ahead", `Some(c)` an ordinary character.
look_ahead(closing_tag(tag))
.map(|_| None)
.or(valid_char().map(Some))
.and_then(|text: Option<char>| {
if let Some(text) = text {
Ok(Node::CharNode { text })
} else {
Err(StreamErrorFor::<Input>::other(ClosingTagBackout))
}
})
.left()
} else {
// No enclosing tag: every non-newline character is plain text.
valid_char().map(|text| Node::CharNode { text }).right()
}
}
/// Collapses a run of one or more line feeds into a single `Node::NewlineNode`.
fn newline_node<Input>() -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    let line_feed = combine::parser::char::char('\n');
    let run = many1(line_feed);
    run.map(|_run: String| Node::NewlineNode)
}
/// Parses exactly one node of any kind.
///
/// Alternatives are tried in the order listed below; the structured ones are
/// wrapped in `attempt` so that a failed try rewinds the input before the
/// next alternative runs.
fn single_node<Input>(closing: Option<Tag>) -> impl Parser<Input, Output = Node>
where
    Input: Stream<Token = char>,
{
    let tagged = attempt(tag_node());
    let mention = attempt(handle_node());
    let mail = attempt(email_node());
    let link = attempt(url_node());
    let plain = char_node(closing);
    let line_break = newline_node();
    choice((tagged, mention, mail, link, plain, line_break))
}
/// Parses zero or more nodes; `closing` is forwarded to every `single_node`.
fn node_vec_<Input>(closing: Option<Tag>) -> impl Parser<Input, Output = Vec<Node>>
where
    Input: Stream<Token = char>,
{
    let one = single_node(closing);
    many(one)
}
// Crate-visible entry point of the grammar: parses a sequence of nodes,
// delegating to `node_vec_`. `closing` names the tag whose closing form ends
// the sequence, or `None` at top level.
// NOTE(review): declared through combine's `parser!` macro rather than as an
// `impl Parser` function, presumably because the grammar is recursive
// (node_vec -> single_node -> tag_node -> tag_body -> node_vec) and needs a
// nameable parser type - confirm against the combine documentation.
parser! {
pub(crate) fn node_vec[Input](closing: Option<Tag>)(Input) -> Vec<Node>
where [Input: Stream<Token = char>]
{
node_vec_(*closing)
}
}
/// Writes the tag's source-text name, i.e. the string `tag()` matches on.
impl std::fmt::Display for Tag {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let name = match *self {
            Tag::Right => "right",
            Tag::Center => "center",
            Tag::Quote => "quote",
            Tag::Color => "color",
            Tag::Code => "code",
            Tag::Codeblock => "codeblock",
            Tag::Pre => "pre",
            Tag::Mono => "mono",
            Tag::Sub => "sub",
            Tag::Sup => "sup",
            Tag::S => "s",
            Tag::Spoiler => "spoiler",
            Tag::Bold => "bold",
            Tag::Strong => "strong",
            Tag::I => "i",
            Tag::Em => "em",
            Tag::U => "u",
            Tag::Smcaps => "smcaps",
            Tag::IconText => "icontext",
            Tag::Icon => "icon",
            Tag::Hr => "hr",
            Tag::Url => "url",
        };
        f.write_str(name)
    }
}
// Grammar tests. Most assert on the number of top-level nodes returned by
// `node_vec`: plain text yields one `CharNode` per character, while a
// recognised tag, URL, handle, e-mail or newline run yields a single node.
#[cfg(test)]
mod tests {
use super::*;
use combine::EasyParser;
#[test]
fn parse_closing_tag() {
let (_, rest) = closing_tag(Tag::Right).easy_parse("[/right]").unwrap();
assert_eq!(rest, "");
}
#[test]
fn parse_tag() {
let (_, rest) = tag().easy_parse("right").unwrap();
assert_eq!(rest, "");
}
#[test]
fn parse_right_node() {
let (_, rest) = tag_node().easy_parse("[right][/right]").unwrap();
assert_eq!(rest, "");
}
#[test]
fn parse_nested_node() {
let (node, rest) = tag_node()
.easy_parse("[center][bold][/bold][/center]")
.unwrap();
assert_eq!(rest, "");
match node {
Node::TagNode { tag, children, .. } => {
assert_eq!(tag, Tag::Center);
assert_eq!(children.len(), 1);
}
_ => panic!("Invalid node type"),
}
}
#[test]
fn parse_multiple_nodes() {
let (vec, rest) = node_vec(None)
.easy_parse("[center][/center][right][/right]")
.unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), 2);
}
#[test]
fn parse_plaintext() {
let input = "this is a plaintext string";
let (vec, rest) = node_vec(None).easy_parse(input).unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), input.len());
}
// A lone '[' must fall back to plain characters.
#[test]
fn parse_text_with_bracket() {
let input = "plaintext [ but with an open bracket";
let (vec, rest) = node_vec(None).easy_parse(input).unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), input.len());
}
// An unknown tag name is kept as literal text, one node per character.
#[test]
fn parse_text_with_bad_tag() {
let input = "bad tag [here] is parsed fine";
let (vec, rest) = node_vec(None).easy_parse(input).unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), input.len());
}
#[test]
fn parse_url() {
let (vec, rest) = node_vec(None)
.easy_parse("https://example.com:80/path?query#fragment")
.unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), 1);
}
// 6 characters + 1 URL node + 6 characters.
#[test]
fn parse_string_with_url() {
let (vec, rest) = node_vec(None)
.easy_parse("hello http://example.com world")
.unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), 13);
}
#[test]
fn parse_url_tag() {
let (vec, rest) = node_vec(None)
.easy_parse("[url=http://example.com]hey there[/url]")
.unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), 1);
}
#[test]
fn parse_url_tag_2() {
let (vec, rest) = node_vec(None)
.easy_parse("[url]http://example.com[/url]")
.unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), 1);
}
// `[url]` without attribute and without a URL body is not a tag at all.
#[test]
fn parse_invalid_url_tag() {
let input = "[url]not a url[/url]";
let (vec, rest) = node_vec(None).easy_parse(input).unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), input.len());
}
#[test]
fn parse_invalid_url_tag_2() {
let input = "[url=bad]not a url[/url]";
let (vec, rest) = node_vec(None).easy_parse(input).unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), input.len());
}
// 5 characters ("some ") + 1 tag node.
#[test]
fn parse_text_with_color_name() {
let input = "some [color=white]text[/color]";
let (vec, rest) = node_vec(None).easy_parse(input).unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), 6);
}
#[test]
fn parse_text_with_color_hash() {
let input = "some [color=#fff]text[/color]";
let (vec, rest) = node_vec(None).easy_parse(input).unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), 6);
}
#[test]
fn parse_text_with_mixed_tags() {
let (vec, rest) = node_vec(None)
.easy_parse("[bold]bold text[/bold] with a [bad] tag and a [hr] good tag")
.unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), 35);
}
#[test]
fn parse_handle_node() {
let (vec, rest) = node_vec(None).easy_parse("@one@two").unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), 1);
}
#[test]
fn parse_handle_node_in_text() {
let (vec, rest) = node_vec(None).easy_parse("before @han@dle after").unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), 14);
}
#[test]
fn parse_icon_tag() {
let (vec, rest) = node_vec(None).easy_parse("[icon]@han@dle[/icon]").unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), 1);
}
// `[icon]` whose body is not a handle stays literal text.
#[test]
fn parse_invalid_icon_tag() {
let input = "[icon]bad[/icon]";
let (vec, rest) = node_vec(None).easy_parse(input).unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), input.len());
}
#[test]
fn parse_icontext_tag() {
let (vec, rest) = node_vec(None)
.easy_parse("[icontext]@han@dle[/icontext]")
.unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), 1);
}
#[test]
fn parse_invalid_icontext_tag() {
let input = "[icontext]bad[/icontext]";
let (vec, rest) = node_vec(None).easy_parse(input).unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), input.len());
}
#[test]
fn parse_email_node() {
let (vec, rest) = node_vec(None).easy_parse("one.two@three.four").unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), 1);
}
// 17 characters ("this is a string ") + 1 e-mail node.
#[test]
fn parse_email_in_text() {
let (vec, rest) = node_vec(None)
.easy_parse("this is a string with.an@email")
.unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), 18);
}
#[test]
fn parse_newline() {
let (vec, rest) = node_vec(None).easy_parse("\n").unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), 1);
}
// Consecutive newlines collapse into one node.
#[test]
fn parse_multiple_newlines() {
let (vec, rest) = node_vec(None).easy_parse("\n\n\n").unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), 1);
}
// 12 letters + 3 newline runs.
#[test]
fn parse_newlines_in_text() {
let (vec, rest) = node_vec(None).easy_parse("hewwo\n\n\nmr\nobama\n").unwrap();
assert_eq!(rest, "");
assert_eq!(vec.len(), 15);
}
}

43
content/src/color.rs Normal file
View file

@ -0,0 +1,43 @@
use combine::{
choice, count, many1,
parser::char::{char as parsechar, hex_digit, lower},
Parser, Stream,
};
/// Parses a colour value: either `#` followed by three to eight hexadecimal
/// digits, or a name made of one or more lowercase letters.
///
/// The hash form is returned with its leading `#`. Trailing input is left
/// unconsumed for the caller to inspect.
pub(crate) fn color<Input>() -> impl Parser<Input, Output = String>
where
    Input: Stream<Token = char>,
{
    // `count(n, p)` accepts anywhere from zero to `n` repetitions, so on its
    // own it would accept a bare "#". Requiring three digits up front rules
    // that out while still allowing up to eight in total.
    let required = (hex_digit(), hex_digit(), hex_digit());
    let optional = count(5, hex_digit());
    let hashcolor = parsechar('#').with(required.and(optional)).map(
        |((first, second, third), tail): ((char, char, char), String)| {
            let mut color = String::with_capacity(4 + tail.len());
            color.push('#');
            color.push(first);
            color.push(second);
            color.push(third);
            color.push_str(&tail);
            color
        },
    );
    let namecolor = many1(lower());
    choice((hashcolor, namecolor))
}
// Checks that the three supported spellings (short hex, long hex with alpha,
// lowercase name) are consumed completely.
#[cfg(test)]
mod tests {
use super::*;
use combine::EasyParser;
#[test]
fn parse_shortcolor() {
let (_, rest) = color().easy_parse("#aaa").unwrap();
assert_eq!(rest, "");
}
#[test]
fn parse_longcolor() {
let (_, rest) = color().easy_parse("#aaaaaaff").unwrap();
assert_eq!(rest, "");
}
#[test]
fn parse_colorname() {
let (value, rest) = color().easy_parse("white").unwrap();
assert_eq!(rest, "");
assert_eq!(value, "white");
}
}

127
content/src/email.rs Normal file
View file

@ -0,0 +1,127 @@
use crate::url::domain;
use combine::{
choice, many, many1,
parser::char::{alpha_num, char as parsechar},
Parser, Stream,
};
/// Parses one character allowed in the unquoted local part of an address:
/// an alphanumeric or one of ``! # $ % & ' * + - / = ? ^ ` { | } ~``.
/// The dot is handled separately by `unquoted_middle_segment`.
fn unquoted<Input>() -> impl Parser<Input, Output = char>
where
    Input: Stream<Token = char>,
{
    let symbol = choice((
        parsechar('!'),
        parsechar('#'),
        parsechar('$'),
        parsechar('%'),
        parsechar('&'),
        parsechar('\''),
        parsechar('*'),
        parsechar('+'),
        parsechar('-'),
        parsechar('/'),
        parsechar('='),
        parsechar('?'),
        parsechar('^'),
        parsechar('`'),
        parsechar('{'),
        parsechar('|'),
        parsechar('}'),
        parsechar('~'),
    ));
    alpha_num().or(symbol)
}
/// Parses a dot followed by one or more `unquoted` characters and returns
/// the segment including its leading dot.
fn unquoted_middle_segment<Input>() -> impl Parser<Input, Output = String>
where
    Input: Stream<Token = char>,
{
    let dot = parsechar('.');
    let label = many1(unquoted());
    (dot, label).map(|(dot, label): (char, String)| format!("{}{}", dot, label))
}
/// Parses a full unquoted local part: a non-empty run of `unquoted`
/// characters followed by any number of dot-prefixed segments.
fn unquoted_full<Input>() -> impl Parser<Input, Output = String>
where
    Input: Stream<Token = char>,
{
    let head = many1(unquoted());
    let tail = many(unquoted_middle_segment());
    (head, tail).map(|(mut local, rest): (String, String)| {
        local.push_str(&rest);
        local
    })
}
/// Parses `local@domain` and returns it re-assembled as a single string.
///
/// The local part follows `unquoted_full`; the domain follows
/// `crate::url::domain`.
pub(crate) fn email<Input>() -> impl Parser<Input, Output = String>
where
    Input: Stream<Token = char>,
{
    (unquoted_full(), parsechar('@'), domain())
        .map(|(local, _at, host)| format!("{}@{}", local, host.0))
}
// Tests for the e-mail grammar: accepted characters, dotted local parts, and
// rejection of leading, trailing or doubled dots in the local part.
#[cfg(test)]
mod tests {
use super::*;
use combine::EasyParser;
#[test]
fn unquoted_parses_chars() {
for c in &['a', 'b', '$', '#'] {
let s = c.to_string();
let (_, rest) = unquoted().easy_parse(s.as_str()).unwrap();
assert_eq!(rest, "");
}
}
#[test]
fn unquoted_middle_segment_parses_dots() {
let (_, rest) = unquoted_middle_segment().easy_parse(".one").unwrap();
assert_eq!(rest, "");
}
#[test]
fn unquoted_full_parses_dots() {
let (_, rest) = unquoted_full().easy_parse("one.two.three.four").unwrap();
assert_eq!(rest, "");
}
#[test]
fn parses_basic_email() {
let (_, rest) = email().easy_parse("a@b").unwrap();
assert_eq!(rest, "");
}
#[test]
fn doesnt_parse_invalid_email() {
assert!(email().easy_parse("@a@b").is_err());
}
#[test]
fn parses_longer_email() {
let (_, rest) = email()
.easy_parse("one.two.three.four@sub.domain.tld")
.unwrap();
assert_eq!(rest, "");
}
#[test]
fn doesnt_parse_double_dot() {
assert!(email().easy_parse("bad..email@tld").is_err());
}
#[test]
fn doesnt_parse_dot_local() {
assert!(email().easy_parse(".local@tld").is_err());
}
#[test]
fn doesnt_parse_end_dot_local() {
assert!(email().easy_parse("local.@tld").is_err());
}
}

68
content/src/handle.rs Normal file
View file

@ -0,0 +1,68 @@
use combine::{many1, parser::char::alpha_num, Parser, Stream};
/// A parsed `@handle@domain` mention, stored without the `@` separators.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Handle {
/// The part between the two `@` signs (alphanumeric, see `handle_part`).
pub handle: String,
/// The part after the second `@`, as accepted by `crate::url::domain`.
pub domain: String,
}
/// Parses a single `@`.
fn at<Input>() -> impl Parser<Input, Output = char>
where
    Input: Stream<Token = char>,
{
    let at_sign = '@';
    combine::parser::char::char(at_sign)
}
/// Parses the user-name part of a handle: one or more alphanumerics.
fn handle_part<Input>() -> impl Parser<Input, Output = String>
where
    Input: Stream<Token = char>,
{
    let name_char = alpha_num();
    many1(name_char)
}
/// Parses the domain part of a handle and unwraps it to a plain `String`.
fn domain_part<Input>() -> impl Parser<Input, Output = String>
where
    Input: Stream<Token = char>,
{
    let domain = crate::url::domain();
    domain.map(|parsed| parsed.0)
}
/// Parses `@handle@domain` into a `Handle`.
pub(crate) fn handle<Input>() -> impl Parser<Input, Output = Handle>
where
    Input: Stream<Token = char>,
{
    (at(), handle_part(), at(), domain_part())
        .map(|(_, handle, _, domain)| Handle { handle, domain })
}
// Tests for the handle grammar; a handle must start with `@`.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn parse_handle_part() {
let (_, rest) = handle_part().parse("as123").unwrap();
assert_eq!(rest, "");
}
#[test]
fn parse_simple_handle() {
let (_, rest) = handle().parse("@asdf@asdf").unwrap();
assert_eq!(rest, "");
}
#[test]
fn parse_complex_handle() {
let (_, rest) = handle()
.parse("@r2d2@telnet.towel.blinkenlights.nl")
.unwrap();
assert_eq!(rest, "");
}
#[test]
fn dont_parse_invalid_handle() {
assert!(handle().parse("asdf@asdf").is_err())
}
}

View file

@ -3,6 +3,16 @@ use once_cell::sync::Lazy;
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
mod bbcode;
mod color;
mod email;
mod handle;
mod render;
mod url;
pub use bbcode::Tag;
pub use render::NodeView;
fn allow_styles<'u>(allowed: &[&str], value: &'u str) -> Option<Cow<'u, str>> {
let mut altered = false;
let rules: Vec<_> = value
@ -38,9 +48,7 @@ fn attribute_filter<'u>(element: &str, attribute: &str, value: &'u str) -> Optio
("span", "style") => allow_styles(&["color", "opacity"], value),
("div", "class")
| ("span", "class")
| ("figure", "class")
| ("pre", "class")
| ("pre", "data-language")
| ("span", "data-symbol")
| ("blockquote", "data-author")
| ("a", "rel")
@ -53,44 +61,56 @@ fn attribute_filter<'u>(element: &str, attribute: &str, value: &'u str) -> Optio
}
}
// Classes based on bbclash BBCode spec:
// https://github.com/EndaHallahan/BBClash/blob/master/Spec.md
static STRIP_CONFIG: Lazy<Builder> = Lazy::new(|| {
let mut builder = Builder::new();
builder.allowed_classes(HashMap::new()).tags(HashSet::new());
builder
});
static AMMONIA_CONFIG: Lazy<Builder> = Lazy::new(|| {
let mut classes = HashMap::new();
let div_hs = classes.entry("div").or_insert(HashSet::new());
div_hs.insert("center");
div_hs.insert("right");
div_hs.insert("math_container");
// div_hs.insert("embed"); for now, no embeds
div_hs.insert("indent-1");
div_hs.insert("indent-2");
div_hs.insert("indent-3");
div_hs.insert("indent-4");
let span_hs = classes.entry("span").or_insert(HashSet::new());
span_hs.insert("underline");
span_hs.insert("smallcaps");
span_hs.insert("monospace");
span_hs.insert("spoiler");
span_hs.insert("math_container");
let pre_hs = classes.entry("pre").or_insert(HashSet::new());
pre_hs.insert("codeblock");
let figure_hs = classes.entry("figure").or_insert(HashSet::new());
figure_hs.insert("figure-right");
figure_hs.insert("figure-left");
let mut schemes = HashSet::new();
schemes.insert("http");
schemes.insert("https");
schemes.insert("mailto");
let mut tags = HashSet::new();
tags.insert("div");
tags.insert("span");
tags.insert("pre");
tags.insert("code");
tags.insert("i");
tags.insert("em");
tags.insert("b");
tags.insert("strong");
tags.insert("s");
tags.insert("sub");
tags.insert("sup");
tags.insert("blockquote");
tags.insert("a");
tags.insert("img");
tags.insert("br");
let mut builder = Builder::new();
builder
.tags(tags)
.allowed_classes(classes)
.url_schemes(schemes)
.link_rel(Some("nofollow noopener noreferer"))
.attribute_filter(attribute_filter)
.add_tag_attributes("span", &["style"])
.add_tag_attributes("div", &["style"]);
@ -104,8 +124,12 @@ pub fn html(source: &str) -> String {
h
}
pub fn bbcode(source: &str) -> String {
let bb = bbclash::bbcode_to_html(source);
log::debug!("{}", bb);
bb
pub fn bbcode<F>(source: &str, mapper: F) -> String
where
for<'a> F: Fn(NodeView<'a>) -> NodeView<'a> + Copy,
{
let stripped = STRIP_CONFIG.clean(source).to_string();
let preprocessed = render::preprocessor(&stripped, mapper);
log::debug!("{}", preprocessed);
preprocessed
}

483
content/src/render.rs Normal file
View file

@ -0,0 +1,483 @@
use crate::bbcode::Tag;
use std::borrow::Cow;
/// Shallow view of one node, handed to the caller-supplied mapper.
///
/// The mapper receives a view and returns a view; the returned one decides
/// how the node is rendered (see `to_render`). Children of a tag are not part
/// of the view.
#[derive(Debug)]
pub enum NodeView<'a> {
/// A tag with its validated attribute, if any.
Tag {
tag: Tag,
attr: Option<Cow<'a, str>>,
},
/// A bare URL found in the text.
Url {
href: Cow<'a, str>,
},
/// Rendered as a linked image followed by the handle text when both `img`
/// and `href` are set; as a plain handle link when only `href` is set;
/// as plain text otherwise.
IconText {
handle: Cow<'a, str>,
domain: Cow<'a, str>,
img: Option<String>,
href: Option<String>,
},
/// Like `IconText`, but the full form renders the linked image only.
Icon {
handle: Cow<'a, str>,
domain: Cow<'a, str>,
img: Option<String>,
href: Option<String>,
},
/// A mention. Passed to the mapper with `href: None`; rendered as a link
/// only if the mapper fills `href` in.
Handle {
handle: Cow<'a, str>,
domain: Cow<'a, str>,
href: Option<String>,
},
/// An e-mail address.
Email {
email: Cow<'a, str>,
},
/// A run of plain text.
Text {
text: Cow<'a, str>,
},
/// A line break.
Newline,
}
/// Owned intermediate tree built from the parser output by `build_nodes`.
///
/// Unlike `crate::bbcode::Node`, consecutive characters are merged into one
/// `Text` node and URLs are already serialised to strings.
#[derive(Debug)]
enum Node {
Tag {
tag: Tag,
attr: Option<String>,
children: Vec<Node>,
},
Url {
href: String,
},
Handle {
handle: String,
domain: String,
},
Email {
email: String,
},
Text {
text: String,
},
Newline,
}
/// Fully resolved node, ready to be serialised by `render_nodes`.
///
/// All optional fields of `NodeView` have been resolved: link-like variants
/// here always carry their `href` (and `img` where applicable).
#[derive(Debug)]
enum RenderNode {
Tag {
tag: Tag,
attr: Option<String>,
children: Vec<RenderNode>,
},
Url {
href: String,
},
/// Linked avatar image followed by the handle text.
IconText {
handle: String,
domain: String,
img: String,
href: String,
},
/// Linked avatar image only.
Icon {
handle: String,
domain: String,
img: String,
href: String,
},
/// Linked handle text.
Handle {
handle: String,
domain: String,
href: String,
},
Email {
email: String,
},
Text {
text: String,
},
Newline,
}
fn render_nodes(nodes: Vec<RenderNode>) -> String {
nodes
.into_iter()
.map(|node| {
log::trace!("Rendering {:?}", node);
match node {
RenderNode::Tag {
tag,
attr,
children,
} => match tag {
Tag::Right if !children.is_empty() => {
String::new() + "<div class=\"right\">" + &render_nodes(children) + "</div>"
}
Tag::Center if !children.is_empty() => {
String::new()
+ "<div class=\"center\">"
+ &render_nodes(children)
+ "</div>"
}
Tag::Quote if !children.is_empty() => {
String::new() + "<blockquote>" + &render_nodes(children) + "</blockquote>"
}
Tag::Color if !children.is_empty() => {
if let Some(attr) = attr {
format!("<span style=\"color:{};\">", attr)
+ &render_nodes(children)
+ "</span>"
} else {
render_nodes(children)
}
}
Tag::Code if !children.is_empty() => {
String::new() + "<code>" + &render_nodes(children) + "</code>"
}
Tag::Codeblock if !children.is_empty() => {
String::new()
+ "<pre class=\"codeblock\">"
+ &render_nodes(children)
+ "</pre>"
}
Tag::Pre if !children.is_empty() => {
String::new() + "<pre>" + &render_nodes(children) + "</pre>"
}
Tag::Mono if !children.is_empty() => {
String::new()
+ "<span class=\"monospace\">"
+ &render_nodes(children)
+ "</span>"
}
Tag::Sub if !children.is_empty() => {
String::new() + "<sub>" + &render_nodes(children) + "</sub>"
}
Tag::Sup if !children.is_empty() => {
String::new() + "<sup>" + &render_nodes(children) + "</sup>"
}
Tag::S if !children.is_empty() => {
String::new() + "<s>" + &render_nodes(children) + "</s>"
}
Tag::Spoiler if !children.is_empty() => {
String::new()
+ "<span class=\"spoiler\">"
+ &render_nodes(children)
+ "</span>"
}
Tag::Bold if !children.is_empty() => {
String::new() + "<b>" + &render_nodes(children) + "</b>"
}
Tag::Strong if !children.is_empty() => {
String::new() + "<strong>" + &render_nodes(children) + "</strong>"
}
Tag::I if !children.is_empty() => {
String::new() + "<i>" + &render_nodes(children) + "</i>"
}
Tag::Em if !children.is_empty() => {
String::new() + "<em>" + &render_nodes(children) + "</em>"
}
Tag::U if !children.is_empty() => {
String::new()
+ "<span class=\"underline\">"
+ &render_nodes(children)
+ "</span>"
}
Tag::Smcaps if !children.is_empty() => {
String::new()
+ "<span class=\"smallcaps\">"
+ &render_nodes(children)
+ "</span>"
}
Tag::IconText if !children.is_empty() => render_nodes(children),
Tag::Icon if !children.is_empty() => render_nodes(children),
Tag::Hr => String::from("<hr>"),
Tag::Url if !children.is_empty() => {
if let Some(href) = attr {
format!("<a href=\"{}\" rel=\"noopener noreferer nofollow\">", href)
+ &render_nodes(children)
+ "</a>"
} else {
render_nodes(children)
}
}
_ => String::new(),
},
RenderNode::Url { href } => format!(
"<a href=\"{href}\" rel=\"noopener noreferer nofollow\">{href}</a>",
href = href
),
RenderNode::IconText {
handle,
domain,
img,
href,
} => {
format!("<a href=\"{}\" rel=\"noopener noreferer nofollow\">", href)
+ &format!(
"<img src=\"{}\" title=\"@{handle}@{domain}\" alt=\"@{handle}@{domain}\" />",
img,
handle = handle,
domain = domain
) + &format!("@{}@{}", handle, domain)
+ &format!("</a>")
}
RenderNode::Icon {
handle,
domain,
img,
href,
} => {
format!("<a href=\"{}\" rel=\"noopener noreferer nofollow\">", href)
+ &format!(
"<img src=\"{}\" title=\"@{handle}@{domain}\" alt=\"@{handle}@{domain}\" />",
img,
handle = handle,
domain = domain
) + &format!("</a>")
}
RenderNode::Handle {
handle,
domain,
href,
} => {
format!("<a href=\"{}\" rel=\"noopener noreferer nofollow\">", href)
+ &format!("@{}@{}", handle, domain)
+ &format!("</a>")
}
RenderNode::Email { email } => {
format!("<a href=\"mailto:{email}\">{email}</a>", email = email)
}
RenderNode::Text { text } => text,
RenderNode::Newline => format!("<br>"),
}
})
.collect::<String>()
}
/// Converts the view returned by the mapper into an owned `RenderNode`.
///
/// `children` are the still-unmapped children of a tag; they are mapped
/// recursively with `f` and ignored for every non-tag view. Mention-like
/// views degrade gracefully: without `href` they become plain text, and an
/// icon without `img` becomes an ordinary handle link.
fn to_render<'b, F>(node: NodeView<'b>, children: Option<Vec<Node>>, f: F) -> RenderNode
where
    for<'a> F: Fn(NodeView<'a>) -> NodeView<'a> + Copy,
{
    /// Plain-text fallback for a mention that cannot be linked.
    fn mention(handle: &str, domain: &str) -> RenderNode {
        RenderNode::Text {
            text: format!("@{}@{}", handle, domain),
        }
    }

    match node {
        NodeView::Tag { tag, attr } => {
            let mapped = map_nodes(children.unwrap_or_else(Vec::new), f);
            RenderNode::Tag {
                tag,
                attr: attr.map(|value| value.into_owned()),
                children: mapped,
            }
        }
        NodeView::Url { href } => RenderNode::Url {
            href: href.into_owned(),
        },
        NodeView::IconText {
            handle,
            domain,
            img,
            href,
        } => match href {
            None => mention(&handle, &domain),
            Some(href) => match img {
                Some(img) => RenderNode::IconText {
                    handle: handle.into_owned(),
                    domain: domain.into_owned(),
                    img,
                    href,
                },
                None => RenderNode::Handle {
                    handle: handle.into_owned(),
                    domain: domain.into_owned(),
                    href,
                },
            },
        },
        NodeView::Icon {
            handle,
            domain,
            img,
            href,
        } => match href {
            None => mention(&handle, &domain),
            Some(href) => match img {
                Some(img) => RenderNode::Icon {
                    handle: handle.into_owned(),
                    domain: domain.into_owned(),
                    img,
                    href,
                },
                None => RenderNode::Handle {
                    handle: handle.into_owned(),
                    domain: domain.into_owned(),
                    href,
                },
            },
        },
        NodeView::Handle {
            handle,
            domain,
            href,
        } => match href {
            None => mention(&handle, &domain),
            Some(href) => RenderNode::Handle {
                handle: handle.into_owned(),
                domain: domain.into_owned(),
                href,
            },
        },
        NodeView::Email { email } => RenderNode::Email {
            email: email.into_owned(),
        },
        NodeView::Text { text } => RenderNode::Text {
            text: text.into_owned(),
        },
        NodeView::Newline => RenderNode::Newline,
    }
}
/// Runs every node through the mapper `f` and converts the result into
/// `RenderNode`s, preserving order.
///
/// Each node is presented to `f` as a borrowed `NodeView`; handles are
/// presented with `href: None`. Children of tags are mapped recursively by
/// `to_render`.
fn map_nodes<F>(nodes: Vec<Node>, f: F) -> Vec<RenderNode>
where
    for<'a> F: Fn(NodeView<'a>) -> NodeView<'a> + Copy,
{
    let mut mapped = Vec::with_capacity(nodes.len());
    for mut node in nodes {
        log::trace!("Mapping {:?}", node);

        // Detach the children first so the view below can borrow the rest
        // of the node while the children are handed on by value.
        let children = match &mut node {
            Node::Tag { children, .. } => Some(std::mem::take(children)),
            _ => None,
        };

        let view = match &node {
            Node::Tag { tag, attr, .. } => NodeView::Tag {
                tag: *tag,
                attr: attr.as_deref().map(Cow::Borrowed),
            },
            Node::Url { href } => NodeView::Url {
                href: Cow::Borrowed(href.as_str()),
            },
            Node::Handle { handle, domain } => NodeView::Handle {
                handle: Cow::Borrowed(handle.as_str()),
                domain: Cow::Borrowed(domain.as_str()),
                href: None,
            },
            Node::Email { email } => NodeView::Email {
                email: Cow::Borrowed(email.as_str()),
            },
            Node::Text { text } => NodeView::Text {
                text: Cow::Borrowed(text.as_str()),
            },
            Node::Newline => NodeView::Newline,
        };

        mapped.push(to_render((f)(view), children, f));
    }
    mapped
}
fn build_nodes(input: Vec<crate::bbcode::Node>) -> Vec<Node> {
let mut nodes = vec![];
for n in input {
log::trace!("Building {:?}", n);
match n {
crate::bbcode::Node::TagNode {
tag,
attr,
children,
} => nodes.push(Node::Tag {
tag,
attr,
children: build_nodes(children),
}),
crate::bbcode::Node::UrlNode { url } => nodes.push(Node::Url {
href: url.to_string(),
}),
crate::bbcode::Node::HandleNode { handle, domain } => {
nodes.push(Node::Handle { handle, domain })
}
crate::bbcode::Node::EmailNode { email } => nodes.push(Node::Email { email }),
crate::bbcode::Node::CharNode { text: c_text } => match nodes.last_mut() {
Some(Node::Text { ref mut text }) => {
text.push(c_text);
}
_ => {
let mut text = String::new();
text.push(c_text);
nodes.push(Node::Text { text });
}
},
crate::bbcode::Node::NewlineNode => nodes.push(Node::Newline),
};
}
nodes
}
/// Parses `source` as BBCode and renders it to an HTML fragment.
///
/// Every node is passed through `mapper` before rendering, which lets the
/// caller resolve handles, icons and similar nodes. If the parser stops
/// before the end of `source`, the unparsed remainder is dropped and a
/// warning is logged. If parsing fails outright, the result is an empty
/// string and the error is logged.
pub(crate) fn preprocessor<F>(source: &str, mapper: F) -> String
where
    for<'a> F: Fn(NodeView<'a>) -> NodeView<'a> + Copy,
{
    use combine::Parser;

    let parsenodes = match crate::bbcode::node_vec(None).parse(source) {
        Ok((nodes, rest)) => {
            if !rest.is_empty() {
                log::warn!("Failed to parse '{}', rest: '{}'", source, rest);
            }
            nodes
        }
        Err(err) => {
            // Previously swallowed silently, which made empty output
            // impossible to diagnose.
            log::warn!("Failed to parse '{}': {:?}", source, err);
            Vec::new()
        }
    };
    render_nodes(map_nodes(build_nodes(parsenodes), mapper))
}
// End-to-end rendering tests using the identity mapper (`|view| view`).
#[cfg(test)]
mod tests {
use super::preprocessor;
#[test]
fn basic_parse() {
let input = "some plain text";
let output = preprocessor(input, |view| view);
assert_eq!(output, input)
}
// A bare URL in running text is linkified.
#[test]
fn parse_with_link() {
let input = "it's http://example.com a link";
let output = preprocessor(input, |view| view);
assert_eq!(output, "it's <a href=\"http://example.com\" rel=\"noopener noreferer nofollow\">http://example.com</a> a link");
}
#[test]
fn parse_with_custom_link() {
let input = "it's [url=http://example.com]a link[/url]";
let output = preprocessor(input, |view| view);
assert_eq!(
output,
"it's <a href=\"http://example.com\" rel=\"noopener noreferer nofollow\">a link</a>"
);
}
#[test]
fn parse_with_strong() {
let input = "it's [strong]bold[/strong] right";
let output = preprocessor(input, |view| view);
assert_eq!(output, "it's <strong>bold</strong> right");
}
}

401
content/src/url.rs Normal file
View file

@ -0,0 +1,401 @@
use combine::{
choice, many, many1, optional,
parser::char::{alpha_num, digit, hex_digit, string},
Parser, Stream,
};
/// URL schemes accepted by the `scheme` parser.
#[derive(Clone, Copy, Debug)]
enum Scheme {
Http,
Https,
}
/// Host name as parsed by `domain`: dot-separated labels of alphanumerics
/// and `-`.
#[derive(Clone, Debug)]
pub(crate) struct Domain(pub(crate) String);
/// Port digits, without the leading `:`.
#[derive(Clone, Debug)]
struct Port(String);
/// Path text, including its `/` separators.
#[derive(Clone, Debug)]
struct Path(String);
/// Query text, without the leading `?`.
#[derive(Clone, Debug)]
struct Query(String);
// NOTE(review): presumably the text after `#`; the `fragment` parser is not
// fully visible in this excerpt - confirm.
#[derive(Clone, Debug)]
struct Fragment(String);
/// A parsed URL split into its components; everything after the domain is
/// optional.
#[derive(Clone, Debug)]
pub struct Url {
scheme: Scheme,
domain: Domain,
port: Option<Port>,
path: Option<Path>,
query: Option<Query>,
fragment: Option<Fragment>,
}
/// Parses `http://` or `https://` and reports which one was seen.
fn scheme<Input>() -> impl Parser<Input, Output = Scheme>
where
    Input: Stream<Token = char>,
{
    let secure_marker = optional(combine::parser::char::char('s'));
    let kind = string("http")
        .with(secure_marker)
        .map(|secure: Option<char>| match secure {
            Some(_) => Scheme::Https,
            None => Scheme::Http,
        });
    kind.skip(string("://"))
}
/// Parses one domain label: one or more alphanumerics or `-`.
fn domain_text<Input>() -> impl Parser<Input, Output = String>
where
    Input: Stream<Token = char>,
{
    let hyphen = combine::parser::char::char('-');
    many1(alpha_num().or(hyphen))
}
/// Parses a domain: a label followed by any number of `.label` segments.
///
/// A single label without any dot is accepted.
pub(crate) fn domain<Input>() -> impl Parser<Input, Output = Domain>
where
    Input: Stream<Token = char>,
{
    let dot = combine::parser::char::char('.');
    let dotted_label =
        (dot, domain_text()).map(|(dot, label): (char, String)| format!("{}{}", dot, label));
    (domain_text(), many(dotted_label)).map(|(mut name, tail): (String, String)| {
        name.push_str(&tail);
        Domain(name)
    })
}
fn port<Input>() -> impl Parser<Input, Output = Port>
where
Input: Stream<Token = char>,
{
let colon = combine::parser::char::char(':');
let port = many1(digit()).map(Port);
colon.with(port)
}
/// Parses one of the characters `! $ & ' ( ) * + , ; =`.
fn subdelim<Input>() -> impl Parser<Input, Output = char>
where
    Input: Stream<Token = char>,
{
    use combine::parser::char::char as symbol;

    choice((
        symbol('!'),
        symbol('$'),
        symbol('&'),
        symbol('\''),
        symbol('('),
        symbol(')'),
        symbol('*'),
        symbol('+'),
        symbol(','),
        symbol(';'),
        symbol('='),
    ))
}
/// Parses one "unreserved" character: `-`, `.`, `_`, `~`, or anything
/// accepted by `alpha_num` (modelled on the `unreserved` rule of RFC 3986).
fn unreserved<Input>() -> impl Parser<Input, Output = char>
where
    Input: Stream<Token = char>,
{
    use combine::parser::char::char as lit;

    choice((lit('-'), lit('.'), lit('_'), lit('~'), alpha_num()))
}
/// Parses a percent-encoded octet: `%` followed by two hexadecimal digits.
///
/// The three characters are returned verbatim (nothing is decoded), so the
/// escape survives a parse/display round trip unchanged.
fn pct_encoded<Input>() -> impl Parser<Input, Output = String>
where
    Input: Stream<Token = char>,
{
    let percent = combine::parser::char::char('%');

    percent
        .and(hex_digit())
        .and(hex_digit())
        .map(|((sign, high), low): ((char, char), char)| {
            [sign, high, low].iter().collect::<String>()
        })
}
/// Parses one path character (the `pchar` rule of RFC 3986): an unreserved
/// character, a percent-encoded octet, a sub-delimiter, `:` or `@`.
///
/// The output is a `String` rather than a `char` because a percent-encoded
/// octet spans three characters.
fn pchar<Input>() -> impl Parser<Input, Output = String>
where
    Input: Stream<Token = char>,
{
    let plain = unreserved().map(|c: char| c.to_string());
    let escaped = pct_encoded();
    let delimiter = subdelim().map(|c: char| c.to_string());
    let colon = combine::parser::char::char(':').map(|c: char| c.to_string());
    let at_sign = combine::parser::char::char('@').map(|c: char| c.to_string());

    choice((plain, escaped, delimiter, colon, at_sign))
}
/// Parses a URL path: one or more `/segment` pieces, where a segment is a
/// possibly empty run of `pchar` characters.
///
/// The returned `Path` keeps every `/`, so `/`, `//` and `/a/b` are all
/// reproduced exactly.
fn path<Input>() -> impl Parser<Input, Output = Path>
where
    Input: Stream<Token = char>,
{
    let piece = combine::parser::char::char('/')
        .and(many(pchar()))
        .map(|(separator, text): (char, String)| {
            let mut joined = separator.to_string();
            joined.push_str(&text);
            joined
        });

    many1(piece).map(Path)
}
/// Parses a single `?` character.
fn question<Input>() -> impl Parser<Input, Output = char>
where
    Input: Stream<Token = char>,
{
    use combine::parser::char::char as lit;

    lit('?')
}
/// Parses a query string: a `?` followed by any number of `pchar`, `/` or `?`
/// characters.
///
/// The leading `?` is dropped from the returned `Query`; an empty query
/// (just `?`) is accepted.
fn query<Input>() -> impl Parser<Input, Output = Query>
where
    Input: Stream<Token = char>,
{
    let slash = combine::parser::char::char('/').map(|c: char| c.to_string());
    let inner_mark = question().map(|c: char| c.to_string());
    let body = many(choice((pchar(), slash, inner_mark)));

    question().with(body).map(Query)
}
/// Parses a fragment: a `#` followed by any number of `pchar`, `/` or `?`
/// characters.
///
/// The leading `#` is dropped from the returned `Fragment`; an empty fragment
/// (just `#`) is accepted.
fn fragment<Input>() -> impl Parser<Input, Output = Fragment>
where
    Input: Stream<Token = char>,
{
    let slash = combine::parser::char::char('/').map(|c: char| c.to_string());
    let mark = combine::parser::char::char('?').map(|c: char| c.to_string());
    let body = many(choice((pchar(), slash, mark)));

    combine::parser::char::char('#').with(body).map(Fragment)
}
/// Parses a complete URL: scheme and `://`, a domain, then an optional port,
/// path, query and fragment, in that order.
pub(crate) fn url<Input>() -> impl Parser<Input, Output = Url>
where
    Input: Stream<Token = char>,
{
    // Built up one component at a time; every step nests the previous result
    // on the left, which is the tuple shape destructured below.
    let origin = scheme().and(domain());
    let with_port = origin.and(optional(port()));
    let with_path = with_port.and(optional(path()));
    let with_query = with_path.and(optional(query()));
    let complete = with_query.and(optional(fragment()));

    complete.map(|(((((s, d), po), pa), q), fr)| Url {
        scheme: s,
        domain: d,
        port: po,
        path: pa,
        query: q,
        fragment: fr,
    })
}
/// Writes the scheme name (`http` or `https`) without the `://` separator.
impl std::fmt::Display for Scheme {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let name = match self {
            Scheme::Http => "http",
            Scheme::Https => "https",
        };
        f.write_str(name)
    }
}
impl std::fmt::Display for Url {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}://", self.scheme)?;
write!(f, "{}", self.domain.0)?;
if let Some(port) = &self.port {
write!(f, ":{}", port.0)?;
}
if let Some(path) = &self.path {
write!(f, "{}", path.0)?;
}
if let Some(query) = &self.query {
write!(f, "?{}", query.0)?;
}
if let Some(fragment) = &self.fragment {
write!(f, "#{}", fragment.0)?;
}
Ok(())
}
}
/// Unit tests for the URL component parsers and the `Display` round trip.
///
/// Component tests check both the parsed value and that no input is left
/// over, so a parser that stops early or returns the wrong component is
/// caught.
#[cfg(test)]
mod tests {
    use super::*;
    use combine::EasyParser;

    #[test]
    fn parse_https() {
        let (parsed, rest) = scheme().easy_parse("https://").unwrap();
        assert!(matches!(parsed, Scheme::Https));
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_http() {
        let (parsed, rest) = scheme().easy_parse("http://").unwrap();
        assert!(matches!(parsed, Scheme::Http));
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_domain_text() {
        let (parsed, rest) = domain_text().easy_parse("hyaenidae-3").unwrap();
        assert_eq!(parsed, "hyaenidae-3");
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_simple_domain() {
        let (parsed, rest) = domain().easy_parse("example.com").unwrap();
        assert_eq!(parsed.0, "example.com");
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_single_domain() {
        let (parsed, rest) = domain().easy_parse("hyaenidae-3").unwrap();
        assert_eq!(parsed.0, "hyaenidae-3");
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_subdomains() {
        let (parsed, rest) = domain().easy_parse("one.two.three.four").unwrap();
        assert_eq!(parsed.0, "one.two.three.four");
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_single_slash() {
        let (parsed, rest) = path().easy_parse("/").unwrap();
        assert_eq!(parsed.0, "/");
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_double_slash() {
        let (parsed, rest) = path().easy_parse("//").unwrap();
        assert_eq!(parsed.0, "//");
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_deep_path() {
        let (parsed, rest) = path().easy_parse("/one/two/three/four").unwrap();
        assert_eq!(parsed.0, "/one/two/three/four");
        assert_eq!(rest, "");
    }

    #[test]
    fn dont_parse_invalid_path() {
        assert!(path().easy_parse("asdf").is_err());
    }

    #[test]
    fn parse_empty_query() {
        // The remainder is checked here too, like every other success case.
        let (parsed, rest) = query().easy_parse("?").unwrap();
        assert_eq!(parsed.0, "");
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_long_query() {
        let (parsed, rest) = query().easy_parse("?one=two&three=four").unwrap();
        assert_eq!(parsed.0, "one=two&three=four");
        assert_eq!(rest, "");
    }

    #[test]
    fn dont_parse_invalid_query() {
        assert!(query().easy_parse("asdf").is_err());
    }

    #[test]
    fn parse_empty_fragment() {
        let (parsed, rest) = fragment().easy_parse("#").unwrap();
        assert_eq!(parsed.0, "");
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_long_fragment() {
        let (parsed, rest) = fragment().easy_parse("#asdf-5").unwrap();
        assert_eq!(parsed.0, "asdf-5");
        assert_eq!(rest, "");
    }

    #[test]
    fn dont_parse_invalid_fragment() {
        assert!(fragment().easy_parse("asdf").is_err());
    }

    #[test]
    fn parse_example_com() {
        let (_, rest) = url().easy_parse("http://example.com").unwrap();
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_extended_example_com() {
        let (_, rest) = url()
            .easy_parse("https://www.example.com/path/part?query=hi#fragment")
            .unwrap();
        assert_eq!(rest, "");
    }

    #[test]
    fn round_trip_example_com() {
        let (url, rest) = url().easy_parse("http://example.com").unwrap();
        let url_str = url.to_string();
        assert_eq!(rest, "");
        assert_eq!(url_str, "http://example.com")
    }

    #[test]
    fn round_trip_extended_example_com() {
        let (url, rest) = url()
            .easy_parse("https://www.example.com/path/part?query=hi#fragment")
            .unwrap();
        let url_str = url.to_string();
        assert_eq!(rest, "");
        assert_eq!(
            url_str,
            "https://www.example.com/path/part?query=hi#fragment"
        );
    }
}