hyaenidae/content/src/url.rs
asonix 0e1552eeaa Content: Use own bbcode impl
This gives us more control over things, like
automatic 'linkifying', and the ability to add
custom user tagging logic
2021-01-31 13:50:34 -06:00

402 lines
9.5 KiB
Rust

use combine::{
choice, many, many1, optional,
parser::char::{alpha_num, digit, hex_digit, string},
Parser, Stream,
};
/// URL scheme recognised by the parsers in this module.
///
/// Only `http` and `https` are represented; `scheme()` picks the variant and the
/// `Display` impl turns it back into text.
#[derive(Clone, Copy, Debug)]
enum Scheme {
/// Plain `http`.
Http,
/// `http` followed by an `s`.
Https,
}
/// Host part of a URL, stored as the exact text matched by `domain()`
/// (labels joined by `.`).
#[derive(Clone, Debug)]
pub(crate) struct Domain(pub(crate) String);
/// Port digits as matched by `port()`. The leading `:` is not stored; `Display for Url`
/// writes it back.
#[derive(Clone, Debug)]
struct Port(String);
/// Path text as matched by `path()`, including every `/` separator (so it always starts
/// with `/`).
#[derive(Clone, Debug)]
struct Path(String);
/// Query text as matched by `query()`. The leading `?` is not stored; `Display for Url`
/// writes it back.
#[derive(Clone, Debug)]
struct Query(String);
/// Fragment text as matched by `fragment()`. The leading `#` is not stored;
/// `Display for Url` writes it back.
#[derive(Clone, Debug)]
struct Fragment(String);
/// A parsed `http`/`https` URL, split into its components.
///
/// Produced by `url()`; the `Display` impl reassembles the components in the same order
/// they were parsed.
#[derive(Clone, Debug)]
pub struct Url {
// Always present: `http` or `https`.
scheme: Scheme,
// Always present: the host name text.
domain: Domain,
// Present only when a `:digits` port was parsed.
port: Option<Port>,
// Present only when at least one `/segment` was parsed.
path: Option<Path>,
// Present only when a `?` was parsed (the text after it may be empty).
query: Option<Query>,
// Present only when a `#` was parsed (the text after it may be empty).
fragment: Option<Fragment>,
}
/// Parses the scheme prefix of a URL, including the `://` separator.
///
/// Accepts `http://` and `https://`. The separator is consumed but not part of the
/// output; the result only records which of the two schemes was seen.
fn scheme<Input>() -> impl Parser<Input, Output = Scheme>
where
    Input: Stream<Token = char>,
{
    string("http")
        .and(optional(combine::parser::char::char('s')))
        // The matched text itself is irrelevant; only the presence of the `s` matters.
        .map(|(_, secure)| match secure {
            Some(_) => Scheme::Https,
            None => Scheme::Http,
        })
        .skip(string("://"))
}
fn domain_text<Input>() -> impl Parser<Input, Output = String>
where
Input: Stream<Token = char>,
{
let domain_char = alpha_num().or(combine::parser::char::char('-'));
many1(domain_char)
}
/// Parses a host name: one label followed by any number of `.label` segments.
///
/// Each label is whatever `domain_text()` accepts. The output holds the matched text
/// unchanged, dots included.
///
/// A `.` that is not followed by a label (for example the full stop ending a sentence,
/// as in `see http://example.com.`) is left unconsumed instead of failing the parse, so
/// the host before it is still returned.
pub(crate) fn domain<Input>() -> impl Parser<Input, Output = Domain>
where
    Input: Stream<Token = char>,
{
    // `attempt` rewinds the input when the `.` has been consumed but no label follows.
    // Without it the segment fails *after* consuming input, and that committed failure
    // propagates out of `many`, rejecting an otherwise valid host.
    let dotted_label = combine::attempt(
        combine::parser::char::char('.')
            .and(domain_text())
            .map(|(dot, label): (char, String)| {
                let mut segment = String::with_capacity(dot.len_utf8() + label.len());
                segment.push(dot);
                segment.push_str(&label);
                segment
            }),
    );

    domain_text()
        .and(many(dotted_label))
        .map(|(first, rest): (String, String)| Domain(first + &rest))
}
/// Parses a port: a `:` followed by one or more digits.
///
/// The colon is dropped; only the digits are kept in the returned `Port`.
///
/// A `:` with no digits after it (for example `http://example.com: see above`) is left
/// unconsumed, so wrapping this parser in `optional` yields `None` rather than an error.
fn port<Input>() -> impl Parser<Input, Output = Port>
where
    Input: Stream<Token = char>,
{
    let colon = combine::parser::char::char(':');
    let digits = many1(digit()).map(Port);

    // `attempt` rewinds past the `:` when no digit follows; otherwise the failure happens
    // after input was consumed and `optional(port())` cannot recover from it.
    combine::attempt(colon.with(digits))
}
/// Parses one of the punctuation characters `! $ & ' ( ) * + , ; =`.
///
/// The character set matches the `sub-delims` rule of RFC 3986.
fn subdelim<Input>() -> impl Parser<Input, Output = char>
where
    Input: Stream<Token = char>,
{
    use combine::parser::char::char as symbol;

    // Alternatives are tried in the order listed.
    choice((
        symbol('!'),
        symbol('$'),
        symbol('&'),
        symbol('\''),
        symbol('('),
        symbol(')'),
        symbol('*'),
        symbol('+'),
        symbol(','),
        symbol(';'),
        symbol('='),
    ))
}
/// Parses one "unreserved" character: `-`, `.`, `_`, `~`, or anything `alpha_num()`
/// accepts.
///
/// This follows the shape of the `unreserved` rule of RFC 3986.
/// NOTE(review): `alpha_num()` is combine's alphanumeric parser and may accept more than
/// ASCII letters and digits — confirm whether that is intended.
fn unreserved<Input>() -> impl Parser<Input, Output = char>
where
    Input: Stream<Token = char>,
{
    use combine::parser::char::char as symbol;

    choice((
        symbol('-'),
        symbol('.'),
        symbol('_'),
        symbol('~'),
        alpha_num(),
    ))
}
/// Parses a percent-encoded byte: `%` followed by two hex digits.
///
/// The output is the three matched characters as a string (e.g. `%2F`); nothing is
/// decoded.
///
/// A `%` that is not followed by two hex digits is left unconsumed. Callers that repeat
/// or choose between character parsers (such as `pchar()`) therefore simply stop before
/// the stray `%`, the same way they stop before any other character they do not accept.
fn pct_encoded<Input>() -> impl Parser<Input, Output = String>
where
    Input: Stream<Token = char>,
{
    // `attempt` rewinds past the `%` on a partial match. Without it the failure is
    // committed, which makes the surrounding `choice`/`many` fail instead of backing off.
    combine::attempt(
        combine::parser::char::char('%')
            .and(hex_digit())
            .and(hex_digit())
            .map(|((percent, high), low): ((char, char), char)| {
                let mut encoded = String::with_capacity(3);
                encoded.push(percent);
                encoded.push(high);
                encoded.push(low);
                encoded
            }),
    )
}
/// Parses one path character and returns it as a string.
///
/// Alternatives, tried in order: an `unreserved()` character, a `pct_encoded()` triple,
/// a `subdelim()` character, `:`, or `@`. The output is a `String` because the
/// percent-encoded form spans three characters.
fn pchar<Input>() -> impl Parser<Input, Output = String>
where
    Input: Stream<Token = char>,
{
    // Lifts a single matched character into a string so every branch has the same output.
    fn as_string(c: char) -> String {
        String::from(c)
    }

    let colon = combine::parser::char::char(':').map(as_string);
    let at_sign = combine::parser::char::char('@').map(as_string);

    choice((
        unreserved().map(as_string),
        pct_encoded(),
        subdelim().map(as_string),
        colon,
        at_sign,
    ))
}
/// Parses a URL path: one or more `/segment` parts, where a segment is zero or more
/// `pchar()` matches.
///
/// Because segments may be empty, `/` and `//` are both accepted. The returned `Path`
/// holds the matched text including every slash.
fn path<Input>() -> impl Parser<Input, Output = Path>
where
    Input: Stream<Token = char>,
{
    let slash_and_segment = combine::parser::char::char('/')
        .and(many(pchar()))
        .map(|(separator, body): (char, String)| {
            let mut part = String::new();
            part.push(separator);
            part.push_str(&body);
            part
        });

    many1(slash_and_segment).map(Path)
}
/// Parses a single `?` character.
fn question<Input>() -> impl Parser<Input, Output = char>
where
    Input: Stream<Token = char>,
{
    use combine::parser::char::char as symbol;

    symbol('?')
}
/// Parses a query string: a `?` followed by zero or more query characters.
///
/// A query character is anything `pchar()` accepts, plus `/` and `?`. The leading `?` is
/// dropped from the returned `Query`; a bare `?` yields an empty query.
fn query<Input>() -> impl Parser<Input, Output = Query>
where
    Input: Stream<Token = char>,
{
    let slash = combine::parser::char::char('/').map(|c: char| String::from(c));
    let inner_question = question().map(|c: char| String::from(c));
    let body = many(choice((pchar(), slash, inner_question)));

    question().with(body).map(Query)
}
fn fragment<Input>() -> impl Parser<Input, Output = Fragment>
where
Input: Stream<Token = char>,
{
let hash = combine::parser::char::char('#');
let fragment_char = choice((
pchar(),
combine::parser::char::char('/').map(String::from),
combine::parser::char::char('?').map(String::from),
));
hash.with(many(fragment_char)).map(Fragment)
}
/// Parses a complete URL into its components.
///
/// The components are parsed in sequence: scheme (with `://`), domain, then an optional
/// port, path, query and fragment. Only scheme and domain are required.
pub(crate) fn url<Input>() -> impl Parser<Input, Output = Url>
where
    Input: Stream<Token = char>,
{
    // A tuple of parsers runs each element in order and yields a tuple of their outputs.
    (
        scheme(),
        domain(),
        optional(port()),
        optional(path()),
        optional(query()),
        optional(fragment()),
    )
        .map(|(scheme, domain, port, path, query, fragment)| Url {
            scheme,
            domain,
            port,
            path,
            query,
            fragment,
        })
}
/// Renders the scheme as it appears in a URL, without the `://` separator.
impl std::fmt::Display for Scheme {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let name = match *self {
            Scheme::Http => "http",
            Scheme::Https => "https",
        };
        f.write_str(name)
    }
}
impl std::fmt::Display for Url {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}://", self.scheme)?;
write!(f, "{}", self.domain.0)?;
if let Some(port) = &self.port {
write!(f, ":{}", port.0)?;
}
if let Some(path) = &self.path {
write!(f, "{}", path.0)?;
}
if let Some(query) = &self.query {
write!(f, "?{}", query.0)?;
}
if let Some(fragment) = &self.fragment {
write!(f, "#{}", fragment.0)?;
}
Ok(())
}
}
#[cfg(test)]
mod tests {
    //! Unit tests for the URL component parsers.
    //!
    //! Success cases assert that the parser consumed the whole input (`rest` is empty);
    //! failure cases assert that parsing returns an error.

    use super::*;
    use combine::EasyParser;

    // --- scheme -------------------------------------------------------------------------

    #[test]
    fn parse_https() {
        let (_, rest) = scheme().easy_parse("https://").unwrap();
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_http() {
        let (_, rest) = scheme().easy_parse("http://").unwrap();
        assert_eq!(rest, "");
    }

    // --- domain -------------------------------------------------------------------------

    #[test]
    fn parse_domain_text() {
        let (_, rest) = domain_text().easy_parse("hyaenidae-3").unwrap();
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_simple_domain() {
        let (_, rest) = domain().easy_parse("example.com").unwrap();
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_single_domain() {
        let (_, rest) = domain().easy_parse("hyaenidae-3").unwrap();
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_subdomains() {
        let (_, rest) = domain().easy_parse("one.two.three.four").unwrap();
        assert_eq!(rest, "");
    }

    // --- path ---------------------------------------------------------------------------

    #[test]
    fn parse_single_slash() {
        let (_, rest) = path().easy_parse("/").unwrap();
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_double_slash() {
        let (_, rest) = path().easy_parse("//").unwrap();
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_deep_path() {
        let (_, rest) = path().easy_parse("/one/two/three/four").unwrap();
        assert_eq!(rest, "");
    }

    #[test]
    fn dont_parse_invalid_path() {
        assert!(path().easy_parse("asdf").is_err());
    }

    // --- query --------------------------------------------------------------------------

    #[test]
    fn parse_empty_query() {
        // Check the remaining input as the other success cases do, so a parser that
        // stops early cannot pass unnoticed.
        let (_, rest) = query().easy_parse("?").unwrap();
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_long_query() {
        let (_, rest) = query().easy_parse("?one=two&three=four").unwrap();
        assert_eq!(rest, "");
    }

    #[test]
    fn dont_parse_invalid_query() {
        assert!(query().easy_parse("asdf").is_err());
    }

    // --- fragment -----------------------------------------------------------------------

    #[test]
    fn parse_empty_fragment() {
        let (_, rest) = fragment().easy_parse("#").unwrap();
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_long_fragment() {
        let (_, rest) = fragment().easy_parse("#asdf-5").unwrap();
        assert_eq!(rest, "");
    }

    #[test]
    fn dont_parse_invalid_fragment() {
        assert!(fragment().easy_parse("asdf").is_err());
    }

    // --- full URLs ----------------------------------------------------------------------

    #[test]
    fn parse_example_com() {
        let (_, rest) = url().easy_parse("http://example.com").unwrap();
        assert_eq!(rest, "");
    }

    #[test]
    fn parse_extended_example_com() {
        let (_, rest) = url()
            .easy_parse("https://www.example.com/path/part?query=hi#fragment")
            .unwrap();
        assert_eq!(rest, "");
    }

    // Parsing and then formatting must reproduce the input text exactly.

    #[test]
    fn round_trip_example_com() {
        let (url, rest) = url().easy_parse("http://example.com").unwrap();
        let url_str = url.to_string();
        assert_eq!(rest, "");
        assert_eq!(url_str, "http://example.com")
    }

    #[test]
    fn round_trip_extended_example_com() {
        let (url, rest) = url()
            .easy_parse("https://www.example.com/path/part?query=hi#fragment")
            .unwrap();
        let url_str = url.to_string();
        assert_eq!(rest, "");
        assert_eq!(
            url_str,
            "https://www.example.com/path/part?query=hi#fragment"
        );
    }
}