asonix
0e1552eeaa
This gives us more control over things, like automatic 'linkifying', and the ability to add custom user tagging logic
402 lines
9.5 KiB
Rust
402 lines
9.5 KiB
Rust
use combine::{
|
|
choice, many, many1, optional,
|
|
parser::char::{alpha_num, digit, hex_digit, string},
|
|
Parser, Stream,
|
|
};
|
|
|
|
/// URL scheme recognised by the `scheme` parser.
#[derive(Clone, Copy, Debug)]
enum Scheme {
    /// Plain `http`.
    Http,
    /// `https`, i.e. `http` immediately followed by an `s`.
    Https,
}
|
|
|
|
/// Host name as produced by the `domain` parser: the first label followed by
/// any number of `.label` segments, stored as one string.
#[derive(Clone, Debug)]
pub(crate) struct Domain(pub(crate) String);
|
|
|
|
/// Port digits of a URL, stored as text and without the leading `:`.
#[derive(Clone, Debug)]
struct Port(String);
|
|
|
|
/// Path component of a URL, stored verbatim including every `/` separator.
#[derive(Clone, Debug)]
struct Path(String);
|
|
|
|
/// Query component of a URL, stored without the leading `?`.
#[derive(Clone, Debug)]
struct Query(String);
|
|
|
|
/// Fragment component of a URL, stored without the leading `#`.
#[derive(Clone, Debug)]
struct Fragment(String);
|
|
|
|
#[derive(Clone, Debug)]
|
|
pub struct Url {
|
|
scheme: Scheme,
|
|
domain: Domain,
|
|
port: Option<Port>,
|
|
path: Option<Path>,
|
|
query: Option<Query>,
|
|
fragment: Option<Fragment>,
|
|
}
|
|
|
|
fn scheme<Input>() -> impl Parser<Input, Output = Scheme>
|
|
where
|
|
Input: Stream<Token = char>,
|
|
{
|
|
let http = string("http");
|
|
let s = combine::parser::char::char('s');
|
|
|
|
let https = http.and(optional(s)).map(|(_, c)| {
|
|
if c.is_some() {
|
|
Scheme::Https
|
|
} else {
|
|
Scheme::Http
|
|
}
|
|
});
|
|
|
|
let separator = string("://");
|
|
|
|
https.skip(separator)
|
|
}
|
|
|
|
fn domain_text<Input>() -> impl Parser<Input, Output = String>
|
|
where
|
|
Input: Stream<Token = char>,
|
|
{
|
|
let domain_char = alpha_num().or(combine::parser::char::char('-'));
|
|
many1(domain_char)
|
|
}
|
|
|
|
pub(crate) fn domain<Input>() -> impl Parser<Input, Output = Domain>
|
|
where
|
|
Input: Stream<Token = char>,
|
|
{
|
|
let domain_segment =
|
|
combine::parser::char::char('.')
|
|
.and(domain_text())
|
|
.map(|(c, s): (char, String)| {
|
|
let mut string = String::new();
|
|
string.push(c);
|
|
string += &s;
|
|
string
|
|
});
|
|
|
|
domain_text()
|
|
.and(many(domain_segment))
|
|
.map(|(first, rest): (String, String)| Domain(first + &rest))
|
|
}
|
|
|
|
fn port<Input>() -> impl Parser<Input, Output = Port>
|
|
where
|
|
Input: Stream<Token = char>,
|
|
{
|
|
let colon = combine::parser::char::char(':');
|
|
let port = many1(digit()).map(Port);
|
|
|
|
colon.with(port)
|
|
}
|
|
|
|
fn subdelim<Input>() -> impl Parser<Input, Output = char>
|
|
where
|
|
Input: Stream<Token = char>,
|
|
{
|
|
let exclamation = combine::parser::char::char('!');
|
|
let dollar = combine::parser::char::char('$');
|
|
let and = combine::parser::char::char('&');
|
|
let apostrophe = combine::parser::char::char('\'');
|
|
let open_paren = combine::parser::char::char('(');
|
|
let close_paren = combine::parser::char::char(')');
|
|
let asterisk = combine::parser::char::char('*');
|
|
let plus = combine::parser::char::char('+');
|
|
let comma = combine::parser::char::char(',');
|
|
let semi_colon = combine::parser::char::char(';');
|
|
let equal = combine::parser::char::char('=');
|
|
|
|
choice((
|
|
exclamation,
|
|
dollar,
|
|
and,
|
|
apostrophe,
|
|
open_paren,
|
|
close_paren,
|
|
asterisk,
|
|
plus,
|
|
comma,
|
|
semi_colon,
|
|
equal,
|
|
))
|
|
}
|
|
|
|
fn unreserved<Input>() -> impl Parser<Input, Output = char>
|
|
where
|
|
Input: Stream<Token = char>,
|
|
{
|
|
let dash = combine::parser::char::char('-');
|
|
let dot = combine::parser::char::char('.');
|
|
let underscore = combine::parser::char::char('_');
|
|
let tilde = combine::parser::char::char('~');
|
|
|
|
choice((dash, dot, underscore, tilde, alpha_num()))
|
|
}
|
|
|
|
fn pct_encoded<Input>() -> impl Parser<Input, Output = String>
|
|
where
|
|
Input: Stream<Token = char>,
|
|
{
|
|
combine::parser::char::char('%')
|
|
.and(hex_digit())
|
|
.and(hex_digit())
|
|
.map(|((s1, s2), s3)| {
|
|
let mut s = String::new();
|
|
s.push(s1);
|
|
s.push(s2);
|
|
s.push(s3);
|
|
s
|
|
})
|
|
}
|
|
|
|
fn pchar<Input>() -> impl Parser<Input, Output = String>
|
|
where
|
|
Input: Stream<Token = char>,
|
|
{
|
|
choice((
|
|
unreserved().map(String::from),
|
|
pct_encoded(),
|
|
subdelim().map(String::from),
|
|
combine::parser::char::char(':').map(String::from),
|
|
combine::parser::char::char('@').map(String::from),
|
|
))
|
|
}
|
|
|
|
fn path<Input>() -> impl Parser<Input, Output = Path>
|
|
where
|
|
Input: Stream<Token = char>,
|
|
{
|
|
let slash = combine::parser::char::char('/');
|
|
|
|
let segment = many(pchar());
|
|
|
|
let path_part = slash
|
|
.and(segment)
|
|
.map(|(slash, segment): (char, String)| String::new() + &slash.to_string() + &segment);
|
|
|
|
many1(path_part).map(Path)
|
|
}
|
|
|
|
fn question<Input>() -> impl Parser<Input, Output = char>
|
|
where
|
|
Input: Stream<Token = char>,
|
|
{
|
|
combine::parser::char::char('?')
|
|
}
|
|
|
|
fn query<Input>() -> impl Parser<Input, Output = Query>
|
|
where
|
|
Input: Stream<Token = char>,
|
|
{
|
|
let query_char = choice((
|
|
pchar(),
|
|
combine::parser::char::char('/').map(String::from),
|
|
question().map(String::from),
|
|
));
|
|
|
|
question().with(many(query_char)).map(Query)
|
|
}
|
|
|
|
fn fragment<Input>() -> impl Parser<Input, Output = Fragment>
|
|
where
|
|
Input: Stream<Token = char>,
|
|
{
|
|
let hash = combine::parser::char::char('#');
|
|
|
|
let fragment_char = choice((
|
|
pchar(),
|
|
combine::parser::char::char('/').map(String::from),
|
|
combine::parser::char::char('?').map(String::from),
|
|
));
|
|
|
|
hash.with(many(fragment_char)).map(Fragment)
|
|
}
|
|
|
|
pub(crate) fn url<Input>() -> impl Parser<Input, Output = Url>
|
|
where
|
|
Input: Stream<Token = char>,
|
|
{
|
|
scheme()
|
|
.and(domain())
|
|
.and(optional(port()))
|
|
.and(optional(path()))
|
|
.and(optional(query()))
|
|
.and(optional(fragment()))
|
|
.map(
|
|
|(((((scheme, domain), port), path), query), fragment)| Url {
|
|
scheme,
|
|
domain,
|
|
port,
|
|
path,
|
|
query,
|
|
fragment,
|
|
},
|
|
)
|
|
}
|
|
|
|
impl std::fmt::Display for Scheme {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
|
match self {
|
|
Scheme::Http => write!(f, "http"),
|
|
Scheme::Https => write!(f, "https"),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Display for Url {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
|
write!(f, "{}://", self.scheme)?;
|
|
write!(f, "{}", self.domain.0)?;
|
|
|
|
if let Some(port) = &self.port {
|
|
write!(f, ":{}", port.0)?;
|
|
}
|
|
if let Some(path) = &self.path {
|
|
write!(f, "{}", path.0)?;
|
|
}
|
|
if let Some(query) = &self.query {
|
|
write!(f, "?{}", query.0)?;
|
|
}
|
|
if let Some(fragment) = &self.fragment {
|
|
write!(f, "#{}", fragment.0)?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use combine::EasyParser;

    /// Runs `$parser` over `$input`, panicking on a parse error, and asserts
    /// that no input is left over afterwards.
    macro_rules! assert_consumes_all {
        ($parser:expr, $input:expr) => {{
            let (_, rest) = $parser.easy_parse($input).unwrap();
            assert_eq!(rest, "");
        }};
    }

    /// Asserts that `$parser` reports an error for `$input`.
    macro_rules! assert_rejects {
        ($parser:expr, $input:expr) => {
            assert!($parser.easy_parse($input).is_err())
        };
    }

    // --- scheme ---

    #[test]
    fn parse_https() {
        assert_consumes_all!(scheme(), "https://");
    }

    #[test]
    fn parse_http() {
        assert_consumes_all!(scheme(), "http://");
    }

    // --- domain ---

    #[test]
    fn parse_domain_text() {
        assert_consumes_all!(domain_text(), "hyaenidae-3");
    }

    #[test]
    fn parse_simple_domain() {
        assert_consumes_all!(domain(), "example.com");
    }

    #[test]
    fn parse_single_domain() {
        assert_consumes_all!(domain(), "hyaenidae-3");
    }

    #[test]
    fn parse_subdomains() {
        assert_consumes_all!(domain(), "one.two.three.four");
    }

    // --- path ---

    #[test]
    fn parse_single_slash() {
        assert_consumes_all!(path(), "/");
    }

    #[test]
    fn parse_double_slash() {
        assert_consumes_all!(path(), "//");
    }

    #[test]
    fn parse_deep_path() {
        assert_consumes_all!(path(), "/one/two/three/four");
    }

    #[test]
    fn dont_parse_invalid_path() {
        assert_rejects!(path(), "asdf");
    }

    // --- query ---

    #[test]
    fn parse_empty_query() {
        // Only checks that parsing succeeds; the remainder is not inspected.
        query().easy_parse("?").unwrap();
    }

    #[test]
    fn parse_long_query() {
        assert_consumes_all!(query(), "?one=two&three=four");
    }

    #[test]
    fn dont_parse_invalid_query() {
        assert_rejects!(query(), "asdf");
    }

    // --- fragment ---

    #[test]
    fn parse_empty_fragment() {
        assert_consumes_all!(fragment(), "#");
    }

    #[test]
    fn parse_long_fragment() {
        assert_consumes_all!(fragment(), "#asdf-5");
    }

    #[test]
    fn dont_parse_invalid_fragment() {
        assert_rejects!(fragment(), "asdf");
    }

    // --- whole URLs ---

    #[test]
    fn parse_example_com() {
        assert_consumes_all!(url(), "http://example.com");
    }

    #[test]
    fn parse_extended_example_com() {
        assert_consumes_all!(
            url(),
            "https://www.example.com/path/part?query=hi#fragment"
        );
    }

    #[test]
    fn round_trip_example_com() {
        let (parsed, rest) = url().easy_parse("http://example.com").unwrap();

        assert_eq!(rest, "");
        assert_eq!(parsed.to_string(), "http://example.com");
    }

    #[test]
    fn round_trip_extended_example_com() {
        let (parsed, rest) = url()
            .easy_parse("https://www.example.com/path/part?query=hi#fragment")
            .unwrap();

        assert_eq!(rest, "");
        assert_eq!(
            parsed.to_string(),
            "https://www.example.com/path/part?query=hi#fragment"
        );
    }
}
|