path-gen/flee/src/lib.rs
2022-05-15 23:30:30 -05:00

237 lines
6.9 KiB
Rust

use std::borrow::{Borrow, Cow};
/// Returns the index of the last byte in `s` equal to `item`, or `None` when
/// `item` does not occur in `s`.
fn rfind(s: &[u8], item: u8) -> Option<usize> {
    s.iter().rposition(|&byte| byte == item)
}
/// Returns the index of the first byte in `s` that is equal to any byte in
/// `items`, or `None` when no byte of `s` appears in `items`.
fn find_any(s: &[u8], items: &[u8]) -> Option<usize> {
    s.iter().position(|byte| items.contains(byte))
}
/// Returns the start index of the first occurrence of the byte sequence
/// `pattern` inside `s`, or `None` when `pattern` does not occur.
///
/// A `pattern` longer than `s` never matches.
///
/// # Panics
///
/// Panics if `pattern` is empty, because `slice::windows` does not accept a
/// window size of zero.
fn find_exact(s: &[u8], pattern: &[u8]) -> Option<usize> {
    s.windows(pattern.len())
        .position(|window| window == pattern)
}
/// Concatenates every item produced by `iterator` into one vector, placing a
/// copy of `joiner` between consecutive items.
///
/// Nothing is inserted before the first item or after the last one. An empty
/// iterator yields an empty vector.
fn join<I, T, U>(iterator: I, joiner: &[U]) -> Vec<U>
where
    I: Iterator<Item = T>,
    T: Borrow<[U]>,
    U: Clone,
{
    let mut joined = Vec::new();
    for (position, elem) in iterator.enumerate() {
        // Every item except the very first is preceded by the joiner.
        if position > 0 {
            joined.extend_from_slice(joiner);
        }
        let piece: &[U] = elem.borrow();
        joined.extend_from_slice(piece);
    }
    joined
}
/// Iterator over the pieces of a byte slice that are separated by a byte
/// pattern.
///
/// Field `0` holds the part of the input that has not been yielded yet and
/// becomes `None` once the final piece has been handed out. Field `1` is the
/// separator pattern.
struct Split<'a, 'b>(Option<&'a [u8]>, &'b [u8]);

impl<'a, 'b> Iterator for Split<'a, 'b> {
    type Item = &'a [u8];

    /// Yields the bytes up to the next separator, or the whole remainder when
    /// no separator is left. The separator itself is never part of a piece.
    ///
    /// Panics if the separator pattern is empty (see `find_exact`).
    fn next(&mut self) -> Option<Self::Item> {
        // Taking the remainder leaves `None` behind, so the iterator ends
        // unless a separator is found and a new remainder is stored below.
        let remaining = self.0.take()?;
        match find_exact(remaining, self.1) {
            Some(start) => {
                let resume = start + self.1.len();
                self.0 = Some(&remaining[resume..]);
                Some(&remaining[..start])
            }
            None => Some(remaining),
        }
    }
}
/// Builds a `Split` iterator that walks `s` and yields the pieces found
/// between occurrences of the byte sequence `items`.
fn split<'a, 'b>(s: &'a [u8], items: &'b [u8]) -> Split<'a, 'b> {
    let remaining = Some(s);
    Split(remaining, items)
}
/// Prefixes every occurrence in `s` of each byte listed in `escaped_codes`
/// with `escape_token`.
///
/// The codes are handled one at a time: `s` is split on the first code, each
/// piece is escaped recursively for the remaining codes, and the pieces are
/// glued back together with `escape_token` followed by that code.
///
/// Returns `Cow::Borrowed(s)` only when `escaped_codes` is empty; otherwise
/// the result is always owned.
fn do_escape<'a>(s: &'a [u8], escaped_codes: &[u8], escape_token: u8) -> Cow<'a, [u8]> {
    match escaped_codes.split_first() {
        None => Cow::Borrowed(s),
        Some((&code, remaining_codes)) => {
            let needle = [code];
            let replacement = [escape_token, code];
            let escaped_pieces =
                split(s, &needle).map(|piece| do_escape(piece, remaining_codes, escape_token));
            Cow::Owned(join(escaped_pieces, &replacement))
        }
    }
}
/// Escapes `s` so that every occurrence of `delimiter` or `escape_token` is
/// preceded by `escape_token`.
///
/// When `s` contains neither byte it is returned unchanged as
/// `Cow::Borrowed`; otherwise a newly allocated, escaped copy is returned.
pub fn escape<'a>(s: &'a [u8], delimiter: u8, escape_token: u8) -> Cow<'a, [u8]> {
    let escaped_codes = [escape_token, delimiter];
    match find_any(s, &escaped_codes) {
        Some(_) => do_escape(s, &escaped_codes, escape_token),
        None => Cow::Borrowed(s),
    }
}
/// Reverses `do_escape`: for each byte listed in `escaped_codes`, every
/// two-byte sequence of `escape_token` followed by that byte is replaced by
/// the byte alone.
///
/// The codes are handled one at a time: `s` is split on the escaped form of
/// the first code, each piece is unescaped recursively for the remaining
/// codes, and the pieces are rejoined with the bare code.
///
/// Always returns a freshly allocated vector; with no codes it is a plain
/// copy of `s`.
fn do_unescape(s: &[u8], escaped_codes: &[u8], escape_token: u8) -> Vec<u8> {
    let (code, remaining_codes) = match escaped_codes.split_first() {
        Some((&code, remaining_codes)) => (code, remaining_codes),
        None => return s.to_vec(),
    };
    let escaped_pair = [escape_token, code];
    let restored = [code];
    let pieces =
        split(s, &escaped_pair).map(|piece| do_unescape(piece, remaining_codes, escape_token));
    join(pieces, &restored)
}
pub fn unescape<'a>(s: &'a [u8], delimiter: u8, escape_token: u8) -> Cow<'a, [u8]> {
let escaped_codes = &[escape_token, delimiter];
if find_any(s, escaped_codes).is_some() {
return Cow::Owned(do_unescape(s, escaped_codes, escape_token));
}
Cow::Borrowed(s)
}
/// Splits `s` at the last `delimiter` that is not escaped.
///
/// A delimiter counts as escaped when it is directly preceded by an odd
/// number of consecutive `escape_token` bytes. Candidates are examined from
/// the end of `s` towards the start.
///
/// Returns the bytes before and after the chosen delimiter (the delimiter
/// itself is dropped), or `None` when `s` has no unescaped delimiter. The
/// returned slices are not unescaped.
pub fn rsplit_once_escaped<'a>(
    s: &'a [u8],
    delimiter: u8,
    escape_token: u8,
) -> Option<(&'a [u8], &'a [u8])> {
    let mut search_end = s.len();
    loop {
        // No delimiter left in the unsearched prefix means there is no split.
        let candidate = rfind(&s[..search_end], delimiter)?;
        let preceding_escapes = s[..candidate]
            .iter()
            .rev()
            .take_while(|&&byte| byte == escape_token)
            .count();
        // An even run of escape tokens pairs up with itself and leaves the
        // delimiter unescaped.
        if preceding_escapes % 2 == 0 {
            return Some((&s[..candidate], &s[candidate + 1..]));
        }
        search_end = candidate;
    }
}
// Unit tests for `escape`, `unescape` and `rsplit_once_escaped`, using `.` as
// the delimiter and `\` as the escape token throughout.
#[cfg(test)]
mod tests {
use super::{escape, rsplit_once_escaped, unescape};
// Delimiter and escape token shared by the round-trip test below.
const DELIMITER: u8 = b'.';
const ESCAPE_TOKEN: u8 = b'\\';
// Round-trip check: each pair is (raw input, expected escaped form).
// `escape` must turn the first into the second and `unescape` must turn the
// second back into the first.
#[test]
fn escape_output() {
let inputs: [(&[u8], &[u8]); 13] = [
(
b"..\\.\\.\\..\\\\\\...",
b"\\.\\.\\\\\\.\\\\\\.\\\\\\.\\.\\\\\\\\\\\\\\.\\.\\.",
),
(b".", b"\\."),
(b"\\", b"\\\\"),
(b"\\.", b"\\\\\\."),
(b".\\", b"\\.\\\\"),
(b".\\\\\\", b"\\.\\\\\\\\\\\\"),
(b"\\\\\\.", b"\\\\\\\\\\\\\\."),
(b"...\\", b"\\.\\.\\.\\\\"),
(b"\\...", b"\\\\\\.\\.\\."),
(b"Some text with a . in it", b"Some text with a \\. in it"),
(b"Some text with a \\ in it", b"Some text with a \\\\ in it"),
(
b"Some text with a \\ and a . in it",
b"Some text with a \\\\ and a \\. in it",
),
(
b"Some text with a . and a \\ in it",
b"Some text with a \\. and a \\\\ in it",
),
];
for (start, escaped) in inputs {
assert_eq!(escape(start, DELIMITER, ESCAPE_TOKEN).as_ref(), escaped);
assert_eq!(unescape(escaped, DELIMITER, ESCAPE_TOKEN).as_ref(), start);
}
}
// A single unescaped delimiter splits the input into the text before and
// after it; the delimiter itself is dropped.
#[test]
fn splits_on_dot() {
let input = b"left of dot . right of dot";
let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap();
assert_eq!(left, b"left of dot ");
assert_eq!(right, b" right of dot");
}
// A delimiter in the last position yields an empty right-hand side.
#[test]
fn splits_on_ending_dot() {
let input = b"left of dot .";
let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap();
assert_eq!(left, b"left of dot ");
assert_eq!(right, b"");
}
// A delimiter in the first position yields an empty left-hand side.
#[test]
fn splits_on_starting_dot() {
let input = b". right of dot";
let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap();
assert_eq!(left, b"");
assert_eq!(right, b" right of dot");
}
// With several unescaped delimiters, the split happens at the last one.
#[test]
fn splits_last_dot() {
let input = b"left of dots . between dots . right of dots";
let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap();
assert_eq!(left, b"left of dots . between dots ");
assert_eq!(right, b" right of dots");
}
// A delimiter preceded by one escape token is escaped, so with no other
// delimiter present there is nothing to split on.
#[test]
fn doesnt_split_escaped_dot() {
let input = b"left of dot \\. right of dot";
let opt = rsplit_once_escaped(input, b'.', b'\\');
assert!(opt.is_none());
}
// The escaped last delimiter is skipped and the earlier unescaped one is
// used; the escaped sequence stays untouched in the right-hand side.
#[test]
fn splits_unescaped_dot_with_preceding_escaped_dots() {
let input = b"left of dots . between dots \\. right of dots";
let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap();
assert_eq!(left, b"left of dots ");
assert_eq!(right, b" between dots \\. right of dots");
}
// Two escape tokens before the delimiter escape each other, so the delimiter
// itself is unescaped and is the split point.
#[test]
fn splits_unescaped_dot_with_preceding_backslashes() {
let input = b"left of dots . between dots \\\\. right of dots";
let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap();
assert_eq!(left, b"left of dots . between dots \\\\");
assert_eq!(right, b" right of dots");
}
// Three escape tokens before the delimiter leave it escaped (odd count), so
// the split falls back to the earlier unescaped delimiter.
#[test]
fn splits_unescaped_dot_with_preceding_escaped_dots_with_preceding_backslashes() {
let input = b"left of dots . between dots \\\\\\. right of dots";
let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap();
assert_eq!(left, b"left of dots ");
assert_eq!(right, b" between dots \\\\\\. right of dots");
}
}