237 lines
6.9 KiB
Rust
237 lines
6.9 KiB
Rust
use std::borrow::{Borrow, Cow};
|
|
|
|
/// Returns the index of the last byte in `s` equal to `item`, or `None` when
/// `item` does not occur in `s`.
fn rfind(s: &[u8], item: u8) -> Option<usize> {
    s.iter().rposition(|&byte| byte == item)
}
|
|
|
|
/// Returns the index of the first byte in `s` that is also contained in
/// `items`, or `None` when no byte of `s` appears in `items`.
fn find_any(s: &[u8], items: &[u8]) -> Option<usize> {
    s.iter().position(|byte| items.contains(byte))
}
|
|
|
|
/// Returns the start index of the first occurrence of `pattern` inside `s`,
/// or `None` when `pattern` does not occur.
///
/// # Panics
///
/// Panics when `pattern` is empty, because `slice::windows` rejects a window
/// size of zero.
fn find_exact(s: &[u8], pattern: &[u8]) -> Option<usize> {
    let width = pattern.len();
    s.windows(width).position(|window| window == pattern)
}
|
|
|
|
/// Concatenates every slice produced by `iterator` into one vector, placing a
/// copy of `joiner` between consecutive items.
///
/// An empty iterator yields an empty vector; a single item is copied without
/// any `joiner`.
fn join<I, T, U>(iterator: I, joiner: &[U]) -> Vec<U>
where
    I: Iterator<Item = T>,
    T: Borrow<[U]>,
    U: Clone,
{
    let mut joined = Vec::new();
    let mut needs_joiner = false;

    for piece in iterator {
        // The joiner goes in front of every item except the first one.
        if needs_joiner {
            joined.extend_from_slice(joiner);
        }
        let slice: &[U] = piece.borrow();
        joined.extend_from_slice(slice);
        needs_joiner = true;
    }

    joined
}
|
|
|
|
/// Iterator state for splitting a byte slice on a multi-byte pattern.
///
/// Field `0` holds the part of the source that has not been yielded yet and
/// becomes `None` once the final piece has been returned; field `1` is the
/// separator pattern that is searched for.
struct Split<'a, 'b>(Option<&'a [u8]>, &'b [u8]);
|
|
|
|
impl<'a, 'b> Iterator for Split<'a, 'b> {
|
|
type Item = &'a [u8];
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
if let Some(source) = self.0.take() {
|
|
if let Some(index) = find_exact(source, self.1) {
|
|
let (left, right) = source.split_at(index);
|
|
self.0 = Some(&right[self.1.len()..]);
|
|
return Some(left);
|
|
} else {
|
|
return Some(source);
|
|
}
|
|
}
|
|
|
|
None
|
|
}
|
|
}
|
|
|
|
fn split<'a, 'b>(s: &'a [u8], items: &'b [u8]) -> Split<'a, 'b> {
|
|
Split(Some(s), items)
|
|
}
|
|
|
|
/// Returns `s` with `escape_token` inserted in front of every byte that is
/// listed in `escaped_codes`.
///
/// The input is scanned in a single pass and the output is written into one
/// buffer sized up front, instead of allocating a vector for every segment
/// between escaped bytes. When `s` contains none of the `escaped_codes`
/// (including when `escaped_codes` is empty) the input is returned borrowed
/// and nothing is allocated.
fn do_escape<'a>(s: &'a [u8], escaped_codes: &[u8], escape_token: u8) -> Cow<'a, [u8]> {
    // Each byte that needs escaping grows the output by exactly one byte.
    let extra = s
        .iter()
        .filter(|&byte| escaped_codes.contains(byte))
        .count();
    if extra == 0 {
        return Cow::Borrowed(s);
    }

    let mut escaped = Vec::with_capacity(s.len() + extra);
    for &byte in s {
        if escaped_codes.contains(&byte) {
            escaped.push(escape_token);
        }
        escaped.push(byte);
    }

    Cow::Owned(escaped)
}
|
|
|
|
pub fn escape<'a>(s: &'a [u8], delimiter: u8, escape_token: u8) -> Cow<'a, [u8]> {
|
|
let escaped_codes = &[escape_token, delimiter];
|
|
if find_any(&s, escaped_codes).is_some() {
|
|
return do_escape(s, escaped_codes, escape_token);
|
|
}
|
|
|
|
Cow::Borrowed(s)
|
|
}
|
|
|
|
fn do_unescape(s: &[u8], escaped_codes: &[u8], escape_token: u8) -> Vec<u8> {
|
|
match escaped_codes {
|
|
[] => s.to_vec(),
|
|
[first, rest @ ..] => join(
|
|
split(s, &[escape_token, *first]).map(|part| do_unescape(part, rest, escape_token)),
|
|
&[*first],
|
|
),
|
|
}
|
|
}
|
|
|
|
pub fn unescape<'a>(s: &'a [u8], delimiter: u8, escape_token: u8) -> Cow<'a, [u8]> {
|
|
let escaped_codes = &[escape_token, delimiter];
|
|
if find_any(s, escaped_codes).is_some() {
|
|
return Cow::Owned(do_unescape(s, escaped_codes, escape_token));
|
|
}
|
|
|
|
Cow::Borrowed(s)
|
|
}
|
|
|
|
pub fn rsplit_once_escaped<'a>(
|
|
s: &'a [u8],
|
|
delimiter: u8,
|
|
escape_token: u8,
|
|
) -> Option<(&'a [u8], &'a [u8])> {
|
|
let mut position: usize = s.len();
|
|
|
|
while let Some(index) = rfind(&s[..position], delimiter) {
|
|
let mut escaped = false;
|
|
for prev in (0..index).rev() {
|
|
if s.get(prev..prev + 1) != Some(&[escape_token]) {
|
|
break;
|
|
}
|
|
|
|
escaped = !escaped;
|
|
}
|
|
|
|
if !escaped {
|
|
let (left, right) = s.split_at(index);
|
|
return Some((left, &right[1..]));
|
|
}
|
|
|
|
position = index;
|
|
}
|
|
|
|
None
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::{escape, rsplit_once_escaped, unescape};

    const DELIMITER: u8 = b'.';
    const ESCAPE_TOKEN: u8 = b'\\';

    /// Splits `input` on its last unescaped delimiter and checks both halves.
    fn assert_splits(input: &[u8], expected_left: &[u8], expected_right: &[u8]) {
        let (left, right) = rsplit_once_escaped(input, DELIMITER, ESCAPE_TOKEN).unwrap();
        assert_eq!(left, expected_left);
        assert_eq!(right, expected_right);
    }

    // Each pair is (plain text, escaped text); both directions are checked.
    #[test]
    fn escape_output() {
        let cases: [(&[u8], &[u8]); 13] = [
            (
                b"..\\.\\.\\..\\\\\\...",
                b"\\.\\.\\\\\\.\\\\\\.\\\\\\.\\.\\\\\\\\\\\\\\.\\.\\.",
            ),
            (b".", b"\\."),
            (b"\\", b"\\\\"),
            (b"\\.", b"\\\\\\."),
            (b".\\", b"\\.\\\\"),
            (b".\\\\\\", b"\\.\\\\\\\\\\\\"),
            (b"\\\\\\.", b"\\\\\\\\\\\\\\."),
            (b"...\\", b"\\.\\.\\.\\\\"),
            (b"\\...", b"\\\\\\.\\.\\."),
            (b"Some text with a . in it", b"Some text with a \\. in it"),
            (b"Some text with a \\ in it", b"Some text with a \\\\ in it"),
            (
                b"Some text with a \\ and a . in it",
                b"Some text with a \\\\ and a \\. in it",
            ),
            (
                b"Some text with a . and a \\ in it",
                b"Some text with a \\. and a \\\\ in it",
            ),
        ];

        for &(plain, encoded) in &cases {
            assert_eq!(escape(plain, DELIMITER, ESCAPE_TOKEN).as_ref(), encoded);
            assert_eq!(unescape(encoded, DELIMITER, ESCAPE_TOKEN).as_ref(), plain);
        }
    }

    #[test]
    fn splits_on_dot() {
        assert_splits(
            b"left of dot . right of dot",
            b"left of dot ",
            b" right of dot",
        );
    }

    #[test]
    fn splits_on_ending_dot() {
        assert_splits(b"left of dot .", b"left of dot ", b"");
    }

    #[test]
    fn splits_on_starting_dot() {
        assert_splits(b". right of dot", b"", b" right of dot");
    }

    #[test]
    fn splits_last_dot() {
        assert_splits(
            b"left of dots . between dots . right of dots",
            b"left of dots . between dots ",
            b" right of dots",
        );
    }

    #[test]
    fn doesnt_split_escaped_dot() {
        let outcome =
            rsplit_once_escaped(b"left of dot \\. right of dot", DELIMITER, ESCAPE_TOKEN);
        assert!(outcome.is_none());
    }

    #[test]
    fn splits_unescaped_dot_with_preceding_escaped_dots() {
        assert_splits(
            b"left of dots . between dots \\. right of dots",
            b"left of dots ",
            b" between dots \\. right of dots",
        );
    }

    #[test]
    fn splits_unescaped_dot_with_preceding_backslashes() {
        assert_splits(
            b"left of dots . between dots \\\\. right of dots",
            b"left of dots . between dots \\\\",
            b" right of dots",
        );
    }

    #[test]
    fn splits_unescaped_dot_with_preceding_escaped_dots_with_preceding_backslashes() {
        assert_splits(
            b"left of dots . between dots \\\\\\. right of dots",
            b"left of dots ",
            b" between dots \\\\\\. right of dots",
        );
    }
}
|