Move escaping logic to flee

This commit is contained in:
Aode (Lion) 2022-05-15 13:28:25 -05:00
parent d99576d929
commit 54b47da5a2
4 changed files with 260 additions and 210 deletions

View file

@ -10,3 +10,7 @@ test = []
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
flee = { path = "./flee" }
[workspace]
members = ["flee"]

8
flee/Cargo.toml Normal file
View file

@ -0,0 +1,8 @@
[package]
name = "flee"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

236
flee/src/lib.rs Normal file
View file

@ -0,0 +1,236 @@
use std::borrow::{Borrow, Cow};
/// Returns the index of the last byte in `s` equal to `item`, or `None` when
/// `item` does not occur in `s`.
fn rfind(s: &[u8], item: u8) -> Option<usize> {
    s.iter().rposition(|&byte| byte == item)
}
/// Returns the index of the first byte in `s` that is equal to any byte in
/// `items`, or `None` when no byte of `s` appears in `items`.
fn find_any(s: &[u8], items: &[u8]) -> Option<usize> {
    s.iter().position(|byte| items.contains(byte))
}
/// Returns the index at which `pattern` first occurs as a contiguous run of
/// bytes inside `s`, or `None` when it does not occur.
///
/// An empty `pattern` is treated as "never matches" and yields `None`.
/// `slice::windows` panics when asked for zero-width windows, so the guard
/// turns that panic into an ordinary "not found" result. Reporting a match at
/// index 0 instead would make the `Split` iterator, which advances by
/// `pattern.len()` after every match, yield empty slices forever.
fn find_exact(s: &[u8], pattern: &[u8]) -> Option<usize> {
    if pattern.is_empty() {
        return None;
    }
    s.windows(pattern.len())
        .position(|window| window == pattern)
}
/// Concatenates every slice produced by `iterator`, placing a copy of
/// `joiner` between consecutive items.
///
/// No joiner is emitted before the first item or after the last one; an empty
/// iterator produces an empty vector.
fn join<I, T, U>(iterator: I, joiner: &[U]) -> Vec<U>
where
    I: Iterator<Item = T>,
    T: Borrow<[U]>,
    U: Clone,
{
    let mut joined = Vec::new();
    for (position, piece) in iterator.enumerate() {
        // Only items after the first are preceded by the separator.
        if position > 0 {
            joined.extend_from_slice(joiner);
        }
        let bytes: &[U] = piece.borrow();
        joined.extend_from_slice(bytes);
    }
    joined
}
/// Iterator over the pieces of a byte slice separated by a multi-byte pattern.
///
/// Field `0` holds the part of the input that has not been yielded yet (`None`
/// once the iterator is exhausted); field `1` is the separator pattern.
struct Split<'a, 'b>(Option<&'a [u8]>, &'b [u8]);

impl<'a, 'b> Iterator for Split<'a, 'b> {
    type Item = &'a [u8];

    /// Yields the bytes up to the next occurrence of the pattern, or the whole
    /// remainder when the pattern no longer occurs. After the remainder has
    /// been yielded, every further call returns `None`.
    fn next(&mut self) -> Option<Self::Item> {
        // `take` leaves `None` behind, so the iterator ends unless a match
        // below stores a new remainder.
        let remaining = self.0.take()?;
        match find_exact(remaining, self.1) {
            Some(at) => {
                // Resume just past the matched pattern on the next call.
                self.0 = Some(&remaining[at + self.1.len()..]);
                Some(&remaining[..at])
            }
            None => Some(remaining),
        }
    }
}
/// Creates a [`Split`] iterator that yields the pieces of `s` separated by
/// the exact byte sequence `items`.
fn split<'a, 'b>(s: &'a [u8], items: &'b [u8]) -> Split<'a, 'b> {
    let unyielded = Some(s);
    Split(unyielded, items)
}
/// Returns a copy of `s` in which every byte listed in `escaped_codes` is
/// prefixed with `escape_token`.
///
/// The codes are handled one per recursion level: `s` is split on the first
/// code, each piece is escaped for the remaining codes, and the pieces are
/// joined back together with `escape_token` followed by that code. With no
/// codes left the input is copied unchanged.
fn do_escape(s: &[u8], escaped_codes: &[u8], escape_token: u8) -> Vec<u8> {
    match escaped_codes.split_first() {
        None => s.to_vec(),
        Some((&code, remaining_codes)) => {
            let needle = [code];
            let pieces = split(s, &needle)
                .map(|piece| do_escape(piece, remaining_codes, escape_token));
            join(pieces, &[escape_token, code])
        }
    }
}
/// Escapes every occurrence of `delimiter` and `escape_token` in `s` by
/// prefixing it with `escape_token`.
///
/// Returns `Cow::Borrowed(s)` when `s` contains neither byte, and an owned,
/// escaped copy otherwise.
pub fn escape<'a>(s: &'a [u8], delimiter: u8, escape_token: u8) -> Cow<'a, [u8]> {
    // The escape token is listed first so it is the outermost split in
    // `do_escape`.
    let escaped_codes = [escape_token, delimiter];
    match find_any(s, &escaped_codes) {
        Some(_) => Cow::Owned(do_escape(s, &escaped_codes, escape_token)),
        None => Cow::Borrowed(s),
    }
}
/// Returns a copy of `s` in which each two-byte sequence `escape_token`
/// followed by a byte from `escaped_codes` is replaced by that byte alone.
///
/// The codes are handled one per recursion level: `s` is split on
/// `escape_token` + the first code, each piece is unescaped for the remaining
/// codes, and the pieces are joined back together with the bare code. With no
/// codes left the input is copied unchanged.
fn do_unescape(s: &[u8], escaped_codes: &[u8], escape_token: u8) -> Vec<u8> {
    match escaped_codes.split_first() {
        None => s.to_vec(),
        Some((&code, remaining_codes)) => {
            let escaped_pair = [escape_token, code];
            let pieces = split(s, &escaped_pair)
                .map(|piece| do_unescape(piece, remaining_codes, escape_token));
            join(pieces, &[code])
        }
    }
}
pub fn unescape<'a>(s: &'a [u8], delimiter: u8, escape_token: u8) -> Cow<'a, [u8]> {
let escaped_codes = &[escape_token, delimiter];
if find_any(s, escaped_codes).is_some() {
return Cow::Owned(do_unescape(s, escaped_codes, escape_token));
}
Cow::Borrowed(s)
}
/// Splits `s` at the last `delimiter` that is not escaped.
///
/// A delimiter counts as escaped when it is immediately preceded by an odd
/// number of consecutive `escape_token` bytes. Returns the bytes before and
/// after the chosen delimiter (the delimiter itself is dropped), or `None`
/// when `s` contains no unescaped delimiter.
pub fn rsplit_once_escaped<'a>(
    s: &'a [u8],
    delimiter: u8,
    escape_token: u8,
) -> Option<(&'a [u8], &'a [u8])> {
    // Exclusive upper bound of the region still to be searched.
    let mut search_end = s.len();
    loop {
        let candidate = rfind(&s[..search_end], delimiter)?;
        // Count the unbroken run of escape tokens directly before the
        // candidate; an even count means they pair up with each other and
        // leave the delimiter itself unescaped.
        let escapes_before = s[..candidate]
            .iter()
            .rev()
            .take_while(|&&byte| byte == escape_token)
            .count();
        if escapes_before % 2 == 0 {
            return Some((&s[..candidate], &s[candidate + 1..]));
        }
        // Escaped delimiter: keep looking further to the left.
        search_end = candidate;
    }
}
// Unit tests for the public API: `escape`, `unescape` and
// `rsplit_once_escaped`, all exercised with `.` as the delimiter and `\` as
// the escape token.
#[cfg(test)]
mod tests {
use super::{escape, rsplit_once_escaped, unescape};
// Delimiter and escape token used by the `escape`/`unescape` round-trip test.
const DELIMITER: u8 = b'.';
const ESCAPE_TOKEN: u8 = b'\\';
// Checks `escape` against known outputs and `unescape` as its inverse.
#[test]
fn escape_output() {
// Each pair is (raw input, expected escaped form).
let inputs: [(&[u8], &[u8]); 13] = [
(
b"..\\.\\.\\..\\\\\\...",
b"\\.\\.\\\\\\.\\\\\\.\\\\\\.\\.\\\\\\\\\\\\\\.\\.\\.",
),
(b".", b"\\."),
(b"\\", b"\\\\"),
(b"\\.", b"\\\\\\."),
(b".\\", b"\\.\\\\"),
(b".\\\\\\", b"\\.\\\\\\\\\\\\"),
(b"\\\\\\.", b"\\\\\\\\\\\\\\."),
(b"...\\", b"\\.\\.\\.\\\\"),
(b"\\...", b"\\\\\\.\\.\\."),
(b"Some text with a . in it", b"Some text with a \\. in it"),
(b"Some text with a \\ in it", b"Some text with a \\\\ in it"),
(
b"Some text with a \\ and a . in it",
b"Some text with a \\\\ and a \\. in it",
),
(
b"Some text with a . and a \\ in it",
b"Some text with a \\. and a \\\\ in it",
),
];
// Escaping the raw input must give the expected form, and unescaping that
// form must give the raw input back.
for (start, escaped) in inputs {
assert_eq!(escape(start, DELIMITER, ESCAPE_TOKEN).as_ref(), escaped);
assert_eq!(unescape(escaped, DELIMITER, ESCAPE_TOKEN).as_ref(), start);
}
}
// A single unescaped delimiter in the middle splits into both sides.
#[test]
fn splits_on_dot() {
let input = b"left of dot . right of dot";
let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap();
assert_eq!(left, b"left of dot ");
assert_eq!(right, b" right of dot");
}
// A delimiter in the last position yields an empty right half.
#[test]
fn splits_on_ending_dot() {
let input = b"left of dot .";
let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap();
assert_eq!(left, b"left of dot ");
assert_eq!(right, b"");
}
// A delimiter in the first position yields an empty left half.
#[test]
fn splits_on_starting_dot() {
let input = b". right of dot";
let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap();
assert_eq!(left, b"");
assert_eq!(right, b" right of dot");
}
// With several unescaped delimiters, the split happens at the last one.
#[test]
fn splits_last_dot() {
let input = b"left of dots . between dots . right of dots";
let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap();
assert_eq!(left, b"left of dots . between dots ");
assert_eq!(right, b" right of dots");
}
// A delimiter preceded by one escape token is not a split point.
#[test]
fn doesnt_split_escaped_dot() {
let input = b"left of dot \\. right of dot";
let opt = rsplit_once_escaped(input, b'.', b'\\');
assert!(opt.is_none());
}
// The escaped last delimiter is skipped in favour of the earlier unescaped one.
#[test]
fn splits_unescaped_dot_with_preceding_escaped_dots() {
let input = b"left of dots . between dots \\. right of dots";
let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap();
assert_eq!(left, b"left of dots ");
assert_eq!(right, b" between dots \\. right of dots");
}
// Two escape tokens before the delimiter leave the delimiter unescaped.
#[test]
fn splits_unescaped_dot_with_preceding_backslashes() {
let input = b"left of dots . between dots \\\\. right of dots";
let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap();
assert_eq!(left, b"left of dots . between dots \\\\");
assert_eq!(right, b" right of dots");
}
// Three escape tokens before the delimiter make it escaped again.
#[test]
fn splits_unescaped_dot_with_preceding_escaped_dots_with_preceding_backslashes() {
let input = b"left of dots . between dots \\\\\\. right of dots";
let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap();
assert_eq!(left, b"left of dots ");
assert_eq!(right, b" between dots \\\\\\. right of dots");
}
}

View file

@ -1,5 +1,5 @@
use std::{
borrow::{Borrow, Cow},
borrow::Cow,
fmt::{Debug, Display},
marker::PhantomData,
ops::Deref,
@ -438,7 +438,7 @@ where
vec.extend_from_slice(b".");
vec.extend_from_slice(&escape(N::NAME.into()));
vec.extend_from_slice(b".");
vec.extend_from_slice(&escape(self.segment.to_bytes()));
vec.extend_from_slice(&escape(&self.segment.to_bytes()));
vec
}
}
@ -451,7 +451,7 @@ where
fn construct(&self) -> Vec<u8> {
let mut vec = self.inner.construct();
vec.extend_from_slice(b".");
vec.extend_from_slice(&escape(self.segment.to_bytes()));
vec.extend_from_slice(&escape(&self.segment.to_bytes()));
vec
}
}
@ -469,121 +469,19 @@ where
}
}
const ESCAPE_CHARS: &'static [u8] = &[b'.', b'\\'];
const DELIMITER: u8 = b'.';
const ESCAPE_TOKEN: u8 = b'\\';
fn rfind(s: &[u8], item: u8) -> Option<usize> {
s.iter()
.enumerate()
.rev()
.find(|(_, elem)| **elem == item)
.map(|(index, _)| index)
}
fn find_any(s: &[u8], items: &[u8]) -> Option<usize> {
s.iter()
.enumerate()
.find(|(_, elem)| items.contains(*elem))
.map(|(index, _)| index)
}
fn find_exact(s: &[u8], pattern: &[u8]) -> Option<usize> {
s.windows(pattern.len())
.enumerate()
.find(|(_, elem)| *elem == pattern)
.map(|(index, _)| index)
}
struct Split<'a, 'b>(Option<&'a [u8]>, &'b [u8]);
impl<'a, 'b> Iterator for Split<'a, 'b> {
type Item = &'a [u8];
fn next(&mut self) -> Option<Self::Item> {
if let Some(source) = self.0.take() {
if let Some(index) = find_exact(source, self.1) {
let (left, right) = source.split_at(index);
self.0 = Some(&right[self.1.len()..]);
return Some(left);
} else {
return Some(source);
}
}
None
}
}
fn split<'a, 'b>(s: &'a [u8], items: &'b [u8]) -> Split<'a, 'b> {
Split(Some(s), items)
}
fn join<I, T, U>(iterator: I, joiner: &[U]) -> Vec<U>
where
I: Iterator<Item = T>,
T: Borrow<[U]>,
U: Clone + Debug,
{
iterator
.fold((true, Vec::new()), |(first, mut vec), elem| {
if first {
vec.extend_from_slice(&elem.borrow());
} else {
vec.extend_from_slice(joiner);
vec.extend_from_slice(&elem.borrow());
}
(false, vec)
})
.1
}
fn rsplit_once_escaped<'a>(s: &'a [u8]) -> Option<(&'a [u8], &'a [u8])> {
let mut position: usize = s.len();
while let Some(index) = rfind(&s[..position], b'.') {
let mut escaped = false;
for prev in (0..index).rev() {
if s.get(prev..prev + 1) != Some(b"\\") {
break;
}
escaped = !escaped;
}
if !escaped {
let (left, right) = s.split_at(index);
return Some((left, &right[1..]));
}
position = index;
}
None
}
fn escape<'a>(s: Cow<'a, [u8]>) -> Cow<'a, [u8]> {
if find_any(&s, ESCAPE_CHARS).is_some() {
let v = join(
split(&s, b".").map(|part| join(split(part, b"\\"), b"\\\\")),
b"\\.",
);
return Cow::Owned(v);
}
s
fn escape(s: &[u8]) -> Cow<'_, [u8]> {
flee::escape(s, DELIMITER, ESCAPE_TOKEN)
}
fn unescape(s: &[u8]) -> Cow<'_, [u8]> {
if find_any(s, ESCAPE_CHARS).is_some() {
let v = join(
split(s, b"\\\\").map(|part| join(split(part, b"\\."), b".")),
b"\\",
);
flee::unescape(s, DELIMITER, ESCAPE_TOKEN)
}
return Cow::Owned(v);
}
Cow::Borrowed(s)
fn rsplit_once_escaped(s: &[u8]) -> Option<(&[u8], &[u8])> {
flee::rsplit_once_escaped(s, DELIMITER, ESCAPE_TOKEN)
}
#[cfg(feature = "test")]
@ -633,106 +531,10 @@ mod tests {
str::{from_utf8, Utf8Error},
};
use super::{escape, rsplit_once_escaped, unescape, PathField, PathGen, PathItem, PathNode};
use super::{PathField, PathGen, PathItem, PathNode};
const ROOT: PathGen<'static> = PathGen::new("test-root");
#[test]
fn splits_on_dot() {
let input = b"left of dot . right of dot";
let (left, right) = rsplit_once_escaped(input).unwrap();
assert_eq!(left, b"left of dot ");
assert_eq!(right, b" right of dot");
}
#[test]
fn splits_on_ending_dot() {
let input = b"left of dot .";
let (left, right) = rsplit_once_escaped(input).unwrap();
assert_eq!(left, b"left of dot ");
assert_eq!(right, b"");
}
#[test]
fn splits_on_starting_dot() {
let input = b". right of dot";
let (left, right) = rsplit_once_escaped(input).unwrap();
assert_eq!(left, b"");
assert_eq!(right, b" right of dot");
}
#[test]
fn splits_last_dot() {
let input = b"left of dots . between dots . right of dots";
let (left, right) = rsplit_once_escaped(input).unwrap();
assert_eq!(left, b"left of dots . between dots ");
assert_eq!(right, b" right of dots");
}
#[test]
fn doesnt_split_escaped_dot() {
let input = b"left of dot \\. right of dot";
let opt = rsplit_once_escaped(input);
assert!(opt.is_none());
}
#[test]
fn splits_unescaped_dot_with_preceding_escaped_dots() {
let input = b"left of dots . between dots \\. right of dots";
let (left, right) = rsplit_once_escaped(input).unwrap();
assert_eq!(left, b"left of dots ");
assert_eq!(right, b" between dots \\. right of dots");
}
#[test]
fn splits_unescaped_dot_with_preceding_backslashes() {
let input = b"left of dots . between dots \\\\. right of dots";
let (left, right) = rsplit_once_escaped(input).unwrap();
assert_eq!(left, b"left of dots . between dots \\\\");
assert_eq!(right, b" right of dots");
}
#[test]
fn splits_unescaped_dot_with_preceding_escaped_dots_with_preceding_backslashes() {
let input = b"left of dots . between dots \\\\\\. right of dots";
let (left, right) = rsplit_once_escaped(input).unwrap();
assert_eq!(left, b"left of dots ");
assert_eq!(right, b" between dots \\\\\\. right of dots");
}
#[test]
fn escape_output() {
let inputs: [(&[u8], &[u8]); 13] = [
(
b"..\\.\\.\\..\\\\\\...",
b"\\.\\.\\\\\\.\\\\\\.\\\\\\.\\.\\\\\\\\\\\\\\.\\.\\.",
),
(b".", b"\\."),
(b"\\", b"\\\\"),
(b"\\.", b"\\\\\\."),
(b".\\", b"\\.\\\\"),
(b".\\\\\\", b"\\.\\\\\\\\\\\\"),
(b"\\\\\\.", b"\\\\\\\\\\\\\\."),
(b"...\\", b"\\.\\.\\.\\\\"),
(b"\\...", b"\\\\\\.\\.\\."),
(b"Some text with a . in it", b"Some text with a \\. in it"),
(b"Some text with a \\ in it", b"Some text with a \\\\ in it"),
(
b"Some text with a \\ and a . in it",
b"Some text with a \\\\ and a \\. in it",
),
(
b"Some text with a . and a \\ in it",
b"Some text with a \\. and a \\\\ in it",
),
];
for (start, escaped) in inputs {
assert_eq!(escape(start.into()).as_ref(), escaped);
assert_eq!(unescape(escaped).as_ref(), start);
}
}
#[test]
fn construct_field() {
let s = ROOT