Move escaping logic to flee
This commit is contained in:
parent
d99576d929
commit
54b47da5a2
|
@ -10,3 +10,7 @@ test = []
|
|||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
flee = { path = "./flee" }
|
||||
|
||||
[workspace]
|
||||
members = ["flee"]
|
||||
|
|
8
flee/Cargo.toml
Normal file
8
flee/Cargo.toml
Normal file
|
@ -0,0 +1,8 @@
|
|||
[package]
|
||||
name = "flee"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
236
flee/src/lib.rs
Normal file
236
flee/src/lib.rs
Normal file
|
@ -0,0 +1,236 @@
|
|||
use std::borrow::{Borrow, Cow};
|
||||
|
||||
/// Returns the index of the last byte in `s` that equals `item`.
///
/// Yields `None` when `item` does not occur in `s` (including when `s` is
/// empty).
fn rfind(s: &[u8], item: u8) -> Option<usize> {
    s.iter().rposition(|&byte| byte == item)
}
|
||||
|
||||
/// Returns the index of the first byte in `s` that is contained in `items`.
///
/// Yields `None` when no byte of `s` appears in `items`.
fn find_any(s: &[u8], items: &[u8]) -> Option<usize> {
    s.iter().position(|byte| items.contains(byte))
}
|
||||
|
||||
/// Returns the start index of the first occurrence of `pattern` in `s`.
///
/// Yields `None` when `pattern` does not occur, which includes the case where
/// `pattern` is longer than `s`.
///
/// # Panics
///
/// Panics when `pattern` is empty, because `slice::windows` rejects a window
/// size of zero.
fn find_exact(s: &[u8], pattern: &[u8]) -> Option<usize> {
    s.windows(pattern.len())
        .position(|window| window == pattern)
}
|
||||
|
||||
/// Concatenates every slice yielded by `iterator`, inserting a copy of
/// `joiner` between consecutive items.
///
/// No joiner is emitted before the first item or after the last one; an empty
/// iterator produces an empty vector.
fn join<I, T, U>(iterator: I, joiner: &[U]) -> Vec<U>
where
    I: Iterator<Item = T>,
    T: Borrow<[U]>,
    U: Clone,
{
    let mut joined = Vec::new();

    for (position, piece) in iterator.enumerate() {
        // Every item except the first is preceded by the joiner.
        if position > 0 {
            joined.extend_from_slice(joiner);
        }

        let slice: &[U] = piece.borrow();
        joined.extend_from_slice(slice);
    }

    joined
}
|
||||
|
||||
struct Split<'a, 'b>(Option<&'a [u8]>, &'b [u8]);
|
||||
|
||||
impl<'a, 'b> Iterator for Split<'a, 'b> {
|
||||
type Item = &'a [u8];
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if let Some(source) = self.0.take() {
|
||||
if let Some(index) = find_exact(source, self.1) {
|
||||
let (left, right) = source.split_at(index);
|
||||
self.0 = Some(&right[self.1.len()..]);
|
||||
return Some(left);
|
||||
} else {
|
||||
return Some(source);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn split<'a, 'b>(s: &'a [u8], items: &'b [u8]) -> Split<'a, 'b> {
|
||||
Split(Some(s), items)
|
||||
}
|
||||
|
||||
/// Returns a copy of `s` in which every byte listed in `escaped_codes` is
/// prefixed with `escape_token`.
///
/// Bytes that are not listed are copied unchanged, so an empty
/// `escaped_codes` yields a plain copy of `s`.
///
/// The work is done in a single pass over `s` with one output buffer, rather
/// than recursively splitting and re-joining the input once per escaped code.
fn do_escape(s: &[u8], escaped_codes: &[u8], escape_token: u8) -> Vec<u8> {
    // The output is at least as long as the input; it grows only when
    // something actually needs escaping.
    let mut escaped = Vec::with_capacity(s.len());

    for &byte in s {
        if escaped_codes.contains(&byte) {
            escaped.push(escape_token);
        }
        escaped.push(byte);
    }

    escaped
}
|
||||
|
||||
pub fn escape<'a>(s: &'a [u8], delimiter: u8, escape_token: u8) -> Cow<'a, [u8]> {
|
||||
let escaped_codes = &[escape_token, delimiter];
|
||||
if find_any(&s, escaped_codes).is_some() {
|
||||
return Cow::Owned(do_escape(s, escaped_codes, escape_token));
|
||||
}
|
||||
|
||||
Cow::Borrowed(s)
|
||||
}
|
||||
|
||||
fn do_unescape(s: &[u8], escaped_codes: &[u8], escape_token: u8) -> Vec<u8> {
|
||||
match escaped_codes {
|
||||
[] => s.to_vec(),
|
||||
[first, rest @ ..] => join(
|
||||
split(s, &[escape_token, *first]).map(|part| do_unescape(part, rest, escape_token)),
|
||||
&[*first],
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unescape<'a>(s: &'a [u8], delimiter: u8, escape_token: u8) -> Cow<'a, [u8]> {
|
||||
let escaped_codes = &[escape_token, delimiter];
|
||||
if find_any(s, escaped_codes).is_some() {
|
||||
return Cow::Owned(do_unescape(s, escaped_codes, escape_token));
|
||||
}
|
||||
|
||||
Cow::Borrowed(s)
|
||||
}
|
||||
|
||||
pub fn rsplit_once_escaped<'a>(
|
||||
s: &'a [u8],
|
||||
delimiter: u8,
|
||||
escape_token: u8,
|
||||
) -> Option<(&'a [u8], &'a [u8])> {
|
||||
let mut position: usize = s.len();
|
||||
|
||||
while let Some(index) = rfind(&s[..position], delimiter) {
|
||||
let mut escaped = false;
|
||||
for prev in (0..index).rev() {
|
||||
if s.get(prev..prev + 1) != Some(&[escape_token]) {
|
||||
break;
|
||||
}
|
||||
|
||||
escaped = !escaped;
|
||||
}
|
||||
|
||||
if !escaped {
|
||||
let (left, right) = s.split_at(index);
|
||||
return Some((left, &right[1..]));
|
||||
}
|
||||
|
||||
position = index;
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{escape, rsplit_once_escaped, unescape};

    const DELIMITER: u8 = b'.';
    const ESCAPE_TOKEN: u8 = b'\\';

    /// Asserts that `input` splits at its last unescaped dot into exactly
    /// `left` and `right`.
    fn assert_splits(input: &[u8], left: &[u8], right: &[u8]) {
        assert_eq!(
            rsplit_once_escaped(input, DELIMITER, ESCAPE_TOKEN),
            Some((left, right))
        );
    }

    /// Every raw input must escape to its expected form, and that form must
    /// unescape back to the raw input.
    #[test]
    fn escape_output() {
        let inputs: [(&[u8], &[u8]); 13] = [
            (
                b"..\\.\\.\\..\\\\\\...",
                b"\\.\\.\\\\\\.\\\\\\.\\\\\\.\\.\\\\\\\\\\\\\\.\\.\\.",
            ),
            (b".", b"\\."),
            (b"\\", b"\\\\"),
            (b"\\.", b"\\\\\\."),
            (b".\\", b"\\.\\\\"),
            (b".\\\\\\", b"\\.\\\\\\\\\\\\"),
            (b"\\\\\\.", b"\\\\\\\\\\\\\\."),
            (b"...\\", b"\\.\\.\\.\\\\"),
            (b"\\...", b"\\\\\\.\\.\\."),
            (b"Some text with a . in it", b"Some text with a \\. in it"),
            (b"Some text with a \\ in it", b"Some text with a \\\\ in it"),
            (
                b"Some text with a \\ and a . in it",
                b"Some text with a \\\\ and a \\. in it",
            ),
            (
                b"Some text with a . and a \\ in it",
                b"Some text with a \\. and a \\\\ in it",
            ),
        ];

        for (raw, expected) in inputs.iter().copied() {
            let escaped = escape(raw, DELIMITER, ESCAPE_TOKEN);
            assert_eq!(escaped.as_ref(), expected);

            let restored = unescape(expected, DELIMITER, ESCAPE_TOKEN);
            assert_eq!(restored.as_ref(), raw);
        }
    }

    #[test]
    fn splits_on_dot() {
        assert_splits(
            b"left of dot . right of dot",
            b"left of dot ",
            b" right of dot",
        );
    }

    #[test]
    fn splits_on_ending_dot() {
        assert_splits(b"left of dot .", b"left of dot ", b"");
    }

    #[test]
    fn splits_on_starting_dot() {
        assert_splits(b". right of dot", b"", b" right of dot");
    }

    #[test]
    fn splits_last_dot() {
        assert_splits(
            b"left of dots . between dots . right of dots",
            b"left of dots . between dots ",
            b" right of dots",
        );
    }

    #[test]
    fn doesnt_split_escaped_dot() {
        let outcome = rsplit_once_escaped(
            b"left of dot \\. right of dot",
            DELIMITER,
            ESCAPE_TOKEN,
        );
        assert_eq!(outcome, None);
    }

    #[test]
    fn splits_unescaped_dot_with_preceding_escaped_dots() {
        assert_splits(
            b"left of dots . between dots \\. right of dots",
            b"left of dots ",
            b" between dots \\. right of dots",
        );
    }

    #[test]
    fn splits_unescaped_dot_with_preceding_backslashes() {
        assert_splits(
            b"left of dots . between dots \\\\. right of dots",
            b"left of dots . between dots \\\\",
            b" right of dots",
        );
    }

    #[test]
    fn splits_unescaped_dot_with_preceding_escaped_dots_with_preceding_backslashes() {
        assert_splits(
            b"left of dots . between dots \\\\\\. right of dots",
            b"left of dots ",
            b" between dots \\\\\\. right of dots",
        );
    }
}
|
222
src/lib.rs
222
src/lib.rs
|
@ -1,5 +1,5 @@
|
|||
use std::{
|
||||
borrow::{Borrow, Cow},
|
||||
borrow::Cow,
|
||||
fmt::{Debug, Display},
|
||||
marker::PhantomData,
|
||||
ops::Deref,
|
||||
|
@ -438,7 +438,7 @@ where
|
|||
vec.extend_from_slice(b".");
|
||||
vec.extend_from_slice(&escape(N::NAME.into()));
|
||||
vec.extend_from_slice(b".");
|
||||
vec.extend_from_slice(&escape(self.segment.to_bytes()));
|
||||
vec.extend_from_slice(&escape(&self.segment.to_bytes()));
|
||||
vec
|
||||
}
|
||||
}
|
||||
|
@ -451,7 +451,7 @@ where
|
|||
fn construct(&self) -> Vec<u8> {
|
||||
let mut vec = self.inner.construct();
|
||||
vec.extend_from_slice(b".");
|
||||
vec.extend_from_slice(&escape(self.segment.to_bytes()));
|
||||
vec.extend_from_slice(&escape(&self.segment.to_bytes()));
|
||||
vec
|
||||
}
|
||||
}
|
||||
|
@ -469,121 +469,19 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
const ESCAPE_CHARS: &'static [u8] = &[b'.', b'\\'];
|
||||
const DELIMITER: u8 = b'.';
|
||||
const ESCAPE_TOKEN: u8 = b'\\';
|
||||
|
||||
/// Returns the index of the last byte in `s` that equals `item`, or `None`
/// when `item` does not occur in `s`.
fn rfind(s: &[u8], item: u8) -> Option<usize> {
    s.iter().rposition(|&byte| byte == item)
}
|
||||
|
||||
/// Returns the index of the first byte in `s` that is contained in `items`,
/// or `None` when no byte of `s` appears in `items`.
fn find_any(s: &[u8], items: &[u8]) -> Option<usize> {
    s.iter().position(|byte| items.contains(byte))
}
|
||||
|
||||
/// Returns the start index of the first occurrence of `pattern` in `s`, or
/// `None` when `pattern` does not occur.
///
/// # Panics
///
/// Panics when `pattern` is empty, because `slice::windows` rejects a window
/// size of zero.
fn find_exact(s: &[u8], pattern: &[u8]) -> Option<usize> {
    s.windows(pattern.len())
        .position(|window| window == pattern)
}
|
||||
|
||||
struct Split<'a, 'b>(Option<&'a [u8]>, &'b [u8]);
|
||||
|
||||
impl<'a, 'b> Iterator for Split<'a, 'b> {
|
||||
type Item = &'a [u8];
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if let Some(source) = self.0.take() {
|
||||
if let Some(index) = find_exact(source, self.1) {
|
||||
let (left, right) = source.split_at(index);
|
||||
self.0 = Some(&right[self.1.len()..]);
|
||||
return Some(left);
|
||||
} else {
|
||||
return Some(source);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn split<'a, 'b>(s: &'a [u8], items: &'b [u8]) -> Split<'a, 'b> {
|
||||
Split(Some(s), items)
|
||||
}
|
||||
|
||||
/// Concatenates every slice yielded by `iterator`, inserting a copy of
/// `joiner` between consecutive items.
///
/// No joiner is emitted before the first item or after the last one; an empty
/// iterator produces an empty vector.
fn join<I, T, U>(iterator: I, joiner: &[U]) -> Vec<U>
where
    I: Iterator<Item = T>,
    T: Borrow<[U]>,
    U: Clone + Debug,
{
    let mut joined = Vec::new();

    for (position, piece) in iterator.enumerate() {
        // Every item except the first is preceded by the joiner.
        if position > 0 {
            joined.extend_from_slice(joiner);
        }

        let slice: &[U] = piece.borrow();
        joined.extend_from_slice(slice);
    }

    joined
}
|
||||
|
||||
fn rsplit_once_escaped<'a>(s: &'a [u8]) -> Option<(&'a [u8], &'a [u8])> {
|
||||
let mut position: usize = s.len();
|
||||
|
||||
while let Some(index) = rfind(&s[..position], b'.') {
|
||||
let mut escaped = false;
|
||||
for prev in (0..index).rev() {
|
||||
if s.get(prev..prev + 1) != Some(b"\\") {
|
||||
break;
|
||||
}
|
||||
|
||||
escaped = !escaped;
|
||||
}
|
||||
|
||||
if !escaped {
|
||||
let (left, right) = s.split_at(index);
|
||||
return Some((left, &right[1..]));
|
||||
}
|
||||
|
||||
position = index;
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn escape<'a>(s: Cow<'a, [u8]>) -> Cow<'a, [u8]> {
|
||||
if find_any(&s, ESCAPE_CHARS).is_some() {
|
||||
let v = join(
|
||||
split(&s, b".").map(|part| join(split(part, b"\\"), b"\\\\")),
|
||||
b"\\.",
|
||||
);
|
||||
|
||||
return Cow::Owned(v);
|
||||
}
|
||||
|
||||
s
|
||||
fn escape(s: &[u8]) -> Cow<'_, [u8]> {
|
||||
flee::escape(s, DELIMITER, ESCAPE_TOKEN)
|
||||
}
|
||||
|
||||
fn unescape(s: &[u8]) -> Cow<'_, [u8]> {
|
||||
if find_any(s, ESCAPE_CHARS).is_some() {
|
||||
let v = join(
|
||||
split(s, b"\\\\").map(|part| join(split(part, b"\\."), b".")),
|
||||
b"\\",
|
||||
);
|
||||
flee::unescape(s, DELIMITER, ESCAPE_TOKEN)
|
||||
}
|
||||
|
||||
return Cow::Owned(v);
|
||||
}
|
||||
|
||||
Cow::Borrowed(s)
|
||||
fn rsplit_once_escaped(s: &[u8]) -> Option<(&[u8], &[u8])> {
|
||||
flee::rsplit_once_escaped(s, DELIMITER, ESCAPE_TOKEN)
|
||||
}
|
||||
|
||||
#[cfg(feature = "test")]
|
||||
|
@ -633,106 +531,10 @@ mod tests {
|
|||
str::{from_utf8, Utf8Error},
|
||||
};
|
||||
|
||||
use super::{escape, rsplit_once_escaped, unescape, PathField, PathGen, PathItem, PathNode};
|
||||
use super::{PathField, PathGen, PathItem, PathNode};
|
||||
|
||||
const ROOT: PathGen<'static> = PathGen::new("test-root");
|
||||
|
||||
/// A single unescaped dot splits the input into the text on either side.
#[test]
fn splits_on_dot() {
    let outcome = rsplit_once_escaped(b"left of dot . right of dot");
    assert_eq!(
        outcome,
        Some((&b"left of dot "[..], &b" right of dot"[..]))
    );
}
|
||||
|
||||
/// A trailing dot yields an empty right-hand side.
#[test]
fn splits_on_ending_dot() {
    let outcome = rsplit_once_escaped(b"left of dot .");
    assert_eq!(outcome, Some((&b"left of dot "[..], &b""[..])));
}
|
||||
|
||||
/// A leading dot yields an empty left-hand side.
#[test]
fn splits_on_starting_dot() {
    let outcome = rsplit_once_escaped(b". right of dot");
    assert_eq!(outcome, Some((&b""[..], &b" right of dot"[..])));
}
|
||||
|
||||
/// With several unescaped dots, the split happens at the last one.
#[test]
fn splits_last_dot() {
    let outcome = rsplit_once_escaped(b"left of dots . between dots . right of dots");
    assert_eq!(
        outcome,
        Some((
            &b"left of dots . between dots "[..],
            &b" right of dots"[..]
        ))
    );
}
|
||||
|
||||
/// An input whose only dot is escaped cannot be split.
#[test]
fn doesnt_split_escaped_dot() {
    let outcome = rsplit_once_escaped(b"left of dot \\. right of dot");
    assert_eq!(outcome, None);
}
|
||||
|
||||
/// An escaped dot further right is skipped in favour of the unescaped dot
/// before it.
#[test]
fn splits_unescaped_dot_with_preceding_escaped_dots() {
    let outcome = rsplit_once_escaped(b"left of dots . between dots \\. right of dots");
    assert_eq!(
        outcome,
        Some((
            &b"left of dots "[..],
            &b" between dots \\. right of dots"[..]
        ))
    );
}
|
||||
|
||||
/// Two backslashes before a dot escape each other, so the dot still splits.
#[test]
fn splits_unescaped_dot_with_preceding_backslashes() {
    let outcome = rsplit_once_escaped(b"left of dots . between dots \\\\. right of dots");
    assert_eq!(
        outcome,
        Some((
            &b"left of dots . between dots \\\\"[..],
            &b" right of dots"[..]
        ))
    );
}
|
||||
|
||||
/// Three backslashes before a dot leave it escaped, so the earlier unescaped
/// dot is used instead.
#[test]
fn splits_unescaped_dot_with_preceding_escaped_dots_with_preceding_backslashes() {
    let outcome = rsplit_once_escaped(b"left of dots . between dots \\\\\\. right of dots");
    assert_eq!(
        outcome,
        Some((
            &b"left of dots "[..],
            &b" between dots \\\\\\. right of dots"[..]
        ))
    );
}
|
||||
|
||||
/// Every raw input must escape to its expected form, and that form must
/// unescape back to the raw input.
#[test]
fn escape_output() {
    let inputs: [(&[u8], &[u8]); 13] = [
        (
            b"..\\.\\.\\..\\\\\\...",
            b"\\.\\.\\\\\\.\\\\\\.\\\\\\.\\.\\\\\\\\\\\\\\.\\.\\.",
        ),
        (b".", b"\\."),
        (b"\\", b"\\\\"),
        (b"\\.", b"\\\\\\."),
        (b".\\", b"\\.\\\\"),
        (b".\\\\\\", b"\\.\\\\\\\\\\\\"),
        (b"\\\\\\.", b"\\\\\\\\\\\\\\."),
        (b"...\\", b"\\.\\.\\.\\\\"),
        (b"\\...", b"\\\\\\.\\.\\."),
        (b"Some text with a . in it", b"Some text with a \\. in it"),
        (b"Some text with a \\ in it", b"Some text with a \\\\ in it"),
        (
            b"Some text with a \\ and a . in it",
            b"Some text with a \\\\ and a \\. in it",
        ),
        (
            b"Some text with a . and a \\ in it",
            b"Some text with a \\. and a \\\\ in it",
        ),
    ];

    for (raw, expected) in inputs.iter().copied() {
        let escaped = escape(raw.into());
        assert_eq!(escaped.as_ref(), expected);

        let restored = unescape(expected);
        assert_eq!(restored.as_ref(), raw);
    }
}
|
||||
|
||||
#[test]
|
||||
fn construct_field() {
|
||||
let s = ROOT
|
||||
|
|
Loading…
Reference in a new issue