From 54b47da5a229ee38730148ca94dede327fff3300 Mon Sep 17 00:00:00 2001 From: "Aode (Lion)" Date: Sun, 15 May 2022 13:28:25 -0500 Subject: [PATCH] Move escaping logic to flee --- Cargo.toml | 4 + flee/Cargo.toml | 8 ++ flee/src/lib.rs | 236 ++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 222 +++------------------------------------------ 4 files changed, 260 insertions(+), 210 deletions(-) create mode 100644 flee/Cargo.toml create mode 100644 flee/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 5c6f6b0..c348cd4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,3 +10,7 @@ test = [] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +flee = { path = "./flee" } + +[workspace] +members = ["flee"] diff --git a/flee/Cargo.toml b/flee/Cargo.toml new file mode 100644 index 0000000..28b1239 --- /dev/null +++ b/flee/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "flee" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/flee/src/lib.rs b/flee/src/lib.rs new file mode 100644 index 0000000..a615d4b --- /dev/null +++ b/flee/src/lib.rs @@ -0,0 +1,236 @@ +use std::borrow::{Borrow, Cow}; + +fn rfind(s: &[u8], item: u8) -> Option { + s.iter() + .enumerate() + .rev() + .find(|(_, elem)| **elem == item) + .map(|(index, _)| index) +} + +fn find_any(s: &[u8], items: &[u8]) -> Option { + s.iter() + .enumerate() + .find(|(_, elem)| items.contains(*elem)) + .map(|(index, _)| index) +} + +fn find_exact(s: &[u8], pattern: &[u8]) -> Option { + s.windows(pattern.len()) + .enumerate() + .find(|(_, elem)| *elem == pattern) + .map(|(index, _)| index) +} + +fn join(iterator: I, joiner: &[U]) -> Vec +where + I: Iterator, + T: Borrow<[U]>, + U: Clone, +{ + iterator + .fold((true, Vec::new()), |(first, mut vec), elem| { + if first { + vec.extend_from_slice(&elem.borrow()); + } else { + vec.extend_from_slice(joiner); + vec.extend_from_slice(&elem.borrow()); + } + (false, vec) + }) + .1 +} + +struct Split<'a, 'b>(Option<&'a [u8]>, &'b [u8]); + +impl<'a, 'b> Iterator for Split<'a, 'b> { + type Item = &'a [u8]; + + fn next(&mut self) -> Option { + if let Some(source) = self.0.take() { + if let Some(index) = find_exact(source, self.1) { + let (left, right) = source.split_at(index); + self.0 = Some(&right[self.1.len()..]); + return Some(left); + } else { + return Some(source); + } + } + + None + } +} + +fn split<'a, 'b>(s: &'a [u8], items: &'b [u8]) -> Split<'a, 'b> { + Split(Some(s), items) +} + +fn do_escape(s: &[u8], escaped_codes: &[u8], escape_token: u8) -> Vec { + match escaped_codes { + [] => s.to_vec(), + [first, rest @ ..] => join( + split(s, &[*first]).map(|part| do_escape(part, rest, escape_token)), + &[escape_token, *first], + ), + } +} + +pub fn escape<'a>(s: &'a [u8], delimiter: u8, escape_token: u8) -> Cow<'a, [u8]> { + let escaped_codes = &[escape_token, delimiter]; + if find_any(&s, escaped_codes).is_some() { + return Cow::Owned(do_escape(s, escaped_codes, escape_token)); + } + + Cow::Borrowed(s) +} + +fn do_unescape(s: &[u8], escaped_codes: &[u8], escape_token: u8) -> Vec { + match escaped_codes { + [] => s.to_vec(), + [first, rest @ ..] => join( + split(s, &[escape_token, *first]).map(|part| do_unescape(part, rest, escape_token)), + &[*first], + ), + } +} + +pub fn unescape<'a>(s: &'a [u8], delimiter: u8, escape_token: u8) -> Cow<'a, [u8]> { + let escaped_codes = &[escape_token, delimiter]; + if find_any(s, escaped_codes).is_some() { + return Cow::Owned(do_unescape(s, escaped_codes, escape_token)); + } + + Cow::Borrowed(s) +} + +pub fn rsplit_once_escaped<'a>( + s: &'a [u8], + delimiter: u8, + escape_token: u8, +) -> Option<(&'a [u8], &'a [u8])> { + let mut position: usize = s.len(); + + while let Some(index) = rfind(&s[..position], delimiter) { + let mut escaped = false; + for prev in (0..index).rev() { + if s.get(prev..prev + 1) != Some(&[escape_token]) { + break; + } + + escaped = !escaped; + } + + if !escaped { + let (left, right) = s.split_at(index); + return Some((left, &right[1..])); + } + + position = index; + } + + None +} + +#[cfg(test)] +mod tests { + use super::{escape, rsplit_once_escaped, unescape}; + + const DELIMITER: u8 = b'.'; + const ESCAPE_TOKEN: u8 = b'\\'; + + #[test] + fn escape_output() { + let inputs: [(&[u8], &[u8]); 13] = [ + ( + b"..\\.\\.\\..\\\\\\...", + b"\\.\\.\\\\\\.\\\\\\.\\\\\\.\\.\\\\\\\\\\\\\\.\\.\\.", + ), + (b".", b"\\."), + (b"\\", b"\\\\"), + (b"\\.", b"\\\\\\."), + (b".\\", b"\\.\\\\"), + (b".\\\\\\", b"\\.\\\\\\\\\\\\"), + (b"\\\\\\.", b"\\\\\\\\\\\\\\."), + (b"...\\", b"\\.\\.\\.\\\\"), + (b"\\...", b"\\\\\\.\\.\\."), + (b"Some text with a . in it", b"Some text with a \\. in it"), + (b"Some text with a \\ in it", b"Some text with a \\\\ in it"), + ( + b"Some text with a \\ and a . in it", + b"Some text with a \\\\ and a \\. in it", + ), + ( + b"Some text with a . and a \\ in it", + b"Some text with a \\. and a \\\\ in it", + ), + ]; + + for (start, escaped) in inputs { + assert_eq!(escape(start, DELIMITER, ESCAPE_TOKEN).as_ref(), escaped); + assert_eq!(unescape(escaped, DELIMITER, ESCAPE_TOKEN).as_ref(), start); + } + } + + #[test] + fn splits_on_dot() { + let input = b"left of dot . right of dot"; + let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap(); + assert_eq!(left, b"left of dot "); + assert_eq!(right, b" right of dot"); + } + + #[test] + fn splits_on_ending_dot() { + let input = b"left of dot ."; + let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap(); + assert_eq!(left, b"left of dot "); + assert_eq!(right, b""); + } + + #[test] + fn splits_on_starting_dot() { + let input = b". right of dot"; + let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap(); + assert_eq!(left, b""); + assert_eq!(right, b" right of dot"); + } + + #[test] + fn splits_last_dot() { + let input = b"left of dots . between dots . right of dots"; + let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap(); + assert_eq!(left, b"left of dots . between dots "); + assert_eq!(right, b" right of dots"); + } + + #[test] + fn doesnt_split_escaped_dot() { + let input = b"left of dot \\. right of dot"; + let opt = rsplit_once_escaped(input, b'.', b'\\'); + assert!(opt.is_none()); + } + + #[test] + fn splits_unescaped_dot_with_preceding_escaped_dots() { + let input = b"left of dots . between dots \\. right of dots"; + let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap(); + assert_eq!(left, b"left of dots "); + assert_eq!(right, b" between dots \\. right of dots"); + } + + #[test] + fn splits_unescaped_dot_with_preceding_backslashes() { + let input = b"left of dots . between dots \\\\. right of dots"; + let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap(); + assert_eq!(left, b"left of dots . between dots \\\\"); + assert_eq!(right, b" right of dots"); + } + + #[test] + fn splits_unescaped_dot_with_preceding_escaped_dots_with_preceding_backslashes() { + let input = b"left of dots . between dots \\\\\\. right of dots"; + let (left, right) = rsplit_once_escaped(input, b'.', b'\\').unwrap(); + assert_eq!(left, b"left of dots "); + assert_eq!(right, b" between dots \\\\\\. right of dots"); + } +} diff --git a/src/lib.rs b/src/lib.rs index d08b8ad..67aae3b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,5 @@ use std::{ - borrow::{Borrow, Cow}, + borrow::Cow, fmt::{Debug, Display}, marker::PhantomData, ops::Deref, @@ -438,7 +438,7 @@ where vec.extend_from_slice(b"."); vec.extend_from_slice(&escape(N::NAME.into())); vec.extend_from_slice(b"."); - vec.extend_from_slice(&escape(self.segment.to_bytes())); + vec.extend_from_slice(&escape(&self.segment.to_bytes())); vec } } @@ -451,7 +451,7 @@ where fn construct(&self) -> Vec { let mut vec = self.inner.construct(); vec.extend_from_slice(b"."); - vec.extend_from_slice(&escape(self.segment.to_bytes())); + vec.extend_from_slice(&escape(&self.segment.to_bytes())); vec } } @@ -469,121 +469,19 @@ where } } -const ESCAPE_CHARS: &'static [u8] = &[b'.', b'\\']; +const DELIMITER: u8 = b'.'; +const ESCAPE_TOKEN: u8 = b'\\'; -fn rfind(s: &[u8], item: u8) -> Option { - s.iter() - .enumerate() - .rev() - .find(|(_, elem)| **elem == item) - .map(|(index, _)| index) -} - -fn find_any(s: &[u8], items: &[u8]) -> Option { - s.iter() - .enumerate() - .find(|(_, elem)| items.contains(*elem)) - .map(|(index, _)| index) -} - -fn find_exact(s: &[u8], pattern: &[u8]) -> Option { - s.windows(pattern.len()) - .enumerate() - .find(|(_, elem)| *elem == pattern) - .map(|(index, _)| index) -} - -struct Split<'a, 'b>(Option<&'a [u8]>, &'b [u8]); - -impl<'a, 'b> Iterator for Split<'a, 'b> { - type Item = &'a [u8]; - - fn next(&mut self) -> Option { - if let Some(source) = self.0.take() { - if let Some(index) = find_exact(source, self.1) { - let (left, right) = source.split_at(index); - self.0 = Some(&right[self.1.len()..]); - return Some(left); - } else { - return Some(source); - } - } - - None - } -} - -fn split<'a, 'b>(s: &'a [u8], items: &'b [u8]) -> Split<'a, 'b> { - Split(Some(s), items) -} - -fn join(iterator: I, joiner: &[U]) -> Vec -where - I: Iterator, - T: Borrow<[U]>, - U: Clone + Debug, -{ - iterator - .fold((true, Vec::new()), |(first, mut vec), elem| { - if first { - vec.extend_from_slice(&elem.borrow()); - } else { - vec.extend_from_slice(joiner); - vec.extend_from_slice(&elem.borrow()); - } - (false, vec) - }) - .1 -} - -fn rsplit_once_escaped<'a>(s: &'a [u8]) -> Option<(&'a [u8], &'a [u8])> { - let mut position: usize = s.len(); - - while let Some(index) = rfind(&s[..position], b'.') { - let mut escaped = false; - for prev in (0..index).rev() { - if s.get(prev..prev + 1) != Some(b"\\") { - break; - } - - escaped = !escaped; - } - - if !escaped { - let (left, right) = s.split_at(index); - return Some((left, &right[1..])); - } - - position = index; - } - - None -} - -fn escape<'a>(s: Cow<'a, [u8]>) -> Cow<'a, [u8]> { - if find_any(&s, ESCAPE_CHARS).is_some() { - let v = join( - split(&s, b".").map(|part| join(split(part, b"\\"), b"\\\\")), - b"\\.", - ); - - return Cow::Owned(v); - } - - s +fn escape(s: &[u8]) -> Cow<'_, [u8]> { + flee::escape(s, DELIMITER, ESCAPE_TOKEN) } fn unescape(s: &[u8]) -> Cow<'_, [u8]> { - if find_any(s, ESCAPE_CHARS).is_some() { - let v = join( - split(s, b"\\\\").map(|part| join(split(part, b"\\."), b".")), - b"\\", - ); + flee::unescape(s, DELIMITER, ESCAPE_TOKEN) +} - return Cow::Owned(v); - } - - Cow::Borrowed(s) +fn rsplit_once_escaped(s: &[u8]) -> Option<(&[u8], &[u8])> { + flee::rsplit_once_escaped(s, DELIMITER, ESCAPE_TOKEN) } #[cfg(feature = "test")] @@ -633,106 +531,10 @@ mod tests { str::{from_utf8, Utf8Error}, }; - use super::{escape, rsplit_once_escaped, unescape, PathField, PathGen, PathItem, PathNode}; + use super::{PathField, PathGen, PathItem, PathNode}; const ROOT: PathGen<'static> = PathGen::new("test-root"); - #[test] - fn splits_on_dot() { - let input = b"left of dot . right of dot"; - let (left, right) = rsplit_once_escaped(input).unwrap(); - assert_eq!(left, b"left of dot "); - assert_eq!(right, b" right of dot"); - } - - #[test] - fn splits_on_ending_dot() { - let input = b"left of dot ."; - let (left, right) = rsplit_once_escaped(input).unwrap(); - assert_eq!(left, b"left of dot "); - assert_eq!(right, b""); - } - - #[test] - fn splits_on_starting_dot() { - let input = b". right of dot"; - let (left, right) = rsplit_once_escaped(input).unwrap(); - assert_eq!(left, b""); - assert_eq!(right, b" right of dot"); - } - - #[test] - fn splits_last_dot() { - let input = b"left of dots . between dots . right of dots"; - let (left, right) = rsplit_once_escaped(input).unwrap(); - assert_eq!(left, b"left of dots . between dots "); - assert_eq!(right, b" right of dots"); - } - - #[test] - fn doesnt_split_escaped_dot() { - let input = b"left of dot \\. right of dot"; - let opt = rsplit_once_escaped(input); - assert!(opt.is_none()); - } - - #[test] - fn splits_unescaped_dot_with_preceding_escaped_dots() { - let input = b"left of dots . between dots \\. right of dots"; - let (left, right) = rsplit_once_escaped(input).unwrap(); - assert_eq!(left, b"left of dots "); - assert_eq!(right, b" between dots \\. right of dots"); - } - - #[test] - fn splits_unescaped_dot_with_preceding_backslashes() { - let input = b"left of dots . between dots \\\\. right of dots"; - let (left, right) = rsplit_once_escaped(input).unwrap(); - assert_eq!(left, b"left of dots . between dots \\\\"); - assert_eq!(right, b" right of dots"); - } - - #[test] - fn splits_unescaped_dot_with_preceding_escaped_dots_with_preceding_backslashes() { - let input = b"left of dots . between dots \\\\\\. right of dots"; - let (left, right) = rsplit_once_escaped(input).unwrap(); - assert_eq!(left, b"left of dots "); - assert_eq!(right, b" between dots \\\\\\. right of dots"); - } - - #[test] - fn escape_output() { - let inputs: [(&[u8], &[u8]); 13] = [ - ( - b"..\\.\\.\\..\\\\\\...", - b"\\.\\.\\\\\\.\\\\\\.\\\\\\.\\.\\\\\\\\\\\\\\.\\.\\.", - ), - (b".", b"\\."), - (b"\\", b"\\\\"), - (b"\\.", b"\\\\\\."), - (b".\\", b"\\.\\\\"), - (b".\\\\\\", b"\\.\\\\\\\\\\\\"), - (b"\\\\\\.", b"\\\\\\\\\\\\\\."), - (b"...\\", b"\\.\\.\\.\\\\"), - (b"\\...", b"\\\\\\.\\.\\."), - (b"Some text with a . in it", b"Some text with a \\. in it"), - (b"Some text with a \\ in it", b"Some text with a \\\\ in it"), - ( - b"Some text with a \\ and a . in it", - b"Some text with a \\\\ and a \\. in it", - ), - ( - b"Some text with a . and a \\ in it", - b"Some text with a \\. and a \\\\ in it", - ), - ]; - - for (start, escaped) in inputs { - assert_eq!(escape(start.into()).as_ref(), escaped); - assert_eq!(unescape(escaped).as_ref(), start); - } - } - #[test] fn construct_field() { let s = ROOT