use super::{count, StoreError}; use sled::{Db, Transactional, Tree}; use std::collections::HashSet; #[derive(Clone, Debug)] pub struct TermSearch { trees: Vec, counts: Tree, } impl TermSearch { pub(super) fn build(name: &str, max_len: usize, db: &Db) -> Result { let mut trees = Vec::with_capacity(max_len); for i in 0..max_len { trees.push(db.open_tree(&format!("/profiles/search/{}/{}", name, i))?); } let counts = db.open_tree(&format!("/profiles/search/{}/counts", name))?; Ok(TermSearch { trees, counts }) } pub(crate) fn insert(&self, term: &str) -> Result<(), StoreError> { let term = term.to_lowercase(); let term_vec = term.as_bytes().to_vec(); if term.is_empty() { return Err(StoreError::Empty); } if term.len() > self.trees.len() { return Err(StoreError::TooLong); } let tree = &self.trees[term.len() - 1]; [tree, &self.counts].transaction(|trees| { let tree = &trees[0]; let counts = &trees[1]; if count(counts, &term, |c| c.saturating_add(1))? == 1 { for i in 0..term_vec.len() { let key = key(&term_vec, i); tree.insert(key.as_slice(), term_vec.as_slice())?; } } Ok(()) })?; Ok(()) } pub(crate) fn remove(&self, term: &str) -> Result<(), StoreError> { let term = term.to_lowercase(); let term_vec = term.as_bytes().to_vec(); if term.is_empty() { return Err(StoreError::Empty); } if term.len() > self.trees.len() { return Err(StoreError::TooLong); } let tree = &self.trees[term.len() - 1]; [tree, &self.counts].transaction(|trees| { let tree = &trees[0]; let counts = &trees[1]; if count(counts, &term, |c| c.saturating_add(1))? == 0 { for i in 0..term_vec.len() { let key = key(&term_vec, i); tree.remove(key.as_slice())?; } counts.remove(term.as_bytes())?; } Ok(()) })?; Ok(()) } pub(crate) fn search<'a>( &'a self, term: &'a str, ) -> impl DoubleEndedIterator + 'a { self.trees .iter() .skip(term.len().saturating_sub(1)) .flat_map(move |tree| { let iter = tree .scan_prefix(term.to_lowercase()) .values() .filter_map(|res| res.ok()); Deduplicate::new(iter).map(|ivec| String::from_utf8_lossy(&ivec).to_string()) }) } } fn key(term_vec: &[u8], i: usize) -> Vec { let (start, end) = term_vec.split_at(i); let mut key = Vec::with_capacity(term_vec.len() + 1); key.extend(end); key.push(127); key.extend(start); key } struct Deduplicate { iter: T, seen: HashSet, } impl Deduplicate { fn new(iter: T) -> Self { Deduplicate { iter, seen: HashSet::new(), } } } impl Iterator for Deduplicate where T: DoubleEndedIterator, { type Item = sled::IVec; fn next(&mut self) -> Option { while let Some(ivec) = self.iter.next() { if !self.seen.contains(&ivec) { self.seen.insert(ivec.clone()); return Some(ivec); } } None } } impl DoubleEndedIterator for Deduplicate where T: DoubleEndedIterator, { fn next_back(&mut self) -> Option { while let Some(ivec) = self.iter.next_back() { if !self.seen.contains(&ivec) { self.seen.insert(ivec.clone()); return Some(ivec); } } None } }