166 lines
4 KiB
Rust
166 lines
4 KiB
Rust
use super::{count, StoreError};
|
|
use sled::{Db, Transactional, Tree};
|
|
use std::collections::HashSet;
|
|
|
|
#[derive(Clone, Debug)]
|
|
pub struct TermSearch {
|
|
trees: Vec<Tree>,
|
|
counts: Tree,
|
|
}
|
|
|
|
impl TermSearch {
|
|
pub(super) fn build(name: &str, max_len: usize, db: &Db) -> Result<Self, sled::Error> {
|
|
let mut trees = Vec::with_capacity(max_len);
|
|
|
|
for i in 0..max_len {
|
|
trees.push(db.open_tree(&format!("/profiles/search/{}/{}", name, i))?);
|
|
}
|
|
|
|
let counts = db.open_tree(&format!("/profiles/search/{}/counts", name))?;
|
|
|
|
Ok(TermSearch { trees, counts })
|
|
}
|
|
|
|
pub(crate) fn insert(&self, term: &str) -> Result<(), StoreError> {
|
|
let term = term.to_lowercase();
|
|
let term_vec = term.as_bytes().to_vec();
|
|
|
|
if term.is_empty() {
|
|
return Err(StoreError::Empty);
|
|
}
|
|
|
|
if term.len() > self.trees.len() {
|
|
return Err(StoreError::TooLong);
|
|
}
|
|
|
|
let tree = &self.trees[term.len() - 1];
|
|
|
|
[tree, &self.counts].transaction(|trees| {
|
|
let tree = &trees[0];
|
|
let counts = &trees[1];
|
|
|
|
if count(counts, &term, |c| c.saturating_add(1))? == 1 {
|
|
for i in 0..term_vec.len() {
|
|
let key = key(&term_vec, i);
|
|
|
|
tree.insert(key.as_slice(), term_vec.as_slice())?;
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
})?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub(crate) fn remove(&self, term: &str) -> Result<(), StoreError> {
|
|
let term = term.to_lowercase();
|
|
let term_vec = term.as_bytes().to_vec();
|
|
|
|
if term.is_empty() {
|
|
return Err(StoreError::Empty);
|
|
}
|
|
|
|
if term.len() > self.trees.len() {
|
|
return Err(StoreError::TooLong);
|
|
}
|
|
|
|
let tree = &self.trees[term.len() - 1];
|
|
|
|
[tree, &self.counts].transaction(|trees| {
|
|
let tree = &trees[0];
|
|
let counts = &trees[1];
|
|
|
|
if count(counts, &term, |c| c.saturating_add(1))? == 0 {
|
|
for i in 0..term_vec.len() {
|
|
let key = key(&term_vec, i);
|
|
|
|
tree.remove(key.as_slice())?;
|
|
}
|
|
|
|
counts.remove(term.as_bytes())?;
|
|
}
|
|
|
|
Ok(())
|
|
})?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub(crate) fn search<'a>(
|
|
&'a self,
|
|
term: &'a str,
|
|
) -> impl DoubleEndedIterator<Item = String> + 'a {
|
|
self.trees
|
|
.iter()
|
|
.skip(term.len().saturating_sub(1))
|
|
.flat_map(move |tree| {
|
|
let iter = tree
|
|
.scan_prefix(term.to_lowercase())
|
|
.values()
|
|
.filter_map(|res| res.ok());
|
|
|
|
Deduplicate::new(iter).map(|ivec| String::from_utf8_lossy(&ivec).to_string())
|
|
})
|
|
}
|
|
}
|
|
|
|
/// Builds the index key for the rotation of `term_vec` that starts at byte `i`.
///
/// The result is `term_vec[i..]`, then a single separator byte `127`
/// (ASCII DEL), then `term_vec[..i]`.
///
/// # Panics
///
/// Panics if `i > term_vec.len()`.
fn key(term_vec: &[u8], i: usize) -> Vec<u8> {
    const SEPARATOR: u8 = 127;

    let (head, tail) = term_vec.split_at(i);

    [tail, &[SEPARATOR][..], head].concat()
}
|
|
|
|
struct Deduplicate<T> {
|
|
iter: T,
|
|
seen: HashSet<sled::IVec>,
|
|
}
|
|
|
|
impl<T> Deduplicate<T> {
|
|
fn new(iter: T) -> Self {
|
|
Deduplicate {
|
|
iter,
|
|
seen: HashSet::new(),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<T> Iterator for Deduplicate<T>
|
|
where
|
|
T: DoubleEndedIterator<Item = sled::IVec>,
|
|
{
|
|
type Item = sled::IVec;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
while let Some(ivec) = self.iter.next() {
|
|
if !self.seen.contains(&ivec) {
|
|
self.seen.insert(ivec.clone());
|
|
return Some(ivec);
|
|
}
|
|
}
|
|
|
|
None
|
|
}
|
|
}
|
|
|
|
impl<T> DoubleEndedIterator for Deduplicate<T>
|
|
where
|
|
T: DoubleEndedIterator<Item = sled::IVec>,
|
|
{
|
|
fn next_back(&mut self) -> Option<Self::Item> {
|
|
while let Some(ivec) = self.iter.next_back() {
|
|
if !self.seen.contains(&ivec) {
|
|
self.seen.insert(ivec.clone());
|
|
return Some(ivec);
|
|
}
|
|
}
|
|
|
|
None
|
|
}
|
|
}
|