hyaenidae/profiles/src/store/term_search.rs

166 lines
4 KiB
Rust

use super::{count, StoreError};
use sled::{Db, Transactional, Tree};
use std::collections::HashSet;
/// A search index over short terms, stored in sled trees.
///
/// Terms are lowercased before being stored or looked up.
#[derive(Clone, Debug)]
pub struct TermSearch {
// One tree per permitted term length: a term of `n` bytes lives in `trees[n - 1]`.
trees: Vec<Tree>,
// Tree handed to the `count` helper, keyed by term, to track per-term counts.
counts: Tree,
}
impl TermSearch {
    /// Opens the sled trees backing the index called `name`.
    ///
    /// One tree is opened for each index in `0..max_len` (tree `i` holds terms of
    /// `i + 1` bytes), plus a `counts` tree.
    ///
    /// # Errors
    ///
    /// Returns the underlying `sled::Error` if any tree cannot be opened.
    pub(super) fn build(name: &str, max_len: usize, db: &Db) -> Result<Self, sled::Error> {
        let trees = (0..max_len)
            .map(|i| db.open_tree(&format!("/profiles/search/{}/{}", name, i)))
            .collect::<Result<Vec<_>, _>>()?;
        let counts = db.open_tree(&format!("/profiles/search/{}/counts", name))?;
        Ok(TermSearch { trees, counts })
    }

    /// Lowercases `term` and checks that it fits in one of the length-bucketed trees.
    ///
    /// # Errors
    ///
    /// `StoreError::Empty` for an empty term, `StoreError::TooLong` when the lowercased
    /// term has more bytes than there are trees.
    fn normalize(&self, term: &str) -> Result<String, StoreError> {
        let term = term.to_lowercase();
        if term.is_empty() {
            return Err(StoreError::Empty);
        }
        if term.len() > self.trees.len() {
            return Err(StoreError::TooLong);
        }
        Ok(term)
    }

    /// Records one more occurrence of `term` in the index.
    ///
    /// The count update and the key writes happen in a single sled transaction. The
    /// rotated keys (see `key`) are only written when the updated count is exactly 1.
    ///
    /// # Errors
    ///
    /// `StoreError::Empty` / `StoreError::TooLong` for invalid terms, or whatever error
    /// the transaction produces.
    pub(crate) fn insert(&self, term: &str) -> Result<(), StoreError> {
        let term = self.normalize(term)?;
        let term_bytes = term.as_bytes();
        // `normalize` guarantees 1 <= len <= trees.len(), so this index is in bounds.
        let tree = &self.trees[term.len() - 1];
        [tree, &self.counts].transaction(|trees| {
            let tree = &trees[0];
            let counts = &trees[1];
            if count(counts, &term, |c| c.saturating_add(1))? == 1 {
                for i in 0..term_bytes.len() {
                    let key = key(term_bytes, i);
                    tree.insert(key.as_slice(), term_bytes)?;
                }
            }
            Ok(())
        })?;
        Ok(())
    }

    /// Records one fewer occurrence of `term` in the index.
    ///
    /// The count is decremented (saturating at zero); once it reaches zero, every
    /// rotated key for the term and the term's count entry are deleted, all within a
    /// single sled transaction.
    ///
    /// # Errors
    ///
    /// `StoreError::Empty` / `StoreError::TooLong` for invalid terms, or whatever error
    /// the transaction produces.
    pub(crate) fn remove(&self, term: &str) -> Result<(), StoreError> {
        let term = self.normalize(term)?;
        let term_bytes = term.as_bytes();
        // `normalize` guarantees 1 <= len <= trees.len(), so this index is in bounds.
        let tree = &self.trees[term.len() - 1];
        [tree, &self.counts].transaction(|trees| {
            let tree = &trees[0];
            let counts = &trees[1];
            // Removal must decrement: incrementing here could never yield zero, so
            // entries would never be cleaned up.
            if count(counts, &term, |c| c.saturating_sub(1))? == 0 {
                for i in 0..term_bytes.len() {
                    let key = key(term_bytes, i);
                    tree.remove(key.as_slice())?;
                }
                counts.remove(term.as_bytes())?;
            }
            Ok(())
        })?;
        Ok(())
    }

    /// Iterates over stored terms that have a rotated key starting with the lowercased
    /// `term`.
    ///
    /// Only trees for terms at least as long as the lowercased query are scanned.
    /// Results are deduplicated within each tree. Entries that fail to read are
    /// silently skipped.
    pub(crate) fn search<'a>(
        &'a self,
        term: &'a str,
    ) -> impl DoubleEndedIterator<Item = String> + 'a {
        // Lowercase once, and bucket by the lowercased length: that is the length
        // `insert` uses, and lowercasing can change a string's byte length.
        let needle = term.to_lowercase();
        self.trees
            .iter()
            .skip(needle.len().saturating_sub(1))
            .flat_map(move |tree| {
                let iter = tree
                    .scan_prefix(needle.as_bytes())
                    .values()
                    .filter_map(|res| res.ok());
                Deduplicate::new(iter).map(|ivec| String::from_utf8_lossy(&ivec).to_string())
            })
    }
}
/// Builds the index key for the rotation of `term_vec` at byte offset `i`.
///
/// The key is the bytes from `i` onward, then a single `127` separator byte, then the
/// bytes before `i`. Panics if `i > term_vec.len()`.
fn key(term_vec: &[u8], i: usize) -> Vec<u8> {
    let (head, tail) = term_vec.split_at(i);
    [tail, &[127u8][..], head].concat()
}
/// Iterator adaptor that yields each distinct value from `iter` only once.
struct Deduplicate<T> {
// The wrapped iterator.
iter: T,
// Every value yielded so far, from either end.
seen: HashSet<sled::IVec>,
}
impl<T> Deduplicate<T> {
fn new(iter: T) -> Self {
Deduplicate {
iter,
seen: HashSet::new(),
}
}
}
impl<T> Iterator for Deduplicate<T>
where
T: DoubleEndedIterator<Item = sled::IVec>,
{
type Item = sled::IVec;
fn next(&mut self) -> Option<Self::Item> {
while let Some(ivec) = self.iter.next() {
if !self.seen.contains(&ivec) {
self.seen.insert(ivec.clone());
return Some(ivec);
}
}
None
}
}
/// Backward iteration: yields the next not-yet-seen value taken from the back of the
/// wrapped iterator. The seen-set is shared with forward iteration.
impl<T> DoubleEndedIterator for Deduplicate<T>
where
    T: DoubleEndedIterator<Item = sled::IVec>,
{
    fn next_back(&mut self) -> Option<Self::Item> {
        loop {
            // `?` ends iteration once the wrapped iterator is exhausted.
            let candidate = self.iter.next_back()?;
            if self.seen.contains(&candidate) {
                continue;
            }
            self.seen.insert(candidate.clone());
            return Some(candidate);
        }
    }
}