Multiple items:

- Reduce duplicate work in generate job
- Use hash instead of identifier for unique processing
- Move motion identifier generation behind concurrent processor lock
This commit is contained in:
Aode (Lion) 2022-04-02 17:40:04 -05:00
parent 09f53b9ce6
commit 132e395e5c
6 changed files with 139 additions and 81 deletions

View file

@ -1,7 +1,6 @@
use crate::{ use crate::{
details::Details, details::Details,
error::{Error, UploadError}, error::{Error, UploadError},
store::Identifier,
}; };
use actix_web::web; use actix_web::web;
use dashmap::{mapref::entry::Entry, DashMap}; use dashmap::{mapref::entry::Entry, DashMap};
@ -42,10 +41,8 @@ impl<F> CancelSafeProcessor<F>
where where
F: Future<Output = Result<(Details, web::Bytes), Error>>, F: Future<Output = Result<(Details, web::Bytes), Error>>,
{ {
pub(super) fn new<I: Identifier>(identifier: I, path: PathBuf, fut: F) -> Result<Self, Error> { pub(super) fn new(hash: &[u8], path: PathBuf, fut: F) -> Self {
let id_bytes = identifier.to_bytes()?; let key = (hash.to_vec(), path.clone());
let key = (id_bytes, path.clone());
let entry = PROCESS_MAP.entry(key.clone()); let entry = PROCESS_MAP.entry(key.clone());
@ -54,7 +51,7 @@ where
vacant.insert(Vec::new()); vacant.insert(Vec::new());
let span = tracing::info_span!( let span = tracing::info_span!(
"Processing image", "Processing image",
identifier = &tracing::field::debug(&identifier), hash = &tracing::field::debug(&hash),
path = &tracing::field::debug(&path), path = &tracing::field::debug(&path),
completed = &tracing::field::Empty, completed = &tracing::field::Empty,
); );
@ -65,21 +62,21 @@ where
occupied.get_mut().push(tx); occupied.get_mut().push(tx);
let span = tracing::info_span!( let span = tracing::info_span!(
"Waiting for processed image", "Waiting for processed image",
identifier = &tracing::field::debug(&identifier), hash = &tracing::field::debug(&hash),
path = &tracing::field::debug(&path), path = &tracing::field::debug(&path),
); );
(Some(rx), span) (Some(rx), span)
} }
}; };
Ok(CancelSafeProcessor { CancelSafeProcessor {
cancel_token: CancelToken { cancel_token: CancelToken {
span, span,
key, key,
receiver, receiver,
}, },
fut, fut,
}) }
} }
} }

93
src/generate.rs Normal file
View file

@ -0,0 +1,93 @@
use crate::{
concurrent_processor::CancelSafeProcessor,
config::ImageFormat,
details::Details,
error::Error,
ffmpeg::{InputFormat, ThumbnailFormat},
repo::{Alias, FullRepo},
store::Store,
};
use actix_web::web::Bytes;
use std::path::PathBuf;
use tokio::io::AsyncReadExt;
/// Generate (or await an in-flight generation of) the processed variant for
/// `alias` at `thumbnail_path`.
///
/// Duplicate work is avoided by keying a `CancelSafeProcessor` on the
/// `(hash, thumbnail_path)` pair: if another task is already producing this
/// exact variant, this call waits for its result instead of re-running the
/// pipeline.
pub(crate) async fn generate<R: FullRepo, S: Store + 'static>(
    repo: &R,
    store: &S,
    format: ImageFormat,
    alias: Alias,
    thumbnail_path: PathBuf,
    thumbnail_args: Vec<String>,
    hash: R::Bytes,
) -> Result<(Details, Bytes), Error> {
    // Build the processing future lazily; it only does work if this task
    // wins the slot in the concurrent-processor map.
    let fut = process(
        repo,
        store,
        format,
        alias,
        thumbnail_path.clone(),
        thumbnail_args,
        hash.clone(),
    );

    // Deduplicate on content hash + variant path and await the shared result.
    CancelSafeProcessor::new(hash.as_ref(), thumbnail_path, fut).await
}
/// Run the full variant-generation pipeline for `alias`:
/// resolve (or create) a still-image identifier, process it with the given
/// magick args, persist the result, and record the variant in the repo.
///
/// The global `PROCESS_SEMAPHORE` permit is held across the external
/// ffmpeg/imagemagick work so that concurrent subprocess usage stays bounded,
/// and released before the remaining repo/store bookkeeping.
async fn process<R: FullRepo, S: Store + 'static>(
    repo: &R,
    store: &S,
    format: ImageFormat,
    alias: Alias,
    thumbnail_path: PathBuf,
    thumbnail_args: Vec<String>,
    hash: R::Bytes,
) -> Result<(Details, Bytes), Error> {
    let permit = crate::PROCESS_SEMAPHORE.acquire().await?;

    // Prefer an existing still identifier for this alias; otherwise produce
    // a motion thumbnail via ffmpeg and remember it for future requests.
    let identifier = if let Some(identifier) = repo
        .still_identifier_from_alias::<S::Identifier>(&alias)
        .await?
    {
        identifier
    } else {
        let identifier = repo.identifier(hash.clone()).await?;
        let mut reader = crate::ffmpeg::thumbnail(
            store.clone(),
            identifier,
            InputFormat::Mp4,
            ThumbnailFormat::Jpeg,
        )
        .await?;
        let motion_identifier = store.save_async_read(&mut reader).await?;

        repo.relate_motion_identifier(hash.clone(), &motion_identifier)
            .await?;

        motion_identifier
    };

    let mut processed_reader =
        crate::magick::process_image_store_read(store.clone(), identifier, thumbnail_args, format)?;

    let mut vec = Vec::new();
    processed_reader.read_to_end(&mut vec).await?;
    let bytes = Bytes::from(vec);

    // External subprocess work is done — release the semaphore before the
    // (purely repo/store) bookkeeping below.
    drop(permit);

    let details = Details::from_bytes(bytes.clone(), format.as_hint()).await?;

    let identifier = store.save_bytes(bytes.clone()).await?;
    repo.relate_details(&identifier, &details).await?;
    repo.relate_variant_identifier(
        hash,
        thumbnail_path.to_string_lossy().to_string(),
        &identifier,
    )
    .await?;

    // No `as Result<...>` cast needed: the function's return type already
    // fixes the inference, and `as` is not valid for non-primitive types.
    Ok((details, bytes))
}

View file

@ -7,16 +7,12 @@ use crate::{
}; };
use actix_web::web::{Bytes, BytesMut}; use actix_web::web::{Bytes, BytesMut};
use futures_util::{Stream, StreamExt}; use futures_util::{Stream, StreamExt};
use once_cell::sync::Lazy;
use sha2::{Digest, Sha256}; use sha2::{Digest, Sha256};
use tokio::sync::Semaphore;
use tracing::debug; use tracing::debug;
mod hasher; mod hasher;
use hasher::Hasher; use hasher::Hasher;
static PROCESS_SEMAPHORE: Lazy<Semaphore> = Lazy::new(|| Semaphore::new(num_cpus::get()));
pub(crate) struct Session<R, S> pub(crate) struct Session<R, S>
where where
R: FullRepo + 'static, R: FullRepo + 'static,
@ -39,7 +35,7 @@ where
R: FullRepo + 'static, R: FullRepo + 'static,
S: Store, S: Store,
{ {
let permit = PROCESS_SEMAPHORE.acquire().await; let permit = crate::PROCESS_SEMAPHORE.acquire().await;
let mut bytes_mut = BytesMut::new(); let mut bytes_mut = BytesMut::new();

View file

@ -16,7 +16,7 @@ use std::{
sync::atomic::{AtomicU64, Ordering}, sync::atomic::{AtomicU64, Ordering},
time::{Duration, SystemTime}, time::{Duration, SystemTime},
}; };
use tokio::{io::AsyncReadExt, sync::Semaphore}; use tokio::sync::Semaphore;
use tracing::{debug, info, instrument}; use tracing::{debug, info, instrument};
use tracing_actix_web::TracingLogger; use tracing_actix_web::TracingLogger;
use tracing_awc::Tracing; use tracing_awc::Tracing;
@ -30,6 +30,7 @@ mod error;
mod exiftool; mod exiftool;
mod ffmpeg; mod ffmpeg;
mod file; mod file;
mod generate;
mod ingest; mod ingest;
mod init_tracing; mod init_tracing;
mod magick; mod magick;
@ -47,12 +48,10 @@ mod tmp_file;
mod validate; mod validate;
use self::{ use self::{
concurrent_processor::CancelSafeProcessor,
config::{Configuration, ImageFormat, Operation}, config::{Configuration, ImageFormat, Operation},
details::Details, details::Details,
either::Either, either::Either,
error::{Error, UploadError}, error::{Error, UploadError},
ffmpeg::{InputFormat, ThumbnailFormat},
ingest::Session, ingest::Session,
init_tracing::init_tracing, init_tracing::init_tracing,
magick::details_hint, magick::details_hint,
@ -94,6 +93,7 @@ async fn upload<R: FullRepo, S: Store + 'static>(
.into_iter() .into_iter()
.filter_map(|i| i.file()) .filter_map(|i| i.file())
.collect::<Vec<_>>(); .collect::<Vec<_>>();
for image in &images { for image in &images {
if let Some(alias) = image.result.alias() { if let Some(alias) = image.result.alias() {
info!("Uploaded {} as {:?}", image.filename, alias); info!("Uploaded {} as {:?}", image.filename, alias);
@ -295,66 +295,16 @@ async fn process<R: FullRepo, S: Store + 'static>(
return ranged_file_resp(&**store, identifier, range, details).await; return ranged_file_resp(&**store, identifier, range, details).await;
} }
let identifier = if let Some(identifier) = repo let (details, bytes) = generate::generate(
.still_identifier_from_alias::<S::Identifier>(&alias) &**repo,
.await? &**store,
{ format,
identifier alias,
} else { thumbnail_path,
let identifier = repo.identifier(hash.clone()).await?; thumbnail_args,
let permit = PROCESS_SEMAPHORE.acquire().await; hash,
let mut reader = crate::ffmpeg::thumbnail( )
(**store).clone(), .await?;
identifier,
InputFormat::Mp4,
ThumbnailFormat::Jpeg,
)
.await?;
let motion_identifier = store.save_async_read(&mut reader).await?;
drop(permit);
repo.relate_motion_identifier(hash.clone(), &motion_identifier)
.await?;
motion_identifier
};
let thumbnail_path2 = thumbnail_path.clone();
let identifier2 = identifier.clone();
let process_fut = async {
let thumbnail_path = thumbnail_path2;
let permit = PROCESS_SEMAPHORE.acquire().await?;
let mut processed_reader = crate::magick::process_image_store_read(
(**store).clone(),
identifier2,
thumbnail_args,
format,
)?;
let mut vec = Vec::new();
processed_reader.read_to_end(&mut vec).await?;
let bytes = web::Bytes::from(vec);
drop(permit);
let details = Details::from_bytes(bytes.clone(), format.as_hint()).await?;
let identifier = store.save_bytes(bytes.clone()).await?;
repo.relate_details(&identifier, &details).await?;
repo.relate_variant_identifier(
hash,
thumbnail_path.to_string_lossy().to_string(),
&identifier,
)
.await?;
Ok((details, bytes)) as Result<(Details, web::Bytes), Error>
};
let (details, bytes) =
CancelSafeProcessor::new(identifier, thumbnail_path.clone(), process_fut)?.await?;
let (builder, stream) = if let Some(web::Header(range_header)) = range { let (builder, stream) = if let Some(web::Header(range_header)) = range {
if let Some(range) = range::single_bytes_range(&range_header) { if let Some(range) = range::single_bytes_range(&range_header) {

View file

@ -114,7 +114,7 @@ pub(crate) async fn process_cleanup<R: FullRepo, S: Store>(repo: R, store: S, wo
process_jobs(&repo, &store, worker_id, cleanup::perform).await process_jobs(&repo, &store, worker_id, cleanup::perform).await
} }
pub(crate) async fn process_images<R: FullRepo + 'static, S: Store>( pub(crate) async fn process_images<R: FullRepo + 'static, S: Store + 'static>(
repo: R, repo: R,
store: S, store: S,
worker_id: String, worker_id: String,

View file

@ -17,7 +17,7 @@ pub(super) fn perform<'a, R, S>(
) -> LocalBoxFuture<'a, Result<(), Error>> ) -> LocalBoxFuture<'a, Result<(), Error>>
where where
R: FullRepo + 'static, R: FullRepo + 'static,
S: Store, S: Store + 'static,
{ {
Box::pin(async move { Box::pin(async move {
match serde_json::from_slice(job) { match serde_json::from_slice(job) {
@ -114,7 +114,7 @@ where
Ok(()) Ok(())
} }
async fn generate<R, S>( async fn generate<R: FullRepo, S: Store + 'static>(
repo: &R, repo: &R,
store: &S, store: &S,
target_format: ImageFormat, target_format: ImageFormat,
@ -122,5 +122,27 @@ async fn generate<R, S>(
process_path: PathBuf, process_path: PathBuf,
process_args: Vec<String>, process_args: Vec<String>,
) -> Result<(), Error> { ) -> Result<(), Error> {
unimplemented!("do this") let hash = repo.hash(&source).await?;
let path_string = process_path.to_string_lossy().to_string();
let identifier_opt = repo
.variant_identifier::<S::Identifier>(hash.clone(), path_string)
.await?;
if identifier_opt.is_some() {
return Ok(());
}
crate::generate::generate(
repo,
store,
target_format,
source,
process_path,
process_args,
hash,
)
.await?;
Ok(())
} }