From 68c96fa56e8ad4b6441efe9f279a08a48207d997 Mon Sep 17 00:00:00 2001 From: asonix Date: Sun, 18 Feb 2024 13:04:04 -0600 Subject: [PATCH] Take better advantage of SIMDish features --- src/lib.rs | 56 +++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0b7888e..74602ff 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,8 @@ use std::f32::consts::PI; use srgb_lookup::srgb_to_linear; +const BYTES_PER_PIXEL: usize = 4; + /// How many components should be used in blurhash creation /// /// More components will increase the definition of the blurhash, but also increase processing @@ -35,7 +37,7 @@ pub struct ComponentError; pub struct Encoder { index: usize, components: Components, - factors: Box<[(ComponentState, [f32; 3])]>, + factors: Box<[(ComponentState, [f32; BYTES_PER_PIXEL])]>, bounds: ImageBounds, } @@ -70,7 +72,7 @@ impl Encoder { factors: Box::from( (0..y) .flat_map(|y| { - (0..x).map(move |x| (ComponentState { x, y, basis: 0. }, [0., 0., 0.])) + (0..x).map(move |x| (ComponentState { x, y, basis: 0. }, [0., 0., 0., 0.])) }) .collect::<Vec<_>>(), ), @@ -84,8 +86,6 @@ impl Encoder { /// The input doesn't need to contain whole pixels, the encoder is capable of handling partial /// pixels pub fn update(&mut self, rgba8_image: &[u8]) { - const BYTES_PER_PIXEL: usize = 4; - // get offset in terms of already-processed bytes let offset = self.index % BYTES_PER_PIXEL; // get offset in terms of remaining bytes on head of rgba8_image @@ -94,13 +94,13 @@ impl Encoder { let basis_scale_x = PI / self.bounds.width as f32; let basis_scale_y = PI / self.bounds.height as f32; - for (ComponentState { basis, .. }, [_, g, b]) in self.factors.iter_mut() { - for (byte, value) in rgba8_image[..offset].iter().zip( - [&mut *b, &mut *g][..offset.saturating_sub(BYTES_PER_PIXEL - 2)] + for (ComponentState { basis, .. 
}, [_, g, b, _]) in self.factors.iter_mut() { + for (byte, slot) in rgba8_image[..offset].iter().zip( + [b, g][..offset.saturating_sub(BYTES_PER_PIXEL - 2)] .iter_mut() .rev(), ) { - **value += *basis * srgb_to_linear(*byte); + **slot += *basis * srgb_to_linear(*byte); } } @@ -115,12 +115,16 @@ impl Encoder { let scale_x = px_x as f32 * basis_scale_x; let scale_y = px_y as f32 * basis_scale_y; - for (ComponentState { x, y, .. }, [r, g, b]) in self.factors.iter_mut() { + for (ComponentState { x, y, .. }, rgb) in self.factors.iter_mut() { let basis = f32::cos(*x as f32 * scale_x) * f32::cos(*y as f32 * scale_y); - *r += basis * srgb_to_linear(chunk[0]); - *g += basis * srgb_to_linear(chunk[1]); - *b += basis * srgb_to_linear(chunk[2]); + for (val, slot) in chunk + .iter() + .map(|byte| basis * srgb_to_linear(*byte)) + .zip(rgb) + { + *slot += val; + } } } @@ -131,11 +135,11 @@ impl Encoder { let scale_x = px_x as f32 * basis_scale_x; let scale_y = px_y as f32 * basis_scale_y; - for (ComponentState { x, y, basis }, [r, g, b]) in self.factors.iter_mut() { + for (ComponentState { x, y, basis }, rgb) in self.factors.iter_mut() { *basis = f32::cos(*x as f32 * scale_x) * f32::cos(*y as f32 * scale_y); - for (byte, value) in chunks.remainder().iter().zip([&mut *r, &mut *g, &mut *b]) { - *value += *basis * srgb_to_linear(*byte); + for (byte, slot) in chunks.remainder().iter().zip(rgb) { + *slot += *basis * srgb_to_linear(*byte); } } } @@ -145,14 +149,14 @@ impl Encoder { /// Produce a blurhash from the provided encoder pub fn finalize(mut self) -> String { - for (ComponentState { x, y, .. }, [r, g, b]) in self.factors.iter_mut() { + for (ComponentState { x, y, .. }, rgb) in self.factors.iter_mut() { let normalisation = if *x == 0 && *y == 0 { 1. } else { 2. 
}; let scale = normalisation / (self.bounds.width * self.bounds.height) as f32; - *r *= scale; - *g *= scale; - *b *= scale; + for slot in rgb { + *slot *= scale; + } } let mut blurhash = String::new(); @@ -163,7 +167,7 @@ impl Encoder { let size_flag = self.components.x - 1 + (self.components.y - 1) * 9; base83::encode(size_flag, 1, &mut blurhash); - let maximum = ac.iter().fold(0.0_f32, |maximum, (_, [r, g, b])| { + let maximum = ac.iter().fold(0.0_f32, |maximum, (_, [r, g, b, _])| { maximum.max(r.abs()).max(g.abs()).max(b.abs()) }); @@ -183,18 +187,14 @@ impl Encoder { } } -fn encode_dc([r, g, b]: [f32; 3]) -> u32 { - let r = linear_to_srgb(r); - let g = linear_to_srgb(g); - let b = linear_to_srgb(b); +fn encode_dc(rgb: [f32; BYTES_PER_PIXEL]) -> u32 { + let [r, g, b, _] = rgb.map(linear_to_srgb); (r << 16) + (g << 8) + b } -fn encode_ac([r, g, b]: [f32; 3], maximum_value: f32) -> u32 { - let r = encode_ac_digit(r, maximum_value); - let g = encode_ac_digit(g, maximum_value); - let b = encode_ac_digit(b, maximum_value); +fn encode_ac(rgb: [f32; BYTES_PER_PIXEL], maximum_value: f32) -> u32 { + let [r, g, b, _] = rgb.map(|c| encode_ac_digit(c, maximum_value)); r * 19 * 19 + g * 19 + b }