Take better advantage of SIMDish features

2024-02-18 13:04:04 -06:00 · 2024-02-18 13:04:04 -06:00 · 68c96fa56e
parent 04f4d6f9b1
commit 68c96fa56e
1 changed file with 28 additions and 28 deletions
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,6 +5,8 @@ use std::f32::consts::PI;

 use srgb_lookup::srgb_to_linear;

+const BYTES_PER_PIXEL: usize = 4;
+
 /// How many components should be used in blurhash creation
 ///
 /// More components will increase the definition of the blurhash, but also increase processing
@@ -35,7 +37,7 @@ pub struct ComponentError;
 pub struct Encoder {
    index: usize,
    components: Components,
-    factors: Box<[(ComponentState, [f32; 3])]>,
+    factors: Box<[(ComponentState, [f32; BYTES_PER_PIXEL])]>,
    bounds: ImageBounds,
 }

@@ -70,7 +72,7 @@ impl Encoder {
            factors: Box::from(
                (0..y)
                    .flat_map(|y| {
-                        (0..x).map(move |x| (ComponentState { x, y, basis: 0. }, [0., 0., 0.]))
+                        (0..x).map(move |x| (ComponentState { x, y, basis: 0. }, [0., 0., 0., 0.]))
                    })
                    .collect::<Vec<_>>(),
            ),
@@ -84,8 +86,6 @@ impl Encoder {
    /// The input doesn't need to contain whole pixels, the encoder is capable of handling partial
    /// pixels
    pub fn update(&mut self, rgba8_image: &[u8]) {
-        const BYTES_PER_PIXEL: usize = 4;
-
        // get offset in terms of already-processed bytes
        let offset = self.index % BYTES_PER_PIXEL;
        // get offset in terms of remaining bytes on head of rgba8_image
@@ -94,13 +94,13 @@ impl Encoder {
        let basis_scale_x = PI / self.bounds.width as f32;
        let basis_scale_y = PI / self.bounds.height as f32;

-        for (ComponentState { basis, .. }, [_, g, b]) in self.factors.iter_mut() {
-            for (byte, value) in rgba8_image[..offset].iter().zip(
-                [&mut *b, &mut *g][..offset.saturating_sub(BYTES_PER_PIXEL - 2)]
+        for (ComponentState { basis, .. }, [_, g, b, _]) in self.factors.iter_mut() {
+            for (byte, slot) in rgba8_image[..offset].iter().zip(
+                [b, g][..offset.saturating_sub(BYTES_PER_PIXEL - 2)]
                    .iter_mut()
                    .rev(),
            ) {
-                **value += *basis * srgb_to_linear(*byte);
+                **slot += *basis * srgb_to_linear(*byte);
            }
        }

@@ -115,12 +115,16 @@ impl Encoder {
            let scale_x = px_x as f32 * basis_scale_x;
            let scale_y = px_y as f32 * basis_scale_y;

-            for (ComponentState { x, y, .. }, [r, g, b]) in self.factors.iter_mut() {
+            for (ComponentState { x, y, .. }, rgb) in self.factors.iter_mut() {
                let basis = f32::cos(*x as f32 * scale_x) * f32::cos(*y as f32 * scale_y);

-                *r += basis * srgb_to_linear(chunk[0]);
-                *g += basis * srgb_to_linear(chunk[1]);
-                *b += basis * srgb_to_linear(chunk[2]);
+                for (val, slot) in chunk
+                    .iter()
+                    .map(|byte| basis * srgb_to_linear(*byte))
+                    .zip(rgb)
+                {
+                    *slot += val;
+                }
            }
        }

@@ -131,11 +135,11 @@ impl Encoder {
            let scale_x = px_x as f32 * basis_scale_x;
            let scale_y = px_y as f32 * basis_scale_y;

-            for (ComponentState { x, y, basis }, [r, g, b]) in self.factors.iter_mut() {
+            for (ComponentState { x, y, basis }, rgb) in self.factors.iter_mut() {
                *basis = f32::cos(*x as f32 * scale_x) * f32::cos(*y as f32 * scale_y);

-                for (byte, value) in chunks.remainder().iter().zip([&mut *r, &mut *g, &mut *b]) {
-                    *value += *basis * srgb_to_linear(*byte);
+                for (byte, slot) in chunks.remainder().iter().zip(rgb) {
+                    *slot += *basis * srgb_to_linear(*byte);
                }
            }
        }
@@ -145,14 +149,14 @@ impl Encoder {

    /// Produce a blurhash from the provided encoder
    pub fn finalize(mut self) -> String {
-        for (ComponentState { x, y, .. }, [r, g, b]) in self.factors.iter_mut() {
+        for (ComponentState { x, y, .. }, rgb) in self.factors.iter_mut() {
            let normalisation = if *x == 0 && *y == 0 { 1. } else { 2. };

            let scale = normalisation / (self.bounds.width * self.bounds.height) as f32;

-            *r *= scale;
-            *g *= scale;
-            *b *= scale;
+            for slot in rgb {
+                *slot *= scale;
+            }
        }

        let mut blurhash = String::new();
@@ -163,7 +167,7 @@ impl Encoder {
        let size_flag = self.components.x - 1 + (self.components.y - 1) * 9;
        base83::encode(size_flag, 1, &mut blurhash);

-        let maximum = ac.iter().fold(0.0_f32, |maximum, (_, [r, g, b])| {
+        let maximum = ac.iter().fold(0.0_f32, |maximum, (_, [r, g, b, _])| {
            maximum.max(r.abs()).max(g.abs()).max(b.abs())
        });

@@ -183,18 +187,14 @@ impl Encoder {
    }
 }

-fn encode_dc([r, g, b]: [f32; 3]) -> u32 {
-    let r = linear_to_srgb(r);
-    let g = linear_to_srgb(g);
-    let b = linear_to_srgb(b);
+fn encode_dc(rgb: [f32; BYTES_PER_PIXEL]) -> u32 {
+    let [r, g, b, _] = rgb.map(linear_to_srgb);

    (r << 16) + (g << 8) + b
 }

-fn encode_ac([r, g, b]: [f32; 3], maximum_value: f32) -> u32 {
-    let r = encode_ac_digit(r, maximum_value);
-    let g = encode_ac_digit(g, maximum_value);
-    let b = encode_ac_digit(b, maximum_value);
+fn encode_ac(rgb: [f32; BYTES_PER_PIXEL], maximum_value: f32) -> u32 {
+    let [r, g, b, _] = rgb.map(|c| encode_ac_digit(c, maximum_value));

    r * 19 * 19 + g * 19 + b
 }