From 68c96fa56e8ad4b6441efe9f279a08a48207d997 Mon Sep 17 00:00:00 2001
From: asonix <asonix@asonix.dog>
Date: Sun, 18 Feb 2024 13:04:04 -0600
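Subject: [PATCH] Take better advantage of SIMDish features

Widen each component's accumulator from [f32; 3] to
[f32; BYTES_PER_PIXEL] (four lanes) so that a pixel chunk can be zipped
against it in one uniform loop, replacing the three hand-written
per-channel statements. The intent is to hand the compiler a
fixed-width loop that it can vectorize.

When a chunk holds a full four-byte pixel, the fourth lane picks up the
alpha byte. In the code touched by this patch that lane is
pattern-matched away (`[r, g, b, _]`) when computing the AC maximum and
in encode_dc/encode_ac, so it is not meant to affect the encoded hash.

BYTES_PER_PIXEL moves from a local constant in Encoder::update to a
module-level constant so that the factor array type can refer to it.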
---
 src/lib.rs | 56 +++++++++++++++++++++++++++---------------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 0b7888e..74602ff 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,6 +5,8 @@ use std::f32::consts::PI;
 
 use srgb_lookup::srgb_to_linear;
 
+const BYTES_PER_PIXEL: usize = 4;
+
 /// How many components should be used in blurhash creation
 ///
 /// More components will increase the definition of the blurhash, but also increase processing
@@ -35,7 +37,7 @@ pub struct ComponentError;
 pub struct Encoder {
     index: usize,
     components: Components,
-    factors: Box<[(ComponentState, [f32; 3])]>,
+    factors: Box<[(ComponentState, [f32; BYTES_PER_PIXEL])]>,
     bounds: ImageBounds,
 }
 
@@ -70,7 +72,7 @@ impl Encoder {
             factors: Box::from(
                 (0..y)
                     .flat_map(|y| {
-                        (0..x).map(move |x| (ComponentState { x, y, basis: 0. }, [0., 0., 0.]))
+                        (0..x).map(move |x| (ComponentState { x, y, basis: 0. }, [0., 0., 0., 0.]))
                     })
                     .collect::<Vec<_>>(),
             ),
@@ -84,8 +86,6 @@ impl Encoder {
     /// The input doesn't need to contain whole pixels, the encoder is capable of handling partial
     /// pixels
     pub fn update(&mut self, rgba8_image: &[u8]) {
-        const BYTES_PER_PIXEL: usize = 4;
-
         // get offset in terms of already-processed bytes
         let offset = self.index % BYTES_PER_PIXEL;
         // get offset in terms of remaining bytes on head of rgba8_image
@@ -94,13 +94,13 @@ impl Encoder {
         let basis_scale_x = PI / self.bounds.width as f32;
         let basis_scale_y = PI / self.bounds.height as f32;
 
-        for (ComponentState { basis, .. }, [_, g, b]) in self.factors.iter_mut() {
-            for (byte, value) in rgba8_image[..offset].iter().zip(
-                [&mut *b, &mut *g][..offset.saturating_sub(BYTES_PER_PIXEL - 2)]
+        for (ComponentState { basis, .. }, [_, g, b, _]) in self.factors.iter_mut() {
+            for (byte, slot) in rgba8_image[..offset].iter().zip(
+                [b, g][..offset.saturating_sub(BYTES_PER_PIXEL - 2)]
                     .iter_mut()
                     .rev(),
             ) {
-                **value += *basis * srgb_to_linear(*byte);
+                **slot += *basis * srgb_to_linear(*byte);
             }
         }
 
@@ -115,12 +115,16 @@ impl Encoder {
             let scale_x = px_x as f32 * basis_scale_x;
             let scale_y = px_y as f32 * basis_scale_y;
 
-            for (ComponentState { x, y, .. }, [r, g, b]) in self.factors.iter_mut() {
+            for (ComponentState { x, y, .. }, rgb) in self.factors.iter_mut() {
                 let basis = f32::cos(*x as f32 * scale_x) * f32::cos(*y as f32 * scale_y);
 
-                *r += basis * srgb_to_linear(chunk[0]);
-                *g += basis * srgb_to_linear(chunk[1]);
-                *b += basis * srgb_to_linear(chunk[2]);
+                for (val, slot) in chunk
+                    .iter()
+                    .map(|byte| basis * srgb_to_linear(*byte))
+                    .zip(rgb)
+                {
+                    *slot += val;
+                }
             }
         }
 
@@ -131,11 +135,11 @@ impl Encoder {
             let scale_x = px_x as f32 * basis_scale_x;
             let scale_y = px_y as f32 * basis_scale_y;
 
-            for (ComponentState { x, y, basis }, [r, g, b]) in self.factors.iter_mut() {
+            for (ComponentState { x, y, basis }, rgb) in self.factors.iter_mut() {
                 *basis = f32::cos(*x as f32 * scale_x) * f32::cos(*y as f32 * scale_y);
 
-                for (byte, value) in chunks.remainder().iter().zip([&mut *r, &mut *g, &mut *b]) {
-                    *value += *basis * srgb_to_linear(*byte);
+                for (byte, slot) in chunks.remainder().iter().zip(rgb) {
+                    *slot += *basis * srgb_to_linear(*byte);
                 }
             }
         }
@@ -145,14 +149,14 @@ impl Encoder {
 
     /// Produce a blurhash from the provided encoder
     pub fn finalize(mut self) -> String {
-        for (ComponentState { x, y, .. }, [r, g, b]) in self.factors.iter_mut() {
+        for (ComponentState { x, y, .. }, rgb) in self.factors.iter_mut() {
             let normalisation = if *x == 0 && *y == 0 { 1. } else { 2. };
 
             let scale = normalisation / (self.bounds.width * self.bounds.height) as f32;
 
-            *r *= scale;
-            *g *= scale;
-            *b *= scale;
+            for slot in rgb {
+                *slot *= scale;
+            }
         }
 
         let mut blurhash = String::new();
@@ -163,7 +167,7 @@ impl Encoder {
         let size_flag = self.components.x - 1 + (self.components.y - 1) * 9;
         base83::encode(size_flag, 1, &mut blurhash);
 
-        let maximum = ac.iter().fold(0.0_f32, |maximum, (_, [r, g, b])| {
+        let maximum = ac.iter().fold(0.0_f32, |maximum, (_, [r, g, b, _])| {
             maximum.max(r.abs()).max(g.abs()).max(b.abs())
         });
 
@@ -183,18 +187,14 @@ impl Encoder {
     }
 }
 
-fn encode_dc([r, g, b]: [f32; 3]) -> u32 {
-    let r = linear_to_srgb(r);
-    let g = linear_to_srgb(g);
-    let b = linear_to_srgb(b);
+fn encode_dc(rgb: [f32; BYTES_PER_PIXEL]) -> u32 {
+    let [r, g, b, _] = rgb.map(linear_to_srgb);
 
     (r << 16) + (g << 8) + b
 }
 
-fn encode_ac([r, g, b]: [f32; 3], maximum_value: f32) -> u32 {
-    let r = encode_ac_digit(r, maximum_value);
-    let g = encode_ac_digit(g, maximum_value);
-    let b = encode_ac_digit(b, maximum_value);
+fn encode_ac(rgb: [f32; BYTES_PER_PIXEL], maximum_value: f32) -> u32 {
+    let [r, g, b, _] = rgb.map(|c| encode_ac_digit(c, maximum_value));
 
     r * 19 * 19 + g * 19 + b
 }