Take better advantage of SIMDish features
All checks were successful
/ tests (push) Successful in 2m29s
/ clippy (push) Successful in 8s
/ check (x86_64-unknown-linux-musl) (push) Successful in 6s
/ check (aarch64-unknown-linux-musl) (push) Successful in 9s
/ check (armv7-unknown-linux-musleabihf) (push) Successful in 8s

This commit is contained in:
asonix 2024-02-18 13:04:04 -06:00
parent 04f4d6f9b1
commit 68c96fa56e

View file

@@ -5,6 +5,8 @@ use std::f32::consts::PI;
use srgb_lookup::srgb_to_linear;
const BYTES_PER_PIXEL: usize = 4;
/// How many components should be used in blurhash creation
///
/// More components will increase the definition of the blurhash, but also increase processing
@@ -35,7 +37,7 @@ pub struct ComponentError;
// NOTE(review): this span comes from a rendered diff whose +/- markers were stripped, so the
// `factors` field appears twice. The `[f32; 3]` line looks like the removed version and the
// `[f32; BYTES_PER_PIXEL]` line like its replacement — confirm against the repository.
pub struct Encoder {
/// Read by `update` as `self.index % BYTES_PER_PIXEL` to find the offset into a partially
/// processed pixel; presumably the number of bytes consumed so far — TODO confirm.
index: usize,
/// Component counts; `finalize` reads `components.x` and `components.y` for the size flag.
components: Components,
/// One entry per component: its `ComponentState` paired with the accumulated channel values.
factors: Box<[(ComponentState, [f32; 3])]>,
/// Same field, with one accumulator slot per byte of a pixel instead of three.
factors: Box<[(ComponentState, [f32; BYTES_PER_PIXEL])]>,
/// Image dimensions; `width` and `height` scale the cosine basis and the final normalisation.
bounds: ImageBounds,
}
@@ -70,7 +72,7 @@ impl Encoder {
factors: Box::from(
(0..y)
.flat_map(|y| {
(0..x).map(move |x| (ComponentState { x, y, basis: 0. }, [0., 0., 0.]))
(0..x).map(move |x| (ComponentState { x, y, basis: 0. }, [0., 0., 0., 0.]))
})
.collect::<Vec<_>>(),
),
@@ -84,8 +86,6 @@ impl Encoder {
/// The input doesn't need to contain whole pixels, the encoder is capable of handling partial
/// pixels
pub fn update(&mut self, rgba8_image: &[u8]) {
const BYTES_PER_PIXEL: usize = 4;
// get offset in terms of already-processed bytes
let offset = self.index % BYTES_PER_PIXEL;
// get offset in terms of remaining bytes on head of rgba8_image
@@ -94,13 +94,13 @@ impl Encoder {
let basis_scale_x = PI / self.bounds.width as f32;
let basis_scale_y = PI / self.bounds.height as f32;
for (ComponentState { basis, .. }, [_, g, b]) in self.factors.iter_mut() {
for (byte, value) in rgba8_image[..offset].iter().zip(
[&mut *b, &mut *g][..offset.saturating_sub(BYTES_PER_PIXEL - 2)]
for (ComponentState { basis, .. }, [_, g, b, _]) in self.factors.iter_mut() {
for (byte, slot) in rgba8_image[..offset].iter().zip(
[b, g][..offset.saturating_sub(BYTES_PER_PIXEL - 2)]
.iter_mut()
.rev(),
) {
**value += *basis * srgb_to_linear(*byte);
**slot += *basis * srgb_to_linear(*byte);
}
}
@@ -115,12 +115,16 @@ impl Encoder {
let scale_x = px_x as f32 * basis_scale_x;
let scale_y = px_y as f32 * basis_scale_y;
for (ComponentState { x, y, .. }, [r, g, b]) in self.factors.iter_mut() {
for (ComponentState { x, y, .. }, rgb) in self.factors.iter_mut() {
let basis = f32::cos(*x as f32 * scale_x) * f32::cos(*y as f32 * scale_y);
*r += basis * srgb_to_linear(chunk[0]);
*g += basis * srgb_to_linear(chunk[1]);
*b += basis * srgb_to_linear(chunk[2]);
for (val, slot) in chunk
.iter()
.map(|byte| basis * srgb_to_linear(*byte))
.zip(rgb)
{
*slot += val;
}
}
}
@@ -131,11 +135,11 @@ impl Encoder {
let scale_x = px_x as f32 * basis_scale_x;
let scale_y = px_y as f32 * basis_scale_y;
for (ComponentState { x, y, basis }, [r, g, b]) in self.factors.iter_mut() {
for (ComponentState { x, y, basis }, rgb) in self.factors.iter_mut() {
*basis = f32::cos(*x as f32 * scale_x) * f32::cos(*y as f32 * scale_y);
for (byte, value) in chunks.remainder().iter().zip([&mut *r, &mut *g, &mut *b]) {
*value += *basis * srgb_to_linear(*byte);
for (byte, slot) in chunks.remainder().iter().zip(rgb) {
*slot += *basis * srgb_to_linear(*byte);
}
}
}
@@ -145,14 +149,14 @@ impl Encoder {
/// Produce a blurhash from the provided encoder
pub fn finalize(mut self) -> String {
for (ComponentState { x, y, .. }, [r, g, b]) in self.factors.iter_mut() {
for (ComponentState { x, y, .. }, rgb) in self.factors.iter_mut() {
let normalisation = if *x == 0 && *y == 0 { 1. } else { 2. };
let scale = normalisation / (self.bounds.width * self.bounds.height) as f32;
*r *= scale;
*g *= scale;
*b *= scale;
for slot in rgb {
*slot *= scale;
}
}
let mut blurhash = String::new();
@@ -163,7 +167,7 @@ impl Encoder {
let size_flag = self.components.x - 1 + (self.components.y - 1) * 9;
base83::encode(size_flag, 1, &mut blurhash);
let maximum = ac.iter().fold(0.0_f32, |maximum, (_, [r, g, b])| {
let maximum = ac.iter().fold(0.0_f32, |maximum, (_, [r, g, b, _])| {
maximum.max(r.abs()).max(g.abs()).max(b.abs())
});
@@ -183,18 +187,14 @@ impl Encoder {
}
}
// NOTE(review): rendered diff with the +/- markers stripped. The first signature and the three
// `let` lines under it look like the removed version; the second signature and the `rgb.map`
// line look like the replacement. Both end in the same packing expression — confirm against
// the repository.
/// Converts each channel with `linear_to_srgb` and packs the results into one integer as
/// `(r << 16) + (g << 8) + b`.
fn encode_dc([r, g, b]: [f32; 3]) -> u32 {
let r = linear_to_srgb(r);
let g = linear_to_srgb(g);
let b = linear_to_srgb(b);
// Array form: `map` converts every slot, including the fourth, whose result is discarded.
fn encode_dc(rgb: [f32; BYTES_PER_PIXEL]) -> u32 {
let [r, g, b, _] = rgb.map(linear_to_srgb);
(r << 16) + (g << 8) + b
}
// NOTE(review): rendered diff with the +/- markers stripped. The first signature and the three
// `let` lines under it look like the removed version; the second signature and the `rgb.map`
// line look like the replacement. Both end in the same combining expression — confirm against
// the repository.
/// Passes each channel, together with `maximum_value`, through `encode_ac_digit` and combines
/// the three results as `r * 19 * 19 + g * 19 + b`.
///
/// Presumably each digit lies in `0..19`, making the result a three-digit base-19 number —
/// TODO confirm against `encode_ac_digit`.
fn encode_ac([r, g, b]: [f32; 3], maximum_value: f32) -> u32 {
let r = encode_ac_digit(r, maximum_value);
let g = encode_ac_digit(g, maximum_value);
let b = encode_ac_digit(b, maximum_value);
// Array form: the closure runs on every slot, including the fourth, whose result is discarded.
fn encode_ac(rgb: [f32; BYTES_PER_PIXEL], maximum_value: f32) -> u32 {
let [r, g, b, _] = rgb.map(|c| encode_ac_digit(c, maximum_value));
r * 19 * 19 + g * 19 + b
}