From c67bfa1722a53de208050bfde1188e958278a433 Mon Sep 17 00:00:00 2001
From: asonix
Date: Sat, 7 Oct 2023 14:55:39 -0500
Subject: [PATCH] Postgres is implemented, remove ideas doc

---
 docs/postgres-planning.md | 314 --------------------------------------
 1 file changed, 314 deletions(-)
 delete mode 100644 docs/postgres-planning.md

diff --git a/docs/postgres-planning.md b/docs/postgres-planning.md
deleted file mode 100644
index d39d5cb..0000000
--- a/docs/postgres-planning.md
+++ /dev/null
@@ -1,314 +0,0 @@

# Planning for implementing a postgres repo for pict-rs

## Technology

I've identified these crates as useful for achieving a reasonable postgres experience:
- [diesel-async](https://docs.rs/diesel-async/latest/diesel_async/)
- [refinery](https://docs.rs/refinery/latest/refinery/)
- [deadpool](https://docs.rs/deadpool/latest/deadpool/)
- [tokio-postgres](https://docs.rs/tokio-postgres/latest/tokio_postgres/)

tokio-postgres does the actual work of talking to postgres in all cases. diesel-async can use
tokio-postgres to execute queries, refinery can use tokio-postgres to run migrations, and deadpool
can pool tokio-postgres connections.

I've chosen this stack specifically to avoid depending on `libpq`, the C implementation of a
postgres client. This is not because it's written in C; nearly all postgres client libraries use
libpq to great success. It is to keep the build process for pict-rs simple: sticking with a
full-Rust stack means that only a Rust compiler is required to build pict-rs.


## Plan

pict-rs isolates different concepts behind different "Repo" traits. There's a single top-level
FullRepo that depends on the others to ensure everything gets implemented properly. Since there
has only been one repo implementation so far, the traits are not optimized for network databases
and some things are less efficient than they could be.


### HashRepo
This is a little complicated because one of the things a HashRepo can do is return a stream of
hashes from the repo. This can likely be implemented as a batch-retrieval operation that fetches
1000 hashes at once and then drains them on each call to `poll_next` (a sketch of this follows the
schema below).

methods:
- size
- hashes
- hash_page
- hash_page_by_date
- bound
- create_hash
- create_hash_with_timestamp
- update_identifier
- identifier
- relate_variant_identifier
- variant_identifier
- variants
- remove_variant
- relate_motion_identifier
- motion_identifier
- cleanup_hash

```sql
CREATE TABLE hashes (
    hash BYTEA PRIMARY KEY,
    identifier TEXT NOT NULL,
    motion_identifier TEXT,
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- paging through hashes
CREATE INDEX ordered_hash_index ON hashes (created_at, hash);


CREATE TABLE variants (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    hash BYTEA REFERENCES hashes(hash) ON DELETE CASCADE,
    variant TEXT NOT NULL,
    identifier TEXT NOT NULL
);


CREATE UNIQUE INDEX hash_variant_index ON variants (hash, variant);
```
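Here's a minimal sketch of that batching idea, assuming a hypothetical `fetch_hash_page` helper
that runs a keyset-paginated `SELECT` over `ordered_hash_index`. `futures::stream::try_unfold`
handles the `poll_next` bookkeeping for us; none of these names are pict-rs's real API.

```rust
use std::collections::VecDeque;

use futures::stream::{self, Stream};

type Hash = Vec<u8>;

#[derive(Debug)]
struct RepoError;

// Stand-in for a keyset-paginated query, e.g.
//   SELECT hash FROM hashes
//   WHERE (created_at, hash) > (<bound's created_at>, <bound's hash>)
//   ORDER BY created_at, hash LIMIT $1;
async fn fetch_hash_page(_bound: Option<&Hash>, _limit: usize) -> Result<Vec<Hash>, RepoError> {
    Ok(Vec::new())
}

/// Fetches 1000 hashes per query, yielding them one at a time.
fn hash_stream() -> impl Stream<Item = Result<Hash, RepoError>> {
    const PAGE_SIZE: usize = 1000;

    stream::try_unfold(
        (VecDeque::new(), None, false),
        |(mut buffer, mut bound, mut exhausted): (VecDeque<Hash>, Option<Hash>, bool)| async move {
            // Refill the buffer whenever it runs dry and more pages remain.
            if buffer.is_empty() && !exhausted {
                let page = fetch_hash_page(bound.as_ref(), PAGE_SIZE).await?;
                exhausted = page.len() < PAGE_SIZE;
                bound = page.last().cloned().or(bound);
                buffer.extend(page);
            }

            // Yielding None here ends the stream.
            Ok(buffer
                .pop_front()
                .map(|hash| (hash, (buffer, bound, exhausted))))
        },
    )
}
```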

### AliasRepo
Used to relate Aliases to Hashes, and to relate Delete Tokens to Aliases. Hashes are always bytes,
but could be hex-encoded. postgres's `bytea` type can work with hex-encoding on storage and
retrieval, so maybe this can be used. Delete Tokens are not always UUIDs, even though they have
been UUIDs in all recent versions of pict-rs.

methods:
- create_alias
- delete_token
- hash
- aliases_for_hash
- cleanup_alias

```sql
CREATE TABLE aliases (
    alias VARCHAR(50) PRIMARY KEY,
    hash BYTEA NOT NULL REFERENCES hashes(hash) ON DELETE CASCADE,
    delete_token VARCHAR(30) NOT NULL
);


CREATE INDEX aliases_hash_index ON aliases (hash);
```


### SettingsRepo
This is used for generic server-level storage. The file & object stores keep their current path
generator values here. This is also used in some migrations to mark completion.

methods:
- set
- get
- remove

This could be a simple table with string keys & values. pict-rs currently treats the value here as
Bytes, so it could either be hex-encoded or translated to a string.

```sql
CREATE TABLE settings (
    key VARCHAR(80) PRIMARY KEY,
    value VARCHAR(80) NOT NULL
);
```


### DetailsRepo
Used to relate details (image metadata) to identifiers (image paths). Identifiers are currently
treated as bytes, so they may need hex-encoding to store in the database. They _should_ be valid
strings in most environments, so it might be possible to drop the bytes requirement & instead have
a string requirement.

methods:
- relate_details
- details
- cleanup_details

```sql
CREATE TABLE details (
    identifier TEXT PRIMARY KEY,
    details JSONB NOT NULL
);
```


### QueueRepo
This is going to be the troublesome table. It represents jobs that will be processed. Jobs are
pushed as Bytes, but at a higher level are actually JSON strings. The QueueRepo API could be
updated to take `T: Serialize` as input rather than bytes, and then we can store it as JSONB. With
the current API, the repo doesn't need to know the shape of a job, and maybe that is a benefit. We
should take care in the future not to query on the contents of the job.

methods:
- push
- pop
- heartbeat
- complete_job

```sql
CREATE TYPE job_status AS ENUM ('new', 'running');


CREATE TABLE job_queue (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    queue VARCHAR(30) NOT NULL,
    job JSONB NOT NULL,
    status job_status NOT NULL DEFAULT 'new',
    queue_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    heartbeat TIMESTAMP
);


CREATE INDEX queue_status_index ON job_queue (queue, status);
CREATE INDEX heartbeat_index ON job_queue (heartbeat);
```

claiming a job can be
```sql
UPDATE job_queue SET status = 'new', heartbeat = NULL
WHERE
    heartbeat IS NOT NULL AND heartbeat < NOW() - INTERVAL '2 MINUTES';

UPDATE job_queue SET status = 'running', heartbeat = CURRENT_TIMESTAMP
WHERE id = (
    SELECT id
    FROM job_queue
    WHERE status = 'new' AND queue = '$QUEUE'
    ORDER BY queue_time ASC
    FOR UPDATE SKIP LOCKED
    LIMIT 1
)
RETURNING *;
```

notifying pict-rs of a ready job could be
```sql
CREATE OR REPLACE FUNCTION queue_status_notify()
    RETURNS trigger AS
$$
BEGIN
    PERFORM pg_notify('queue_status_channel', NEW.id::text);
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;


CREATE TRIGGER queue_status
    AFTER INSERT OR UPDATE OF status
    ON job_queue
    FOR EACH ROW
EXECUTE PROCEDURE queue_status_notify();
```

The postgres queue implementation comes from this blog post: https://webapp.io/blog/postgres-is-the-answer/
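As a rough sketch of the listening side, this is how pict-rs might consume `queue_status_channel`
with tokio-postgres: notifications arrive as `AsyncMessage`s on the `Connection` half, so it gets
polled by hand instead of being spawned directly. The connection string and the wake-up handling
are placeholders.

```rust
use futures::stream::{self, StreamExt};
use tokio_postgres::{AsyncMessage, NoTls};

#[tokio::main]
async fn main() -> Result<(), tokio_postgres::Error> {
    // Connection parameters are placeholders.
    let (client, mut connection) =
        tokio_postgres::connect("host=localhost user=pictrs dbname=pictrs", NoTls).await?;

    // Polling the connection both drives in-flight queries and surfaces
    // NOTIFY payloads, so wrap it in a stream rather than spawning it as-is.
    let mut messages = stream::poll_fn(move |cx| connection.poll_message(cx));

    tokio::spawn(async move {
        while let Some(message) = messages.next().await {
            match message {
                Ok(AsyncMessage::Notification(notification)) => {
                    // A job with this id is ready; wake any task blocked in `pop`.
                    println!("job ready: {}", notification.payload());
                }
                Ok(_) => {}
                Err(e) => {
                    eprintln!("connection error: {e}");
                    break;
                }
            }
        }
    });

    client.batch_execute("LISTEN queue_status_channel;").await?;

    // ... the rest of the application runs here, popping jobs on wake-up ...

    Ok(())
}
```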

### StoreMigrationRepo
This is used for migrating from local storage to object storage. It keeps track of which
identifiers have been migrated, and on a successful migration, it is fully cleared.

methods:
- is_continuing_migration
- mark_migrated
- is_migrated
- clear

```sql
CREATE TABLE migrations (
    identifier TEXT PRIMARY KEY
);
```


### ProxyRepo
This is used for keeping track of URLs that map to Aliases for proxied media.

methods:
- relate_url
- related
- remove_relation

```sql
CREATE TABLE proxies (
    url TEXT PRIMARY KEY,
    alias VARCHAR(50) NOT NULL REFERENCES aliases(alias)
);
```


### AliasAccessRepo
This is used for keeping track of aliases that are "cached" in pict-rs and can be safely removed
when they are not useful to keep around. This might be able to piggyback on the aliases table or
the proxies table.

methods:
- accessed_alias
- set_accessed_alias
- alias_accessed_at
- older_aliases
- remove_alias_access

```sql
ALTER TABLE aliases ADD COLUMN accessed TIMESTAMP;
```
or
```sql
ALTER TABLE proxies ADD COLUMN accessed TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP;
```


### VariantAccessRepo
This is used for keeping track of access times for variants of an image to enable freeing up space
from seldom-accessed variants. This might be able to piggyback on the variants table.

methods:
- accessed_variant
- set_accessed_variant
- variant_accessed_at
- older_variants
- remove_variant_access

```sql
ALTER TABLE variants ADD COLUMN accessed TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP;
```

### UploadRepo
Used to keep track of backgrounded uploads.

methods:
- create_upload
- wait
- claim
- complete_upload

```sql
CREATE TABLE uploads (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    result JSONB
);
```

Waiting for an upload
```sql
CREATE OR REPLACE FUNCTION upload_completion_notify()
    RETURNS trigger AS
$$
BEGIN
    PERFORM pg_notify('upload_completion_channel', NEW.id::text);
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;


CREATE TRIGGER upload_result
    AFTER INSERT OR UPDATE OF result
    ON uploads
    FOR EACH ROW
EXECUTE PROCEDURE upload_completion_notify();
```
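To illustrate how `wait` could sit on top of this, here's a rough sketch that checks the `uploads`
row and parks until a matching notification arrives. It assumes a listener task (like the queue
one above) fans the ids from `upload_completion_channel` out over a `tokio::sync::broadcast`
channel, and it relies on tokio-postgres's `with-uuid-1` and `with-serde_json-1` features; all
names are illustrative.

```rust
use std::time::Duration;

use tokio_postgres::Client;
use uuid::Uuid;

// Hypothetical wait implementation; `notified` receives the ids parsed out
// of upload_completion_channel payloads by a listener task.
async fn wait_for_upload(
    client: &Client,
    mut notified: tokio::sync::broadcast::Receiver<Uuid>,
    upload_id: Uuid,
) -> Result<serde_json::Value, tokio_postgres::Error> {
    loop {
        // Check first: the trigger may have fired before we subscribed.
        let row = client
            .query_opt("SELECT result FROM uploads WHERE id = $1", &[&upload_id])
            .await?;

        if let Some(row) = row {
            // `result` is nullable; NULL means the upload is still in flight.
            if let Some(result) = row.get::<_, Option<serde_json::Value>>(0) {
                return Ok(result);
            }
        }

        // Park until our id is announced, re-polling occasionally in case a
        // notification was missed.
        let _ = tokio::time::timeout(Duration::from_secs(5), async {
            while let Ok(id) = notified.recv().await {
                if id == upload_id {
                    break;
                }
            }
        })
        .await;
    }
}
```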