It compiles and runs but I think I need to flatten before getting quads
This commit is contained in:
parent
8432bb542d
commit
a1f40b3f67
18
Cargo.toml
18
Cargo.toml
|
@ -3,14 +3,28 @@ name = "json-ld-normalization"
|
|||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[[example]]
|
||||
name = "masto"
|
||||
required-features = ["rustcrypto"]
|
||||
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
[features]
|
||||
default = ["rustcrypto"]
|
||||
rustcrypto = ["sha2", "hex"]
|
||||
|
||||
[dependencies]
|
||||
contextual = "0.1.3"
|
||||
indexmap = "1.9.2"
|
||||
iref = "2.2.0"
|
||||
itertools = "0.10.5"
|
||||
json-ld = "0.9.1"
|
||||
locspan = "0.7.9"
|
||||
rdf-types = "0.12.4"
|
||||
static-iref = "2.0"
|
||||
sha2 = { version = "0.10.6", optional = true }
|
||||
hex = { version = "0.4.3", optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
iref = "2.2.0"
|
||||
reqwest = "0.11.13"
|
||||
static-iref = "2.0.0"
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
|
|
460
examples/masto.rs
Normal file
460
examples/masto.rs
Normal file
|
@ -0,0 +1,460 @@
|
|||
use contextual::WithContext;
|
||||
use iref::{Iri, IriBuf};
|
||||
use json_ld::{
|
||||
syntax::{parse::MetaError, Parse, Value},
|
||||
JsonLdProcessor, Loader, RemoteDocument,
|
||||
};
|
||||
use locspan::{Location, Meta};
|
||||
use rdf_types::{vocabulary::Index, IriVocabulary, IriVocabularyMut};
|
||||
use reqwest::Client;
|
||||
use static_iref::iri;
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
future::Future,
|
||||
pin::Pin,
|
||||
sync::{Arc, RwLock},
|
||||
};
|
||||
|
||||
/// IRI of the ActivityStreams JSON-LD context.
const ACTIVITYSTREAMS: &str = "https://www.w3.org/ns/activitystreams";
/// IRI of the W3ID security (v1) JSON-LD context.
const SECURITY: &str = "https://w3id.org/security/v1";

/// The only remote context IRIs the loader in this example is allowed to fetch;
/// `resolve_context` rejects anything that is not listed here.
const PERMITTED_CONTEXTS: [&str; 2] = [ACTIVITYSTREAMS, SECURITY];
|
||||
|
||||
type DynParser<I, M, T, E> = dyn 'static
|
||||
+ Send
|
||||
+ Sync
|
||||
+ FnMut(&dyn IriVocabulary<Iri = I>, &I, &str) -> Result<Meta<T, M>, E>;
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
struct Cache {
|
||||
inner: Arc<RwLock<HashMap<IriBuf, String>>>,
|
||||
}
|
||||
|
||||
struct ReqwestLoader<I = Index, M = Location<I>, T = Value<M>, E = MetaError<M>> {
|
||||
cache: Cache,
|
||||
parser: Box<DynParser<I, M, T, E>>,
|
||||
client: Client,
|
||||
}
|
||||
|
||||
impl Cache {
|
||||
fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
fn get(&self, url: &IriBuf) -> Option<String> {
|
||||
let guard = self.inner.read().unwrap();
|
||||
|
||||
guard.get(url).map(String::from)
|
||||
}
|
||||
|
||||
fn store(&self, url: IriBuf, body: String) {
|
||||
self.inner.write().unwrap().insert(url, body);
|
||||
}
|
||||
}
|
||||
|
||||
impl<I, M, T, E> ReqwestLoader<I, M, T, E> {
|
||||
fn new(
|
||||
cache: Cache,
|
||||
parser: impl 'static
|
||||
+ Send
|
||||
+ Sync
|
||||
+ FnMut(&dyn IriVocabulary<Iri = I>, &I, &str) -> Result<Meta<T, M>, E>,
|
||||
) -> Self {
|
||||
Self {
|
||||
cache,
|
||||
parser: Box::new(parser),
|
||||
client: Client::builder()
|
||||
.user_agent("json-ld-playground")
|
||||
.build()
|
||||
.expect("Successful client"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Clone> ReqwestLoader<I, Location<I>, Value<Location<I>>, MetaError<Location<I>>> {
|
||||
fn default_with_cache(cache: Cache) -> Self {
|
||||
Self::new(cache, |_, file: &I, s| {
|
||||
Value::parse_str(s, |span| Location::new(file.clone(), span))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Clone> Default
|
||||
for ReqwestLoader<I, Location<I>, Value<Location<I>>, MetaError<Location<I>>>
|
||||
{
|
||||
fn default() -> Self {
|
||||
Self::default_with_cache(Cache::default())
|
||||
}
|
||||
}
|
||||
|
||||
impl<I, M, T, E> ReqwestLoader<I, M, T, E> {
|
||||
async fn resolve_context(
|
||||
&self,
|
||||
vocabulary: &impl IriVocabulary<Iri = I>,
|
||||
url: &I,
|
||||
) -> Result<String, Error<E>> {
|
||||
let url = vocabulary.iri(url).unwrap().to_owned();
|
||||
|
||||
if !PERMITTED_CONTEXTS.contains(&url.as_str()) {
|
||||
println!("Fetching {url} is not permitted");
|
||||
return Err(Error::NotPermitted(url));
|
||||
}
|
||||
|
||||
if let Some(cached) = self.cache.get(&url) {
|
||||
println!("Got {url} from cache");
|
||||
return Ok(cached);
|
||||
}
|
||||
|
||||
println!("Fetching {url}");
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.get(url.as_str())
|
||||
.header("Accept", "application/ld+json")
|
||||
.send()
|
||||
.await
|
||||
.map_err(|_| Error::Fetch)?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
return Err(Error::Fetch);
|
||||
}
|
||||
|
||||
let body = response.text().await.map_err(|_| Error::Fetch)?;
|
||||
|
||||
self.cache.store(url, body.clone());
|
||||
|
||||
Ok(body)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum Error<E> {
|
||||
NotPermitted(IriBuf),
|
||||
Fetch,
|
||||
Parse(E),
|
||||
}
|
||||
|
||||
/// Boxed, thread-safe error used as the catch-all error of the example.
type AnyError = Box<dyn std::error::Error + Sync + Send>;
/// Pinned, boxed, `Send` future that may borrow data for `'a`.
type BoxFuture<'a, T> = Pin<Box<dyn Send + Future<Output = T> + 'a>>;
|
||||
|
||||
impl<I: Send + Sync, T: Send, M: Send, E> Loader<I, M> for ReqwestLoader<I, M, T, E> {
|
||||
type Output = T;
|
||||
type Error = Error<E>;
|
||||
|
||||
fn load_with<'a>(
|
||||
&'a mut self,
|
||||
vocabulary: &'a mut (impl Sync + Send + rdf_types::IriVocabularyMut<Iri = I>),
|
||||
url: I,
|
||||
) -> BoxFuture<'a, json_ld::LoadingResult<I, M, Self::Output, Self::Error>>
|
||||
where
|
||||
I: 'a,
|
||||
{
|
||||
Box::pin(async move {
|
||||
let s = self.resolve_context(vocabulary, &url).await?;
|
||||
|
||||
let doc = (*self.parser)(vocabulary, &url, &s).map_err(Error::Parse)?;
|
||||
|
||||
Ok(RemoteDocument::new(
|
||||
Some(url),
|
||||
Some("application/ld+json".parse().unwrap()),
|
||||
doc,
|
||||
))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), AnyError> {
|
||||
let cache = Cache::new();
|
||||
|
||||
for (iri, document) in [
|
||||
(
|
||||
iri!("https://masto.asonix.dog/actor"),
|
||||
MASTO_ASONIX_DOG_ACTOR,
|
||||
),
|
||||
(
|
||||
iri!("https://relay.asonix.dog/actor"),
|
||||
RELAY_ASONIX_DOG_ACTOR,
|
||||
),
|
||||
(
|
||||
iri!("https://masto.asonix.dog/users/asonix"),
|
||||
MASTO_ASONIX_DOG_ASONIX_ACTOR,
|
||||
),
|
||||
] {
|
||||
do_the_thing(cache.clone(), iri, document).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn do_the_thing(
|
||||
cache: Cache,
|
||||
iri: Iri<'static>,
|
||||
document: &'static str,
|
||||
) -> Result<(), AnyError> {
|
||||
let mut vocabulary: rdf_types::IndexVocabulary = rdf_types::IndexVocabulary::new();
|
||||
|
||||
let iri_index = vocabulary.insert(iri);
|
||||
|
||||
let input = RemoteDocument::new(
|
||||
Some(iri_index.clone()),
|
||||
Some("application/activity+json".parse().expect("Invalid mime")),
|
||||
Value::parse_str(document, |span| Location::new(iri_index, span))
|
||||
.expect("Unable to parse actor"),
|
||||
);
|
||||
|
||||
let mut loader = ReqwestLoader::default_with_cache(cache);
|
||||
|
||||
let expanded = input
|
||||
.expand_with(&mut vocabulary, &mut loader)
|
||||
.await
|
||||
.expect("Expansion failed");
|
||||
|
||||
let output_document = json_ld_normalization::normalize::<_, _, sha2::Sha256>(
|
||||
&mut vocabulary,
|
||||
iri_index,
|
||||
expanded,
|
||||
true,
|
||||
)
|
||||
.expect("Document is not time-complex");
|
||||
|
||||
for quad in output_document.quads {
|
||||
let (subject, predicate, object, graph) = quad.into_parts();
|
||||
|
||||
let subject = subject.with(&vocabulary);
|
||||
let predicate = predicate.with(&vocabulary);
|
||||
let object = object.with(&vocabulary);
|
||||
|
||||
if let Some(graph) = graph {
|
||||
let graph = graph.with(&vocabulary);
|
||||
println!("{subject} {predicate} {object} {graph}");
|
||||
} else {
|
||||
println!("{subject} {predicate} {object}");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Embedded JSON-LD actor document for `https://masto.asonix.dog/actor`,
/// used by `main` as one of the normalization inputs.
const MASTO_ASONIX_DOG_ACTOR: &str = r#"
{
  "@context": [
    "https://www.w3.org/ns/activitystreams",
    "https://w3id.org/security/v1",
    {
      "manuallyApprovesFollowers": "as:manuallyApprovesFollowers",
      "toot": "http://joinmastodon.org/ns#",
      "featured": {
        "@id": "toot:featured",
        "@type": "@id"
      },
      "featuredTags": {
        "@id": "toot:featuredTags",
        "@type": "@id"
      },
      "alsoKnownAs": {
        "@id": "as:alsoKnownAs",
        "@type": "@id"
      },
      "movedTo": {
        "@id": "as:movedTo",
        "@type": "@id"
      },
      "schema": "http://schema.org#",
      "PropertyValue": "schema:PropertyValue",
      "value": "schema:value",
      "discoverable": "toot:discoverable",
      "Device": "toot:Device",
      "Ed25519Signature": "toot:Ed25519Signature",
      "Ed25519Key": "toot:Ed25519Key",
      "Curve25519Key": "toot:Curve25519Key",
      "EncryptedMessage": "toot:EncryptedMessage",
      "publicKeyBase64": "toot:publicKeyBase64",
      "deviceId": "toot:deviceId",
      "claim": {
        "@type": "@id",
        "@id": "toot:claim"
      },
      "fingerprintKey": {
        "@type": "@id",
        "@id": "toot:fingerprintKey"
      },
      "identityKey": {
        "@type": "@id",
        "@id": "toot:identityKey"
      },
      "devices": {
        "@type": "@id",
        "@id": "toot:devices"
      },
      "messageFranking": "toot:messageFranking",
      "messageType": "toot:messageType",
      "cipherText": "toot:cipherText",
      "suspended": "toot:suspended"
    }
  ],
  "id": "https://masto.asonix.dog/actor",
  "type": "Application",
  "inbox": "https://masto.asonix.dog/actor/inbox",
  "outbox": "https://masto.asonix.dog/actor/outbox",
  "preferredUsername": "masto.asonix.dog",
  "url": "https://masto.asonix.dog/about/more?instance_actor=true",
  "manuallyApprovesFollowers": true,
  "publicKey": {
    "id": "https://masto.asonix.dog/actor#main-key",
    "owner": "https://masto.asonix.dog/actor",
    "publicKeyPem": "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAx8zXS0QNg9YGUBsxAOBH\nJaxIn7i6t+Z4UOpSFDVa2kP0NvQgIJsq3wzRqvaiuncRWpkyFk1fTakiRGD32xnY\nt+juuAaIBlU8eswKyANFqhcLAvFHmT3rA1848M4/YM19djvlL/PR9T53tPNHU+el\nS9MlsG3o6Zsj8YaUJtCI8RgEuJoROLHUb/V9a3oMQ7CfuIoSvF3VEz3/dRT09RW6\n0wQX7yhka9WlKuayWLWmTcB9lAIX6neBk+qKc8VSEsO7mHkzB8mRgVcS2uYZl1eA\nD8/jTT+SlpeFNDZk0Oh35GNFoOxh9qjRw3NGxu7jJCVBehDODzasOv4xDxKAhONa\njQIDAQAB\n-----END PUBLIC KEY-----\n"
  },
  "endpoints": {
    "sharedInbox": "https://masto.asonix.dog/inbox"
  }
}
"#;
|
||||
|
||||
/// Embedded JSON-LD actor document for `https://relay.asonix.dog/actor`,
/// used by `main` as one of the normalization inputs.
const RELAY_ASONIX_DOG_ACTOR: &str = r#"
{
  "publicKey": {
    "id": "https://relay.asonix.dog/actor#main-key",
    "owner": "https://relay.asonix.dog/actor",
    "publicKeyPem": "-----BEGIN PUBLIC KEY-----\nMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAw8UKc+men4z4Ex3Nx1be\ngti6DXAiRvCMp5pgOld7jjvSv2OqfdT71c1ZmAhNh6fMCSgusrGA+JGysExGODb4\nQw5NFa1jDxQkufMawEPQkvmB24vFi2C0PjVeSzu0oHZWfw4zlt+Lg1pOtlFOf41U\npFg282T+kDqIy968v8bH1wY3XzQ/jyb3jZD58jsXKtNUUzHs2K2IqJSuhuCJSkGd\nMkrCCXgLZ8Tw+dgYF+jLv1YNCPS2zBgM0v3IujNuszyzCobWUMfsGFfIvMORyvNP\nr5oxM1r3JV3CxMOOmskeYyh5DrtVSJ/ZVNabmj+Cy1WMadO8Rpd7e5j12pbY6ORu\nNu6mLOQ0qxX101bKYzVxVCP8KDN3j7RSkPB/K4gKPXZcHAcdRIjnlthDRicwZqW6\n1OsdP45tmfHxSIVNPmn1Wc4xWMsGU+EubmsxfXxkGNSaNpD1sjBf9vcf/c7q3mj8\nqcp74dOAmmO/O1AQnoQKCVqZaT26Db6YRn1P/lBHefdCetapmsyTkJ39Htxp3aAK\n+FHTxxX/hM6PlQwvteIvFHnaYBQgvp37L+zdXh87E2MQN4azj7LoUfmJG+7UVatn\nS61VCwLvKbDvlS8zg4RZo2hCBXG4rnyW8DpD2YdCMTKXvSySEzUF64Jomsxq7vqC\nNLWSay9Y9rhvQswxPe2XYEUCAwEAAQ==\n-----END PUBLIC KEY-----\n"
  },
  "inbox": "https://relay.asonix.dog/inbox",
  "outbox": "https://relay.asonix.dog/outbox",
  "following": "https://relay.asonix.dog/following",
  "followers": "https://relay.asonix.dog/followers",
  "preferredUsername": "relay",
  "endpoints": {
    "sharedInbox": "https://relay.asonix.dog/inbox"
  },
  "summary": "AodeRelay bot",
  "url": "https://relay.asonix.dog/actor",
  "@context": [
    "https://www.w3.org/ns/activitystreams",
    "https://w3id.org/security/v1"
  ],
  "id": "https://relay.asonix.dog/actor",
  "type": "Application",
  "name": "AodeRelay"
}
"#;
|
||||
|
||||
/// Embedded JSON-LD actor document for `https://masto.asonix.dog/users/asonix`,
/// used by `main` as one of the normalization inputs.
const MASTO_ASONIX_DOG_ASONIX_ACTOR: &str = r#"
{
  "@context": [
    "https://www.w3.org/ns/activitystreams",
    "https://w3id.org/security/v1",
    {
      "manuallyApprovesFollowers": "as:manuallyApprovesFollowers",
      "toot": "http://joinmastodon.org/ns#",
      "featured": {
        "@id": "toot:featured",
        "@type": "@id"
      },
      "featuredTags": {
        "@id": "toot:featuredTags",
        "@type": "@id"
      },
      "alsoKnownAs": {
        "@id": "as:alsoKnownAs",
        "@type": "@id"
      },
      "movedTo": {
        "@id": "as:movedTo",
        "@type": "@id"
      },
      "schema": "http://schema.org#",
      "PropertyValue": "schema:PropertyValue",
      "value": "schema:value",
      "discoverable": "toot:discoverable",
      "Device": "toot:Device",
      "Ed25519Signature": "toot:Ed25519Signature",
      "Ed25519Key": "toot:Ed25519Key",
      "Curve25519Key": "toot:Curve25519Key",
      "EncryptedMessage": "toot:EncryptedMessage",
      "publicKeyBase64": "toot:publicKeyBase64",
      "deviceId": "toot:deviceId",
      "claim": {
        "@type": "@id",
        "@id": "toot:claim"
      },
      "fingerprintKey": {
        "@type": "@id",
        "@id": "toot:fingerprintKey"
      },
      "identityKey": {
        "@type": "@id",
        "@id": "toot:identityKey"
      },
      "devices": {
        "@type": "@id",
        "@id": "toot:devices"
      },
      "messageFranking": "toot:messageFranking",
      "messageType": "toot:messageType",
      "cipherText": "toot:cipherText",
      "suspended": "toot:suspended",
      "focalPoint": {
        "@container": "@list",
        "@id": "toot:focalPoint"
      }
    }
  ],
  "id": "https://masto.asonix.dog/users/asonix",
  "type": "Person",
  "following": "https://masto.asonix.dog/users/asonix/following",
  "followers": "https://masto.asonix.dog/users/asonix/followers",
  "inbox": "https://masto.asonix.dog/users/asonix/inbox",
  "outbox": "https://masto.asonix.dog/users/asonix/outbox",
  "featured": "https://masto.asonix.dog/users/asonix/collections/featured",
  "featuredTags": "https://masto.asonix.dog/users/asonix/collections/tags",
  "preferredUsername": "asonix",
  "name": "Liom on Mane -> ANE",
  "summary": "<p>26, local liom, friend, rust (lang) stan, bi </p><p>icon by <span class=\"h-card\"><a href=\"https://furaffinity.net/user/lalupine\" target=\"blank\" rel=\"noopener noreferrer\" class=\"u-url mention\">@<span>lalupine@furaffinity.net</span></a></span><br />header by <span class=\"h-card\"><a href=\"https://furaffinity.net/user/tronixx\" target=\"blank\" rel=\"noopener noreferrer\" class=\"u-url mention\">@<span>tronixx@furaffinity.net</span></a></span></p><p>Testimonials:</p><p>Stand: LIONS<br />Stand User: AODE<br />- Keris (not on here)</p>",
  "url": "https://masto.asonix.dog/@asonix",
  "manuallyApprovesFollowers": true,
  "discoverable": true,
  "published": "2021-02-09T00:00:00Z",
  "devices": "https://masto.asonix.dog/users/asonix/collections/devices",
  "publicKey": {
    "id": "https://masto.asonix.dog/users/asonix#main-key",
    "owner": "https://masto.asonix.dog/users/asonix",
    "publicKeyPem": "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAm+YpyXb3bUp5EyryHqRA\npKSvl4RamJh6CLlngYYPFU8lcx92oQR8nFlqOwInAczGPoCoIfojQpZfqV4hFq1I\nlETy6jHHeoO/YkUsH2dYtz6gjEqiZFCFpoWuGxUQO3lwfmPYpxl2/GFEDR4MrUNp\n9fPn9jHUlKydiDkFQqluajqSJgv0BCwnUGBanTEfeQKahnc3OqPTi4xNbsd2cbAW\nZtJ6VYepphQCRHElvkzefe1ra5qm5i8YBdan3Z3oo5wN1vo3u41tqjVGhDptKZkv\nwBevdL0tedoLp5Lj1l/HLTSBP0D0ZT/HUFuo6Zq27PCq/4ZgJaZkMi7YCVVtpjim\nmQIDAQAB\n-----END PUBLIC KEY-----\n"
  },
  "tag": [],
  "attachment": [
    {
      "type": "PropertyValue",
      "name": "pronouns",
      "value": "he/they"
    },
    {
      "type": "PropertyValue",
      "name": "software",
      "value": "bad"
    },
    {
      "type": "PropertyValue",
      "name": "gitea",
      "value": "<a href=\"https://git.asonix.dog\" target=\"_blank\" rel=\"nofollow noopener noreferrer me\"><span class=\"invisible\">https://</span><span class=\"\">git.asonix.dog</span><span class=\"invisible\"></span></a>"
    },
    {
      "type": "PropertyValue",
      "name": "join my",
      "value": "relay"
    }
  ],
  "endpoints": {
    "sharedInbox": "https://masto.asonix.dog/inbox"
  },
  "icon": {
    "type": "Image",
    "mediaType": "image/png",
    "url": "https://masto.asonix.dog/system/accounts/avatars/000/000/001/original/00852df0e6fee7e0.png"
  },
  "image": {
    "type": "Image",
    "mediaType": "image/png",
    "url": "https://masto.asonix.dog/system/accounts/headers/000/000/001/original/8122ce3e5a745385.png"
  }
}
"#;
|
|
@ -1,6 +1,10 @@
|
|||
use crate::{BlankNodeGenerator, Expanded};
|
||||
use crate::Expanded;
|
||||
use json_ld::{RdfQuads, ValidId as Subject};
|
||||
use rdf_types::{BlankIdVocabulary, IriVocabulary, Quad, Vocabulary, VocabularyMut};
|
||||
use locspan::Location;
|
||||
use rdf_types::{
|
||||
generator::{Blank, WithMetadata},
|
||||
BlankIdVocabulary, IriVocabulary, Quad, Vocabulary, VocabularyMut,
|
||||
};
|
||||
use std::hash::Hash;
|
||||
|
||||
pub(crate) type QuadSubject<N> =
|
||||
|
@ -14,23 +18,26 @@ pub(crate) type NormalizingQuad<N> =
|
|||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub(crate) struct Position(usize);
|
||||
|
||||
pub struct InputDataset<N>
|
||||
pub(crate) struct InputDataset<N>
|
||||
where
|
||||
N: Vocabulary,
|
||||
{
|
||||
quads: Vec<NormalizingQuad<N>>,
|
||||
}
|
||||
|
||||
type BlankNodeGenerator<D> = WithMetadata<Blank, Location<D>>;
|
||||
|
||||
impl<N> InputDataset<N>
|
||||
where
|
||||
N: Vocabulary,
|
||||
{
|
||||
pub(crate) fn from_expanded(
|
||||
expanded: Expanded<N>,
|
||||
pub(crate) fn from_expanded<D>(
|
||||
expanded: Expanded<N, D>,
|
||||
vocabulary: &mut N,
|
||||
generator: &mut BlankNodeGenerator,
|
||||
generator: &mut BlankNodeGenerator<D>,
|
||||
) -> InputDataset<N>
|
||||
where
|
||||
D: Clone,
|
||||
N: VocabularyMut,
|
||||
N::Iri: Clone + Eq + Hash + Send + Sync,
|
||||
N::BlankId: Clone + Eq + Hash + Send + Sync,
|
||||
|
@ -63,3 +70,16 @@ where
|
|||
.map(|(index, quad)| (Position(index), quad))
|
||||
}
|
||||
}
|
||||
|
||||
impl<N> std::fmt::Debug for InputDataset<N>
|
||||
where
|
||||
N: Vocabulary,
|
||||
N::BlankId: std::fmt::Debug,
|
||||
N::Iri: std::fmt::Debug,
|
||||
{
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("InputDataset")
|
||||
.field("quads", &self.quads)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
|
97
src/lib.rs
97
src/lib.rs
|
@ -1,15 +1,14 @@
|
|||
use contextual::WithContext;
|
||||
use indexmap::IndexMap;
|
||||
use iref::IriBuf;
|
||||
use itertools::Itertools;
|
||||
use json_ld::{rdf::Value, ExpandedDocument, ValidId as Subject};
|
||||
use locspan::{Location, Meta, Span};
|
||||
use rdf_types::{
|
||||
generator::{Blank, WithMetadata},
|
||||
BlankIdVocabulary, BlankIdVocabularyMut, IriVocabulary, Vocabulary, VocabularyMut,
|
||||
generator::Blank, BlankIdVocabulary, BlankIdVocabularyMut, IriVocabulary, Vocabulary,
|
||||
VocabularyMut,
|
||||
};
|
||||
use std::{
|
||||
borrow::{Borrow, Cow},
|
||||
borrow::Cow,
|
||||
collections::{BTreeMap, HashMap, HashSet},
|
||||
hash::Hash,
|
||||
};
|
||||
|
@ -18,6 +17,23 @@ mod input_dataset;
|
|||
|
||||
use input_dataset::{InputDataset, NormalizingQuad, Position, QuadSubject, QuadValue};
|
||||
|
||||
/// Implements the crate's `Sha256` trait for the `sha2` crate's hasher.
/// Only compiled when the `rustcrypto` feature is enabled.
#[cfg(feature = "rustcrypto")]
mod sha2_impls {
    use sha2::{Digest, Sha256};

    impl super::Sha256 for Sha256 {
        /// Feeds `bytes` into the running digest.
        fn update(&mut self, bytes: &[u8]) {
            <Self as Digest>::update(self, bytes)
        }

        /// Finishes the current digest, resets the hasher, and returns the
        /// digest hex-encoded.
        fn finalize_hex_and_reset(&mut self) -> crate::HexHash {
            let digest = Digest::finalize_reset(self);

            crate::HexHash(hex::encode(&digest))
        }
    }
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Security;
|
||||
|
||||
|
@ -30,17 +46,11 @@ pub trait Sha256 {
|
|||
fn finalize_hex_and_reset(&mut self) -> HexHash;
|
||||
}
|
||||
|
||||
type Expanded<N> = Meta<
|
||||
ExpandedDocument<
|
||||
<N as IriVocabulary>::Iri,
|
||||
<N as BlankIdVocabulary>::BlankId,
|
||||
Location<IriBuf>,
|
||||
>,
|
||||
Location<IriBuf>,
|
||||
type Expanded<N, D> = Meta<
|
||||
ExpandedDocument<<N as IriVocabulary>::Iri, <N as BlankIdVocabulary>::BlankId, Location<D>>,
|
||||
Location<D>,
|
||||
>;
|
||||
|
||||
type BlankNodeGenerator = WithMetadata<Blank, Location<IriBuf>>;
|
||||
|
||||
pub struct OutputDataset<N>
|
||||
where
|
||||
N: Vocabulary,
|
||||
|
@ -48,7 +58,7 @@ where
|
|||
pub quads: Vec<NormalizingQuad<N>>,
|
||||
}
|
||||
|
||||
pub struct CanonicalizationState<N, S>
|
||||
pub struct CanonicalizationState<'a, N, S>
|
||||
where
|
||||
N: Vocabulary,
|
||||
{
|
||||
|
@ -56,7 +66,7 @@ where
|
|||
|
||||
// Identifier Prefix and Identifier Counter
|
||||
blank_node_generator: Blank,
|
||||
vocabulary: N,
|
||||
vocabulary: &'a mut N,
|
||||
|
||||
// Issued Identifier List
|
||||
issued_identifier_list: IndexMap<N::BlankId, N::BlankId>,
|
||||
|
@ -65,17 +75,18 @@ where
|
|||
hash_to_blank_nodes: BTreeMap<HexHash, HashSet<N::BlankId>>,
|
||||
}
|
||||
|
||||
pub fn normalize<N, S>(
|
||||
vocabulary: N,
|
||||
document_id: IriBuf,
|
||||
expanded: Expanded<N>,
|
||||
pub fn normalize<N, D, S>(
|
||||
vocabulary: &mut N,
|
||||
document_id: D,
|
||||
expanded: Expanded<N, D>,
|
||||
bail_on_large_inputs: bool,
|
||||
) -> Result<OutputDataset<N>, Security>
|
||||
where
|
||||
D: Clone,
|
||||
S: Sha256 + Default,
|
||||
N: Vocabulary + VocabularyMut + Default,
|
||||
N::Iri: Clone + Eq + Hash + Send + Sync,
|
||||
N::BlankId: Clone + Eq + Hash + Send + Sync + for<'a> Borrow<&'a N::BlankId>,
|
||||
N::Iri: Clone + Eq + Hash + Send + Sync + std::fmt::Debug,
|
||||
N::BlankId: Clone + Eq + Hash + Send + Sync + std::fmt::Debug,
|
||||
{
|
||||
CanonicalizationState::<N, S>::new(vocabulary).normalize(
|
||||
document_id,
|
||||
|
@ -84,12 +95,12 @@ where
|
|||
)
|
||||
}
|
||||
|
||||
impl<N, S> CanonicalizationState<N, S>
|
||||
impl<'a, N, S> CanonicalizationState<'a, N, S>
|
||||
where
|
||||
N: Vocabulary,
|
||||
{
|
||||
/// Step 1
|
||||
fn new(vocabulary: N) -> Self
|
||||
fn new(vocabulary: &'a mut N) -> Self
|
||||
where
|
||||
S: Default,
|
||||
{
|
||||
|
@ -103,20 +114,23 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
fn normalize(
|
||||
fn normalize<D>(
|
||||
mut self,
|
||||
document_id: IriBuf,
|
||||
expanded: Expanded<N>,
|
||||
document_id: D,
|
||||
expanded: Expanded<N, D>,
|
||||
bail_on_large_inputs: bool,
|
||||
) -> Result<OutputDataset<N>, Security>
|
||||
where
|
||||
D: Clone,
|
||||
S: Sha256,
|
||||
N: VocabularyMut + Default,
|
||||
N::Iri: Clone + Eq + Hash + Send + Sync,
|
||||
N::BlankId: Clone + Eq + Hash + Send + Sync + for<'a> Borrow<&'a N::BlankId>,
|
||||
N::Iri: Clone + Eq + Hash + Send + Sync + std::fmt::Debug,
|
||||
N::BlankId: Clone + Eq + Hash + Send + Sync + std::fmt::Debug,
|
||||
{
|
||||
let input_dataset = self.input_dataset(document_id, expanded);
|
||||
|
||||
println!("{:?}", input_dataset);
|
||||
|
||||
// Step 2
|
||||
self.find_blank_nodes(&input_dataset);
|
||||
|
||||
|
@ -131,8 +145,9 @@ where
|
|||
}
|
||||
|
||||
// (preparing input dataset is not a step, but we're coming from json ld types here)
|
||||
fn input_dataset(&mut self, document_id: IriBuf, expanded: Expanded<N>) -> InputDataset<N>
|
||||
fn input_dataset<D>(&mut self, document_id: D, expanded: Expanded<N, D>) -> InputDataset<N>
|
||||
where
|
||||
D: Clone,
|
||||
N: VocabularyMut,
|
||||
N::Iri: Clone + Eq + Hash + Send + Sync,
|
||||
N::BlankId: Clone + Eq + Hash + Send + Sync,
|
||||
|
@ -193,7 +208,7 @@ where
|
|||
// step 5.3.1
|
||||
let hash = hash_first_degree_quads(
|
||||
&self.blank_node_to_quads,
|
||||
&self.vocabulary,
|
||||
self.vocabulary,
|
||||
(*identifier).clone(),
|
||||
input_dataset,
|
||||
&mut self.sha256,
|
||||
|
@ -223,7 +238,7 @@ where
|
|||
issue_identifier_algorithm(
|
||||
identifier,
|
||||
&mut self.blank_node_generator,
|
||||
&mut self.vocabulary,
|
||||
self.vocabulary,
|
||||
&mut self.issued_identifier_list,
|
||||
);
|
||||
}
|
||||
|
@ -245,7 +260,7 @@ where
|
|||
) -> Result<(), Security>
|
||||
where
|
||||
N: Default + BlankIdVocabularyMut + VocabularyMut,
|
||||
N::BlankId: Clone + Eq + Hash + for<'a> Borrow<&'a N::BlankId>,
|
||||
N::BlankId: Clone + Eq + Hash,
|
||||
S: Sha256,
|
||||
{
|
||||
let hash_to_blank_nodes =
|
||||
|
@ -303,7 +318,7 @@ where
|
|||
issue_identifier_algorithm(
|
||||
existing_identifier,
|
||||
&mut self.blank_node_generator,
|
||||
&mut self.vocabulary,
|
||||
self.vocabulary,
|
||||
&mut self.issued_identifier_list,
|
||||
);
|
||||
}
|
||||
|
@ -316,7 +331,7 @@ where
|
|||
// Step 7
|
||||
fn normalize_quads(&self, input_dataset: &InputDataset<N>) -> OutputDataset<N>
|
||||
where
|
||||
N::BlankId: Eq + Hash + Clone + for<'a> Borrow<&'a N::BlankId>,
|
||||
N::BlankId: Eq + Hash + Clone,
|
||||
N::Iri: Clone,
|
||||
{
|
||||
let quads = input_dataset
|
||||
|
@ -344,7 +359,7 @@ where
|
|||
|
||||
fn translate_object(&self, object: &QuadValue<N>) -> Option<QuadValue<N>>
|
||||
where
|
||||
N::BlankId: Eq + Hash + Clone + for<'a> Borrow<&'a N::BlankId>,
|
||||
N::BlankId: Eq + Hash + Clone,
|
||||
N::Iri: Clone,
|
||||
{
|
||||
match object {
|
||||
|
@ -355,13 +370,13 @@ where
|
|||
|
||||
fn translate_subject(&self, subject: &QuadSubject<N>) -> Option<QuadSubject<N>>
|
||||
where
|
||||
N::BlankId: Eq + Hash + Clone + for<'a> Borrow<&'a N::BlankId>,
|
||||
N::BlankId: Eq + Hash + Clone,
|
||||
N::Iri: Clone,
|
||||
{
|
||||
match subject {
|
||||
Subject::Iri(iri) => Some(Subject::Iri(iri.clone())),
|
||||
Subject::Blank(blank) => Some(Subject::Blank(
|
||||
self.issued_identifier_list.get(&blank)?.clone(),
|
||||
self.issued_identifier_list.get(blank)?.clone(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
@ -380,14 +395,14 @@ fn hash_n_degree_quads<N, S>(
|
|||
) -> HexHash
|
||||
where
|
||||
N: Vocabulary + VocabularyMut,
|
||||
N::BlankId: Clone + Eq + Hash + for<'a> Borrow<&'a N::BlankId>,
|
||||
N::BlankId: Clone + Eq + Hash,
|
||||
S: Sha256,
|
||||
{
|
||||
// step 1
|
||||
let mut hash_to_related_blank_nodes: HashMap<HexHash, HashSet<N::BlankId>> = HashMap::new();
|
||||
|
||||
// step 2
|
||||
if let Some(quad_positions) = blank_node_to_quads.get(&&identifier) {
|
||||
if let Some(quad_positions) = blank_node_to_quads.get(&identifier) {
|
||||
// step 3
|
||||
for quad_position in quad_positions {
|
||||
let quad = input_dataset
|
||||
|
@ -426,6 +441,8 @@ where
|
|||
.insert(related.clone());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
eprintln!("No quad positions");
|
||||
}
|
||||
|
||||
// step 4
|
||||
|
@ -566,7 +583,7 @@ fn hash_related_blank_node<N, S>(
|
|||
) -> HexHash
|
||||
where
|
||||
N: Vocabulary,
|
||||
N::BlankId: Clone + Eq + Hash + for<'a> Borrow<&'a N::BlankId>,
|
||||
N::BlankId: Clone + Eq + Hash,
|
||||
S: Sha256,
|
||||
{
|
||||
// step 1
|
||||
|
|
Loading…
Reference in a new issue