From e8f119847a98b9ef67c57b467eb8cce656dfa2b4 Mon Sep 17 00:00:00 2001 From: asonix Date: Wed, 14 Dec 2022 23:35:53 -0600 Subject: [PATCH] Serialization works (step 6 untested, step 8.1 untested) --- normalization/Cargo.toml | 2 +- .../examples/{masto.rs => normalize_ap.rs} | 0 serialization/Cargo.toml | 8 + serialization/examples/serialize_ap.rs | 199 +++ serialization/src/lib.rs | 1074 +++++++++++++---- 5 files changed, 1028 insertions(+), 255 deletions(-) rename normalization/examples/{masto.rs => normalize_ap.rs} (100%) create mode 100644 serialization/examples/serialize_ap.rs diff --git a/normalization/Cargo.toml b/normalization/Cargo.toml index 3cd5f9d..e66f61e 100644 --- a/normalization/Cargo.toml +++ b/normalization/Cargo.toml @@ -4,7 +4,7 @@ version = "0.1.0" edition = "2021" [[example]] -name = "masto" +name = "normalize_ap" required-features = ["rustcrypto-sha2"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/normalization/examples/masto.rs b/normalization/examples/normalize_ap.rs similarity index 100% rename from normalization/examples/masto.rs rename to normalization/examples/normalize_ap.rs diff --git a/serialization/Cargo.toml b/serialization/Cargo.toml index 3751417..a0957a0 100644 --- a/serialization/Cargo.toml +++ b/serialization/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" [dependencies] contextual = "0.1.3" +indexmap = "1.9.2" iref = "2.2.0" json-ld = "0.9.1" json-ld-syntax = "0.9.1" @@ -16,3 +17,10 @@ locspan = "0.7.9" rdf-types = "0.12.4" smallvec = "1.10.0" static-iref = "2.0.0" + +[dev-dependencies] +json-ld = { version = "0.9.1", features = ["reqwest"] } +json-ld-normalization = { path = "../normalization" } +reqwest = "0.11.13" +sha2 = "0.10" +tokio = { version = "1", features = ["full"] } diff --git a/serialization/examples/serialize_ap.rs b/serialization/examples/serialize_ap.rs new file mode 100644 index 0000000..545c75c --- /dev/null +++ b/serialization/examples/serialize_ap.rs @@ -0,0 +1,199 @@ +use iref::Iri; +use json_ld::{ + syntax::Parse, Compact, ExpandedDocument, JsonLdProcessor, Print, Process, RemoteDocument, + RemoteDocumentReference, ReqwestLoader, TryFromJson, +}; +use json_ld_syntax::TryFromJson as _; +use locspan::{Location, Span}; +use rdf_types::{generator::Blank, IriVocabularyMut}; +use reqwest::Client; +use static_iref::iri; + +const SIMPLE_CONTEXT: &str = r#"[ + "https://www.w3.org/ns/activitystreams", + "https://w3id.org/security/v1" +]"#; + +const MASTODON_CONTEXT: &str = r#"[ + "https://www.w3.org/ns/activitystreams", + "https://w3id.org/security/v1", + { + "manuallyApprovesFollowers":"as:manuallyApprovesFollowers", + "toot":"http://joinmastodon.org/ns#", + "featured":{ + "@id":"toot:featured", + "@type":"@id" + }, + "featuredTags":{ + "@id":"toot:featuredTags", + "@type":"@id" + }, + "alsoKnownAs":{ + "@id":"as:alsoKnownAs", + "@type":"@id" + }, + "movedTo":{ + "@id":"as:movedTo", + "@type":"@id" + }, + "schema":"http://schema.org#", + "PropertyValue":"schema:PropertyValue", + "value":"schema:value", + "discoverable":"toot:discoverable", + "Device":"toot:Device", + "Ed25519Signature":"toot:Ed25519Signature", + "Ed25519Key":"toot:Ed25519Key", + "Curve25519Key":"toot:Curve25519Key", + "EncryptedMessage":"toot:EncryptedMessage", + "publicKeyBase64":"toot:publicKeyBase64", + "deviceId":"toot:deviceId", + "claim":{ + "@type":"@id", + "@id":"toot:claim" + }, + "fingerprintKey":{ + "@type":"@id", + "@id":"toot:fingerprintKey" + }, + "identityKey":{ + "@type":"@id", + "@id":"toot:identityKey" + }, + "devices":{ + "@type":"@id", + "@id":"toot:devices" + }, + "messageFranking":"toot:messageFranking", + "messageType":"toot:messageType", + "cipherText":"toot:cipherText", + "suspended":"toot:suspended", + "Hashtag": "as:Hashtag", + "focalPoint":{ + "@container":"@list", + "@id":"toot:focalPoint" + } + } +]"#; + +type AnyError = Box; + +#[tokio::main] +async fn main() -> Result<(), AnyError> { + let client = Client::builder() + .user_agent("json-ld-playground") + .build() + .expect("Successful client"); + + let iris = [ + iri!("https://relay.asonix.dog/actor"), + iri!("https://masto.asonix.dog/actor"), + iri!("https://masto.asonix.dog/users/asonix"), + iri!("https://masto.asonix.dog/users/kumu"), + iri!("https://yiff.life/users/6my"), + iri!("https://meow.social/users/6my"), + ]; + + for iri in iris { + let document = client + .get(iri.as_str()) + .header("accept", "application/activity+json") + .send() + .await? + .text() + .await?; + + normalize_document(iri, &document).await?; + } + + Ok(()) +} + +async fn normalize_document(iri: Iri<'static>, document: &str) -> Result<(), AnyError> { + let mut vocabulary: rdf_types::IndexVocabulary = rdf_types::IndexVocabulary::new(); + + let iri_index = vocabulary.insert(iri); + + let input = RemoteDocument::new( + Some(iri_index.clone()), + Some("application/activity+json".parse()?), + json_ld_syntax::Value::parse_str(document, |span| Location::new(iri_index.clone(), span)) + .expect("Failed to parse"), + ); + + let mut loader = ReqwestLoader::default(); + + let mut expanded = input + .expand_with(&mut vocabulary, &mut loader) + .await + .expect("Failed to expand"); + + let mut pre_gen = Blank::new().with_metadata(Location::new(iri_index.clone(), Span::default())); + + expanded.identify_all_with(&mut vocabulary, &mut pre_gen); + + let output_document = json_ld_normalization::normalize::<_, _, _, sha2::Sha256>( + &mut vocabulary, + iri_index, + expanded.0, + true, + )?; + + let serialized = json_ld_serialization::rdf_to_json_ld( + output_document.into_quads(), + Location::new(iri_index.clone(), Span::default()), + true, + None, + true, + false, + &vocabulary, + ) + .expect("Failed to normalize"); + + println!("{}", serialized.pretty_print()); + + let expanded = ExpandedDocument::try_from_json_in(&mut vocabulary, serialized) + .expect("Invalid expanded json"); + + for context in [MASTODON_CONTEXT, SIMPLE_CONTEXT] { + let context = RemoteDocumentReference::Loaded(RemoteDocument::new( + Some(iri_index.clone()), + Some("application/ld+json".parse()?), + json_ld_syntax::context::Value::try_from_json( + json_ld_syntax::Value::parse_str(context, |span| { + Location::new(iri_index.clone(), span) + }) + .expect("Failed to parse"), + ) + .expect("Failed to parse context"), + )) + .load_context_with(&mut vocabulary, &mut loader) + .await + .expect("Context is loaded") + .into_document(); + + let processed = context + .process(&mut vocabulary, &mut loader, None) + .await + .expect("Failed to process context"); + + let compacted = expanded + .compact_full( + &mut vocabulary, + processed.as_ref(), + &mut loader, + json_ld::compaction::Options { + processing_mode: json_ld::ProcessingMode::JsonLd1_1, + compact_to_relative: true, + compact_arrays: true, + ordered: true, + }, + ) + .await + .map_err(|_| ()) + .expect("Failed to compact"); + + println!("output: {}", compacted.pretty_print()); + } + + Ok(()) +} diff --git a/serialization/src/lib.rs b/serialization/src/lib.rs index c2c41ea..a6531a1 100644 --- a/serialization/src/lib.rs +++ b/serialization/src/lib.rs @@ -1,28 +1,32 @@ -use std::{ - collections::{HashMap, HashSet}, - hash::Hash, -}; - +use indexmap::IndexMap; use iref::Iri; use json_ld::{ - object::{Literal, LiteralString}, - rdf::RdfDirection, - Direction, Id, LangString, LenientLanguageTagBuf, Node, Object, ValidId as Subject, Value, + rdf::{ + RdfDirection, RDF_DIRECTION, RDF_FIRST, RDF_JSON, RDF_NIL, RDF_REST, RDF_TYPE, RDF_VALUE, + XSD_BOOLEAN, XSD_DOUBLE, XSD_INTEGER, XSD_STRING, + }, + ValidId as Subject, }; use json_ld_syntax::Parse; use locspan::Meta; use rdf_types::{BlankId, BlankIdVocabulary, IriVocabulary, Quad, Triple, Vocabulary}; use smallvec::SmallVec; use static_iref::iri; +use std::{ + cell::{Ref, RefCell, RefMut}, + collections::HashMap, + hash::Hash, + rc::Rc, +}; + +const RDF_LANGUAGE: Iri<'static> = iri!("http://www.w3.org/1999/02/22-rdf-syntax-ns#language"); +const RDF_LIST: Iri<'static> = iri!("http://www.w3.org/1999/02/22-rdf-syntax-ns#list"); type QuadSubject = Subject<::Iri, ::BlankId>; type QuadValue = json_ld::rdf::Value<::Iri, ::BlankId>; type NormalizingQuad = Quad, QuadSubject, QuadValue, QuadSubject>; -type SerializedObject = - Object<::Iri, ::BlankId, M>; - type SerializingTriple = Triple, QuadSubject, QuadValue>; struct InputDataset @@ -70,28 +74,14 @@ where ) -> impl Iterator>, &'a [SerializingTriple])> { self.graphs.iter().map(|(k, v)| (k.as_ref(), v.as_slice())) } - - fn graph_names<'a>( - &'a self, - vocabulary: &'a N, - ) -> impl Iterator, &'a BlankId>> - where - N::BlankId: Eq + Hash, - N::Iri: Eq + Hash, - { - self.graphs.keys().filter_map(|subject| match subject { - Some(Subject::Iri(iri)) => Some(Subject::Iri(vocabulary.iri(iri)?)), - Some(Subject::Blank(blank)) => Some(Subject::Blank(vocabulary.blank_id(blank)?)), - None => None, - }) - } } fn expect_iri(id: &N::Iri, iri: Iri<'_>, vocabulary: &N) -> bool where N: Vocabulary, + N::Iri: Eq, { - get_iri(id, vocabulary) == iri + vocabulary.get(iri).map(|iri| iri == *id).unwrap_or(false) || get_iri(id, vocabulary) == iri } fn get_iri<'a, N>(id: &'a N::Iri, vocabulary: &'a N) -> Iri<'a> @@ -101,22 +91,354 @@ where vocabulary.iri(id).expect("Id in vocabulary") } -fn get_subject<'a, N>(id: &'a QuadSubject, vocabulary: &'a N) -> &'a str +fn get_subject<'a, N>(id: &'a QuadSubject, vocabulary: &'a N) -> Subject, &'a BlankId> where N: Vocabulary, { match id { - Subject::Iri(iri) => vocabulary.iri(iri).expect("Id in vocabulary").as_str(), - Subject::Blank(blank) => vocabulary - .blank_id(blank) - .expect("Id in vocabulary") - .as_str(), + Subject::Iri(iri) => Subject::Iri(vocabulary.iri(iri).expect("Id in vocabulary")), + Subject::Blank(blank) => { + Subject::Blank(vocabulary.blank_id(blank).expect("Id in vocabulary")) + } + } +} + +fn subject_str<'a>(subject: &'a Subject, &'a BlankId>) -> &'a str { + match subject { + Subject::Iri(ref iri) => iri.as_str(), + Subject::Blank(ref blank) => blank.as_str(), + } +} + +fn subject_matches(subject: &Subject, iri: Iri<'_>, vocabulary: &N) -> bool +where + N: Vocabulary, + N::Iri: Eq, +{ + match subject { + Subject::Iri(id) => expect_iri(id, iri, vocabulary), + Subject::Blank(_) => false, + } +} + +fn subject_string(id: &Subject, vocabulary: &N) -> String +where + N: Vocabulary, +{ + let subject = get_subject(id, vocabulary); + let s = subject_str(&subject); + s.to_string() +} + +fn subject_json(id: &Subject, vocabulary: &N) -> JsonValue +where + N: Vocabulary, +{ + let subject = get_subject(id, vocabulary); + let s = subject_str(&subject); + s.into() +} + +#[derive(Clone, Debug, PartialEq, Eq)] +struct JsonValue { + value: JsonValueKind, +} + +impl JsonValue { + fn is_blank_node(&self) -> bool { + self.get_string() + .map(|s| s.starts_with("_:")) + .unwrap_or(false) + } + + fn matches_string(&self, s: &str) -> bool { + self.get_string() + .map(|string| &*string == s) + .unwrap_or(false) + } + + fn get_string(&self) -> Option<&str> { + match self.value { + JsonValueKind::String(ref s) => Some(s.as_str()), + _ => None, + } + } + + fn get_array(&self) -> Option> { + match self.value { + JsonValueKind::Array(ref array) => Some(array.borrow()), + _ => None, + } + } + + fn get_array_mut(&self) -> Option> { + match self.value { + JsonValueKind::Array(ref array) => Some(array.borrow_mut()), + _ => None, + } + } + + fn get_map(&self) -> Option> { + match self.value { + JsonValueKind::Map(ref map) => Some(map.borrow()), + _ => None, + } + } + + fn get_map_mut(&self) -> Option> { + match self.value { + JsonValueKind::Map(ref map) => Some(map.borrow_mut()), + _ => None, + } + } + + fn to_map(&self) -> Option>> { + match self.value { + JsonValueKind::Map(ref map) => Some(map.clone()), + _ => None, + } + } +} + +type Map = IndexMap; +type Array = Vec; + +#[derive(Clone, Debug, PartialEq, Eq)] +enum JsonValueKind { + Map(Rc>), + Array(Rc>), + String(json_syntax::String), + Number(json_syntax::NumberBuf), + Boolean(bool), + Null, +} + +impl From for JsonValue { + fn from(value: Map) -> Self { + JsonValue { + value: JsonValueKind::Map(Rc::new(RefCell::new(value))), + } + } +} + +impl From>> for JsonValue { + fn from(value: Rc>) -> Self { + JsonValue { + value: JsonValueKind::Map(value), + } + } +} + +impl From for JsonValue { + fn from(value: Array) -> Self { + JsonValue { + value: JsonValueKind::Array(Rc::new(RefCell::new(value))), + } + } +} + +impl From>> for JsonValue { + fn from(value: Rc>) -> Self { + JsonValue { + value: JsonValueKind::Array(value), + } + } +} + +impl<'a> From<&'a str> for JsonValue { + fn from(value: &'a str) -> Self { + JsonValue { + value: JsonValueKind::String(value.into()), + } + } +} + +impl From for JsonValue { + fn from(value: json_syntax::String) -> Self { + JsonValue { + value: JsonValueKind::String(value), + } + } +} + +impl<'a> From for JsonValue { + fn from(value: json_syntax::NumberBuf) -> Self { + JsonValue { + value: JsonValueKind::Number(value), + } + } +} + +impl From for JsonValue { + fn from(value: bool) -> Self { + JsonValue { + value: JsonValueKind::Boolean(value), + } + } +} + +impl From<()> for JsonValue { + fn from(_: ()) -> Self { + JsonValue { + value: JsonValueKind::Null, + } + } +} + +impl From, M>> for JsonValue { + fn from(Meta(value, _): Meta, M>) -> Self { + match value { + json_syntax::Value::Null => ().into(), + json_syntax::Value::Boolean(boolean) => boolean.into(), + json_syntax::Value::Number(num) => num.into(), + json_ld_syntax::Value::String(string) => string.into(), + json_ld_syntax::Value::Array(array) => array.into(), + json_ld_syntax::Value::Object(object) => object.into(), + } + } +} + +struct WithMeta(JsonValue, M); + +impl Into, M>> for WithMeta +where + M: Clone, +{ + fn into(self) -> Meta, M> { + let WithMeta(value, meta) = self; + + let value = match value.value { + JsonValueKind::Null => json_syntax::Value::Null, + JsonValueKind::Boolean(boolean) => json_syntax::Value::Boolean(boolean), + JsonValueKind::Number(number) => json_syntax::Value::Number(number), + JsonValueKind::String(string) => json_syntax::Value::String(string), + JsonValueKind::Array(array) => json_syntax::Value::Array( + array + .borrow() + .iter() + .map(|value| WithMeta(value.clone(), meta.clone()).into()) + .collect(), + ), + JsonValueKind::Map(map) => json_syntax::Value::Object(map.borrow().iter().fold( + json_syntax::Object::new(), + |mut object, (k, v)| { + object.insert( + Meta(k.as_str().into(), meta.clone()), + WithMeta(v.clone(), meta.clone()).into(), + ); + object + }, + )), + }; + + Meta(value, meta) + } +} + +impl From> for JsonValue { + fn from(values: json_syntax::Array) -> Self { + let array = values + .into_iter() + .map(|value| value.into()) + .collect::>(); + array.into() + } +} + +impl From> for JsonValue { + fn from(value: json_syntax::Object) -> Self { + let mut map = Map::new(); + + for json_syntax::object::Entry { key, value } in value.into_iter() { + if let Some(entry) = map.remove(key.0.as_str()) { + let entry_clone = entry.clone(); + + let entry = if let Some(mut array) = entry.get_array_mut() { + array.push(value.into()); + drop(array); + entry_clone + } else { + let mut array = Array::new(); + array.push(entry_clone); + array.push(value.into()); + array.into() + }; + + map.insert(key.0.to_string(), entry); + } else { + map.insert(key.0.to_string(), value.into()); + } + } + + map.into() } } #[derive(Debug)] pub struct InvalidJson; +#[derive(Debug, Default)] +struct ReferencedOnce { + value: HashMap, +} + +impl ReferencedOnce { + fn new() -> Self { + Self::default() + } + + fn get_mut<'a>(&'a mut self, object: &str) -> Option<&'a mut ReferencedEntry> { + self.value.get_mut(object) + } + + fn get_value(&self, object: &str) -> Option<&ReferencedValue> { + self.value.get(object).and_then(|entry| match entry { + ReferencedEntry::False => None, + ReferencedEntry::Value(value) => Some(value), + }) + } + + fn contains_key(&self, object: &str) -> bool { + self.get_value(object).is_some() + } + + fn insert( + &mut self, + object: String, + node: JsonValue, + property: String, + value: Rc>, + ) { + self.value.insert( + object, + ReferencedEntry::Value(ReferencedValue { + node, + property, + value, + }), + ); + } +} + +#[derive(Debug)] +enum ReferencedEntry { + False, + Value(ReferencedValue), +} + +#[derive(Debug, Clone)] +struct ReferencedValue { + node: JsonValue, + property: String, + value: Rc>, +} + +impl ReferencedEntry { + fn set_false(&mut self) { + *self = Self::False; + } +} + pub fn rdf_to_json_ld( rdf_dataset: Vec>, meta: M, @@ -125,7 +447,7 @@ pub fn rdf_to_json_ld( use_native_types: bool, use_rdf_type: bool, vocabulary: &N, -) -> Result, InvalidJson> +) -> Result, M>, InvalidJson> where N: Vocabulary, N::Iri: Hash + Eq + Clone, @@ -135,43 +457,47 @@ where let input_dataset = InputDataset::::new(rdf_dataset); // step 1 - let mut default_graph: HashMap> = HashMap::new(); + let default_graph = Rc::new(RefCell::new(Map::new())); // step 2 - let mut graph_map = HashMap::new(); - graph_map.insert(String::from("@default"), HashMap::new()); + let mut graph_map = Map::new(); + graph_map.insert("@default".into(), default_graph.clone().into()); // step 3 - let mut referenced_once = HashMap::new(); + let mut referenced_once = ReferencedOnce::new(); // step 4 - let mut compound_literal_subjects = HashMap::new(); + let mut compound_literal_subjects = Map::new(); // step 5 - let graphs = input_dataset.graphs(); - - for (graph, triples) in graphs { + for (graph, triples) in input_dataset.graphs() { // step 5.1 let name = if let Some(graph) = graph { - get_subject(graph, vocabulary) + subject_string(&graph, vocabulary) } else { - "@default" + "@default".to_string() }; // step 5.2 - let node_map = graph_map.entry(String::from(name)).or_default(); + let node_map: JsonValue = graph_map + .entry(name.clone()) + .or_insert_with(|| Map::new().into()) + .clone(); // step 5.3 - let compound_map: &mut HashMap<_, _> = compound_literal_subjects - .entry(String::from(name)) - .or_default(); + let mut compound_map = compound_literal_subjects + .entry(name.clone()) + .or_insert_with(|| Map::new().into()) + .get_map_mut() + .expect("Just set map"); // step 5.4 - if graph.is_some() { - default_graph - .entry(String::from(name)) - .or_default() - .insert(String::from("@id"), String::from(name)); + if name != "@default" && !default_graph.borrow().contains_key(&name) { + default_graph.borrow_mut().insert(name.clone(), { + let mut map = Map::new(); + map.insert("@id".into(), name.as_str().into()); + map.into() + }); } // step 5.5 no-op: get node_map @@ -180,226 +506,507 @@ where // step 5.7 for triple in triples { - let subject = get_subject(triple.subject(), vocabulary); - let predicate = get_subject(triple.predicate(), vocabulary); - // step 5.7.1 - let node: &mut json_ld_syntax::Value = node_map - .entry(String::from(subject)) - .or_insert_with(|| json_ld_syntax::Value::Object(json_ld_syntax::Object::new())); - - if let Some(map) = node.as_object_mut() { - map.push( - Meta(From::from("@id"), meta.clone()), - Meta(json_ld_syntax::Value::String(subject.into()), meta.clone()), - ); - } + let node = node_map + .get_map_mut() + .expect("node_map is map") + .entry(subject_string(triple.subject(), vocabulary)) + .or_insert_with(|| { + let mut map = Map::new(); + map.insert("@id".into(), subject_json(triple.subject(), vocabulary)); + map.into() + }) + .to_map() + .expect("Just set map"); // step 5.7.2 no-op: get node // step 5.7.3 - if rdf_direction == Some(RdfDirection::CompoundLiteral) && predicate == "rdf:direction" + if rdf_direction == Some(RdfDirection::CompoundLiteral) + && subject_matches(triple.predicate(), RDF_DIRECTION, vocabulary) { - compound_map.insert(String::from(subject), true); + compound_map.insert(subject_string(triple.subject(), vocabulary), true.into()); } // step 5.7.4 if let json_ld::rdf::Value::Reference(object) = triple.object() { - let object = get_subject(object, vocabulary); - - if let Some(map) = node_map - .entry(String::from(object)) - .or_insert_with(|| json_ld_syntax::Value::Object(json_ld_syntax::Object::new())) - .as_object_mut() - { - map.push( - Meta(From::from("@d"), meta.clone()), - Meta(json_ld_syntax::Value::String(object.into()), meta.clone()), - ); - } + node_map + .get_map_mut() + .expect("node_map is map") + .entry(subject_string(object, vocabulary)) + .or_insert_with(|| { + let mut map = Map::new(); + map.insert("@id".into(), subject_json(object, vocabulary)); + map.into() + }); } // step 5.7.5 - if predicate == "rdf:type" && !use_rdf_type { + if subject_matches(&triple.predicate(), RDF_TYPE, vocabulary) && !use_rdf_type { if let json_ld::rdf::Value::Reference(object) = triple.object() { - let object = get_subject(object, vocabulary); + let array = node + .borrow_mut() + .entry("@type".into()) + .or_insert_with(|| Array::new().into()) + .clone(); + let mut array = array.get_array_mut().expect("Just set array"); - if let Some(map) = node.as_object_mut() { - if map.get("@value").count() == 0 { - map.push( - Meta("@value".into(), meta.clone()), - Meta( - json_ld_syntax::Value::Array(vec![Meta( - json_ld_syntax::Value::String(object.into()), - meta.clone(), - )]), - meta.clone(), - ), - ); - } else { - for obj in map.get_mut("@value") { - if let Some(arr) = obj.as_array_mut() { - arr.push(Meta( - json_ld_syntax::Value::String(object.into()), - meta.clone(), - )); - break; - } - } - } + let object = subject_json(object, vocabulary); + + if array.iter().find(|item| **item == object).is_none() { + array.push(object); } + + continue; } } // step 5.7.6 - let value = do_thing::(); + let value = rdf_to_object( + triple.object().clone(), + meta.clone(), + rdf_direction, + use_native_types, + vocabulary, + )?; // step 5.7.7 - if let Some(map) = node.as_object_mut() { - if map.get(predicate).count() == 0 { - map.push( - Meta(predicate.into(), meta.clone()), - Meta(json_ld_syntax::Value::Array(Vec::new()), meta.clone()), - ); - } - } + node.borrow_mut() + .entry(subject_string(triple.predicate(), vocabulary)) + .or_insert_with(|| Array::new().into()); // step 5.7.8 - if let Some(map) = node.as_object_mut() { - let predicate_value = Meta(value.into(), meta.clone()); - for obj in map.get_mut(predicate) { - if let Some(arr) = obj.as_array_mut() { - if arr - .into_iter() - .find(|meta| **meta == predicate_value) - .is_none() - { - arr.push(predicate_value); - } - break; + let map = node.borrow_mut(); + + map.get(&subject_string(triple.predicate(), vocabulary)) + .and_then(|obj| { + let mut arr = obj.get_array_mut()?; + + if arr + .iter() + .find(|v| { + v.get_map() + .map(|map| *map == *value.borrow()) + .unwrap_or(false) + }) + .is_none() + { + arr.push(value.clone().into()); } - } - } + Some(()) + }); + drop(map); - // step 5.7.9 if let json_ld::rdf::Value::Reference(object) = triple.object() { - let object = get_subject(object, vocabulary); - - if object == "rdf:nil" { + // step 5.7.9 + if subject_matches(object, RDF_NIL, vocabulary) { // step 5.7.9.1 - let mut usages = node_map - .entry(String::from(object)) - .or_insert_with(|| json_ld_syntax::Value::Array(Vec::new())); + let usages: JsonValue = node_map + .get_map_mut() + .expect("node_map is map") + .entry(subject_string(object, vocabulary)) + .or_insert_with(|| Array::new().into()) + .clone(); // step 5.7.9.2 - if let Some(arr) = usages.as_array_mut() { - let mut map = json_ld_syntax::Object::new(); - map.push(Meta("node".into(), meta.clone()), Meta(/* node */, meta.clone())); - map.push(Meta("property".into(), meta.clone()), Meta(/* predicate */, meta.clone())); - map.push(Meta("value".into(), meta.clone()), Meta(/* value */, meta.clone())); - } + let mut usages_array = usages.get_array_mut().expect("usages is array"); + let mut map = Map::new(); + + map.insert("node".into(), node.into()); + map.insert( + "property".into(), + subject_json(triple.predicate(), vocabulary), + ); + map.insert("value".into(), value.into()); + + usages_array.push(map.into()); + } else if let Some(value) = + referenced_once.get_mut(&subject_string(object, vocabulary)) + { + // step 5.7.10 + value.set_false(); + } else if matches!( + triple.object(), + json_ld::rdf::Value::Reference(Subject::Blank(_)) + ) { + // step 5.7.11, 5.7.11.1 + referenced_once.insert( + subject_string(object, vocabulary), + node.into(), + subject_string(triple.predicate(), vocabulary), + value, + ) } } } } - todo!() + // step 6 + for (name, graph_object) in &graph_map { + // step 6.1 + let Some(entry) = compound_literal_subjects.get(name) else { continue }; + let map = entry.get_map().expect("entry is map"); + + for cl in map.keys() { + // step 6.1.1 + let Some(cl_entry) = referenced_once.get_value(cl) else { + continue; + }; + + // step 6.1.2 + let Some(node) = cl_entry.node.get_map_mut() else { + continue; + }; + + // step 6.1.3 + let property = &cl_entry.property; + + // step 6.1.4, this seems to be ignored + // let value = &cl_entry.value; + + // step 6.1.5 + let Some(cl_node) = graph_object.get_map_mut().and_then(|mut map| map.remove(cl)) else { + continue; + }; + + let Some(cl_node) = cl_node.get_map_mut() else { + continue; + }; + + // step 6.1.6 + let Some(property_array) = node.get(property).and_then(|property| property.get_array()) else { + eprintln!("Expected array from node's property"); + debug_assert!(false, "Expected array from node's property"); + continue; + }; + + for cl_reference in property_array.as_slice() { + let Some(mut cl_reference) = cl_reference.get_map_mut() else { + eprintln!("Expected map from cl_reference"); + debug_assert!(false, "Expected map from cl_reference"); + continue; + }; + + // step 6.1.6.1 + let Some(id) = cl_reference.get("@id") else { + eprintln!("Expected string from @id"); + debug_assert!(false, "Expected string from @id"); + continue; + }; + + if !id.matches_string(&cl) { + continue; + } + drop(id); + + cl_reference.remove("@id"); + + // step 6.1.6.2 + let at_value: JsonValue = cl_node + .get(RDF_VALUE.as_str()) + .and_then(|value| value.get_array()?.get(0)?.get_map()?.get("@value").cloned()) + .unwrap_or_else(|| ().into()); + + cl_reference.insert("@value".into(), at_value); + + // step 6.1.6.3 + let language: Option = cl_node + .get(RDF_LANGUAGE.as_str()) + .and_then(|value| value.get_array()?.get(0)?.get_map()?.get("@value").cloned()); + + if let Some(language) = language { + cl_reference.insert("@language".into(), language); + } + } + } + + // step 6.2, 6.3 + let Some(nil) = graph_object.get_map().expect("graph object is map").get(RDF_NIL.as_str()).cloned() else { + continue; + }; + + // step 6.4 + let usages: JsonValue = nil + .get_map() + .and_then(|map| map.get("usages").cloned()) + .unwrap_or_else(|| Array::new().into()); + + if let Some(usages) = usages.get_array() { + for usage in usages.iter() { + // step 6.4.1 + let Some((mut node, mut property, mut head)) = usage.get_map().and_then(|map| { + let node = map.get("node")?.clone(); + let property = map.get("property")?.clone(); + let head = map.get("value")?.to_map()?; + + Some((node, property, head)) + }) else { + eprintln!("missing required fields in usage"); + debug_assert!(false, "missing required fields in usage"); + continue; + }; + + // step 6.4.2 + let mut list = Array::new(); + let mut list_nodes = Vec::new(); + + // 6.4.3 + while property.matches_string(RDF_REST.as_str()) + && node + .get_map() + .and_then(|map| { + let id = map.get("@id")?; + let id_str = id.get_string()?; + + let first = map.get(RDF_FIRST.as_str())?; + let rest = map.get(RDF_REST.as_str())?; + let first = first.get_array()?; + let rest = rest.get_array()?; + + let valid_type = map + .get("@type") + .and_then(|ty| { + let type_arr = ty.get_array()?; + let first = type_arr.get(0)?; + + Some( + type_arr.len() == 1 + && first.matches_string(RDF_LIST.as_str()), + ) + }) + .unwrap_or(true); + + let only_valid_keys = + map.len() == map.get("@type").map(|_| 4).unwrap_or(3); + + if !only_valid_keys { + eprintln!( + "Invalid keys in node: [{}]", + map.keys() + .map(|s| s.as_str()) + .collect::>() + .join(", ") + ); + } + + Some( + id.is_blank_node() + && referenced_once.contains_key(id_str) + && first.len() == 1 + && rest.len() == 1 + && only_valid_keys + && valid_type, + ) + }) + .unwrap_or(false) + { + let node_map = node.get_map().expect("Node is a map"); + // step 6.4.3.1 + list.push( + node_map + .get(RDF_REST.as_str()) + .expect("verified this key exists already") + .clone(), + ); + + let id = node_map + .get("@id") + .expect("verified this key exists already") + .clone(); + + let id_str = id.get_string().expect("verified ID is a string already"); + + // step 6.4.3.2 + list_nodes.push(id_str.to_string()); + + // step 6.4.3.3 + let node_usage = referenced_once.get_value(id_str).cloned(); + + drop(node_map); + + // step 6.4.3.4 + let Some(node_usage) = node_usage else { + eprintln!("No referenced_once entry matching node's @id"); + debug_assert!(false, "No referenced_once entry matching node's @id"); + break; + }; + + node = node_usage.node; + property = node_usage.property.as_str().into(); + head = node_usage.value; + + // step 6.4.3.4 + if !node + .get_map() + .and_then(|map| Some(map.get("@id")?.is_blank_node())) + .unwrap_or(false) + { + break; + } + } + + // step 6.4.4 + head.borrow_mut().remove("@id"); + + // step 6.4.5 + list.reverse(); + + // step 6.4.6 + head.borrow_mut().insert("@list".into(), list.into()); + + let mut graph_object_map = graph_object.get_map_mut().expect("graph_object is map"); + + // step 6.4.7 + for node_id in list_nodes { + graph_object_map.remove(&node_id); + } + } + }; + } + + // step 7 + let mut result = Array::new(); + + // step 8 + let mut default_graph: Vec<_> = default_graph + .borrow() + .iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + + if ordered { + default_graph.sort_by_key(|tup| tup.0.clone()); + } + + // step 8.1 + for (subject, node) in default_graph { + let Some(node) = node.to_map() else { continue; }; + + if let Some(entry) = graph_map.get(&subject) { + // step 8.1.1 + let array = Rc::new(RefCell::new(Array::new())); + node.borrow_mut() + .insert("@graph".into(), array.clone().into()); + + // step 8.1.2 + if let Some(entry_map) = entry.get_map() { + let mut s_n: Vec<_> = entry_map.iter().collect(); + + if ordered { + s_n.sort_by_key(|tup| tup.0); + } + + for (_, n) in s_n { + let only_contains_id = n + .get_map_mut() + .map(|mut map| { + map.remove("usages"); + map.len() == 1 && map.contains_key("@id") + }) + .unwrap_or(false); + + if !only_contains_id { + array.borrow_mut().push(n.clone()); + } + } + } + } + + // step 8.2 + node.borrow_mut().remove("usages"); + + if node.borrow().len() > 1 || !node.borrow().contains_key("@id") { + result.push(node.into()); + } + } + + // step 9 + let result: JsonValue = result.into(); + Ok(WithMeta(result, meta).into()) } -fn do_thing() -> json_ld_syntax::Value { - todo!() -} - -pub fn rdf_to_object( +fn rdf_to_object( value: QuadValue, meta: M, rdf_direction: Option, use_native_types: bool, vocabulary: &N, -) -> Result, InvalidJson> +) -> Result>, InvalidJson> where N: Vocabulary, - N::Iri: Clone, + N::Iri: Eq + Clone, M: Clone, { match value { // step 1 - json_ld::rdf::Value::Reference(subject) => Ok(Object::Node(Box::new(Node::with_id( - json_ld_syntax::Entry::new(meta.clone(), Meta(Id::Valid(subject), meta)), - )))), + json_ld::rdf::Value::Reference(subject) => { + let mut map = Map::new(); + map.insert("@id".into(), subject_json(&subject, vocabulary)); + Ok(Rc::new(RefCell::new(map))) + } // step 2 json_ld::rdf::Value::Literal(literal) => { // step 2.1 - let result: Object; + let mut result = Map::new(); // step 2.2 - let converted_value: Value; + let converted_value: JsonValue; - // step 2.3 is no-op, type will be included + // step 2.3 + let ty: Option; - converted_value = match literal { + (converted_value, ty) = match literal { // step 2.4 // step 2.4.1 rdf_types::Literal::TypedString(value, value_ty) - if use_native_types - && expect_iri(&value_ty, iri!("xsd:string"), vocabulary) => + if use_native_types && expect_iri(&value_ty, XSD_STRING, vocabulary) => { - // TODO: un-clone this line - let new_value = value.as_str().to_string(); + let value_ty = get_iri(&value_ty, vocabulary); - Value::Literal( - Literal::String(LiteralString::Expanded(new_value.into())), - Some(value_ty), - ) + let value_ty: json_syntax::String = value_ty.as_str().into(); + (value.as_str().into(), Some(value_ty)) } // step 2.4.2 rdf_types::Literal::TypedString(value, value_ty) - if use_native_types - && expect_iri(&value_ty, iri!("xsd:boolean"), vocabulary) => + if use_native_types && expect_iri(&value_ty, XSD_BOOLEAN, vocabulary) => { - if value.as_str() == "true" { - Value::Literal(Literal::Boolean(true), Some(value_ty)) - } else if value.as_str() == "false" { - Value::Literal(Literal::Boolean(false), Some(value_ty)) - } else { - // TODO: un-clone this line - let new_value = value.as_str().to_string(); + let value_ty = get_iri(&value_ty, vocabulary); - Value::Literal( - Literal::String(LiteralString::Expanded(new_value.into())), - Some(value_ty), - ) + if value.as_str() == "true" { + (true.into(), None) + } else if value.as_str() == "false" { + (false.into(), None) + } else { + (value.as_str().into(), Some(value_ty.as_str().into())) } } // step 2.4.3 rdf_types::Literal::TypedString(value, value_ty) if use_native_types - && (expect_iri(&value_ty, iri!("xsd:integer"), vocabulary) - || expect_iri(&value_ty, iri!("xsd:double"), vocabulary)) => + && (expect_iri(&value_ty, XSD_INTEGER, vocabulary) + || expect_iri(&value_ty, XSD_DOUBLE, vocabulary)) => { + let value_ty = get_iri(&value_ty, vocabulary); + if let Ok(number_buf) = json_number::SmallNumberBuf::new(SmallVec::from_slice(value.as_bytes())) { - Value::Literal(Literal::Number(number_buf), Some(value_ty)) + (number_buf.into(), Some(value_ty.as_str().into())) } else { - // TODO: un-clone this line - let new_value = value.as_str().to_string(); - - Value::Literal( - Literal::String(LiteralString::Expanded(new_value.into())), - Some(value_ty), - ) + (value.as_str().into(), Some(value_ty.as_str().into())) } } // step 2.5 rdf_types::Literal::TypedString(value, value_ty) - if expect_iri(&value_ty, iri!("rdf:JSON"), vocabulary) => + if expect_iri(&value_ty, RDF_JSON, vocabulary) => { - let meta = meta.clone(); let value = json_syntax::Value::parse_str(value.as_str(), |_| meta.clone()) .map_err(|_| InvalidJson)?; - Value::Json(value) + + let value: JsonValue = value.into(); + + (value, Some(get_iri(&value_ty, vocabulary).as_str().into())) } // step 2.6 @@ -409,9 +1016,6 @@ where .starts_with("https://www.w3.org/ns/i18n#") && rdf_direction == Some(RdfDirection::I18nDatatype) => { - // TODO: un-clone this line - let new_value = value.as_str().to_string(); - // step 2.6.2 let iri = get_iri(&value_ty, vocabulary); let lang = iri @@ -419,90 +1023,52 @@ where .trim_start_matches("https://www.w3.org/ns/i18n#"); let (lang, direction) = lang.split_once("_").expect("Invalid language"); - let language = if lang.is_empty() { - None - } else { - Some(LenientLanguageTagBuf::new(lang.to_string()).0) - }; + if !lang.is_empty() { + // TODO: Validate lang + result.insert("@language".into(), lang.into()); + } // step 2.6.3 - let direction = if direction == "ltr" { - Some(Direction::Ltr) - } else if direction == "rtl" { - Some(Direction::Rtl) - } else { - None - }; + if !direction.is_empty() { + result.insert("@direction".into(), direction.into()); + } // step 2.6.1 - match LangString::new( - LiteralString::Expanded(new_value.into()), - language, - direction, - ) { - Ok(lang_string) => Value::LangString(lang_string), - Err(literal_string) => { - Value::Literal(Literal::String(literal_string), None) - } - } + (value.as_str().into(), None) } // step 2.7 rdf_types::Literal::LangString(value, language_tag_buf) => { - // TODO: un-clone this line - let new_value = value.as_str().to_string(); + result.insert("@language".into(), language_tag_buf.as_str().into()); - match LangString::new( - LiteralString::Expanded(new_value.into()), - Some(language_tag_buf.into()), - None, - ) { - Ok(lang_string) => Value::LangString(lang_string), - Err(literal_string) => { - Value::Literal(Literal::String(literal_string), None) - } - } + (value.as_str().into(), None) } // step 2.8 rdf_types::Literal::TypedString(value, value_ty) - if !expect_iri(&value_ty, iri!("xsd:string"), vocabulary) => + if !expect_iri(&value_ty, XSD_STRING, vocabulary) => { - // TODO: un-clone this line - let new_value = value.as_str().to_string(); + let value_ty = get_iri(&value_ty, vocabulary); - Value::Literal( - Literal::String(LiteralString::Expanded(new_value.into())), - Some(value_ty), - ) + (value.as_str().into(), Some(value_ty.as_str().into())) } // step 2.2, all remaining matches - rdf_types::Literal::TypedString(value, _) => { - // TODO: un-clone this line - let new_value = value.as_str().to_string(); - - Value::Literal( - Literal::String(LiteralString::Expanded(new_value.into())), - None, - ) - } - rdf_types::Literal::String(value) => { - // TODO: un-clone this line - let new_value = value.as_str().to_string(); - - Value::Literal( - Literal::String(LiteralString::Expanded(new_value.into())), - None, - ) + rdf_types::Literal::TypedString(value, value_ty) => { + eprintln!("Unknown value_ty: {}", get_iri(&value_ty, vocabulary)); + (value.as_str().into(), None) } + rdf_types::Literal::String(value) => (value.as_str().into(), None), }; // step 2.9 - result = Object::Value(converted_value); + result.insert("@value".into(), converted_value); - // step 2.10 is no-op, type is included already + // step 2.10 + if let Some(ty) = ty { + result.insert("@type".into(), ty.into()); + } - Ok(result) + Ok(Rc::new(RefCell::new(result))) } } }