Start building serialization algorithm using json-ld-syntax

I'll need to rewrite the rdf_to_json_ld one later
This commit is contained in:
asonix 2022-12-12 23:03:09 -06:00
parent 3982ddfcfa
commit 57956e329d

View file

@ -1,3 +1,8 @@
use std::{
collections::{HashMap, HashSet},
hash::Hash,
};
use iref::Iri;
use json_ld::{
object::{Literal, LiteralString},
@ -6,21 +11,82 @@ use json_ld::{
};
use json_ld_syntax::Parse;
use locspan::Meta;
use rdf_types::{BlankIdVocabulary, IriVocabulary, Quad, Vocabulary};
use rdf_types::{BlankId, BlankIdVocabulary, IriVocabulary, Quad, Triple, Vocabulary};
use smallvec::SmallVec;
use static_iref::iri;
pub(crate) type QuadSubject<N> =
Subject<<N as IriVocabulary>::Iri, <N as BlankIdVocabulary>::BlankId>;
pub(crate) type QuadValue<N> =
type QuadSubject<N> = Subject<<N as IriVocabulary>::Iri, <N as BlankIdVocabulary>::BlankId>;
type QuadValue<N> =
json_ld::rdf::Value<<N as IriVocabulary>::Iri, <N as BlankIdVocabulary>::BlankId>;
pub(crate) type NormalizingQuad<N> =
Quad<QuadSubject<N>, QuadSubject<N>, QuadValue<N>, QuadSubject<N>>;
type NormalizingQuad<N> = Quad<QuadSubject<N>, QuadSubject<N>, QuadValue<N>, QuadSubject<N>>;
/// JSON-LD object whose identifiers are the vocabulary `N`'s IRI and blank-id
/// handles and whose metadata type is `M`; this is the `Ok` type produced by
/// `rdf_to_object`.
type SerializedObject<N, M> =
Object<<N as IriVocabulary>::Iri, <N as BlankIdVocabulary>::BlankId, M>;
/// A quad with its graph component stripped off: subject and predicate are
/// `QuadSubject`s, the object is a `QuadValue`. `InputDataset` stores one list
/// of these per graph.
type SerializingTriple<N> = Triple<QuadSubject<N>, QuadSubject<N>, QuadValue<N>>;
/// An RDF dataset regrouped by graph so it can be serialized graph by graph.
///
/// Keys are graph names, with `None` standing for the quads that carried no
/// graph name; values are the triples of that graph in the order they were
/// inserted.
struct InputDataset<N: Vocabulary> {
    graphs: HashMap<Option<QuadSubject<N>>, Vec<SerializingTriple<N>>>,
}
impl<N> Default for InputDataset<N>
where
N: Vocabulary,
{
fn default() -> Self {
Self {
graphs: Default::default(),
}
}
}
impl<N: Vocabulary> InputDataset<N> {
    /// Groups `input_dataset` by graph name.
    ///
    /// Every quad is split into its graph component (the map key) and the
    /// remaining triple, which is appended to that graph's list.
    fn new(input_dataset: Vec<NormalizingQuad<N>>) -> Self
    where
        N::Iri: Hash + Eq,
        N::BlankId: Hash + Eq,
    {
        let mut dataset = Self::default();

        for quad in input_dataset {
            let (subject, predicate, object, graph) = quad.into_parts();
            dataset
                .graphs
                .entry(graph)
                .or_default()
                .push(Triple(subject, predicate, object));
        }

        dataset
    }

    /// Iterates over every graph as `(name, triples)`; the name is `None` for
    /// triples that were stored without a graph name.
    fn graphs<'a>(
        &'a self,
    ) -> impl Iterator<Item = (Option<&'a QuadSubject<N>>, &'a [SerializingTriple<N>])> {
        self.graphs
            .iter()
            .map(|(name, triples)| (name.as_ref(), &triples[..]))
    }

    /// Iterates over the names of the named graphs, resolved through
    /// `vocabulary`.
    ///
    /// The unnamed graph is skipped, and so is any name the vocabulary cannot
    /// resolve.
    fn graph_names<'a>(
        &'a self,
        vocabulary: &'a N,
    ) -> impl Iterator<Item = Subject<Iri<'a>, &'a BlankId>>
    where
        N::BlankId: Eq + Hash,
        N::Iri: Eq + Hash,
    {
        self.graphs
            .keys()
            .flatten()
            .filter_map(move |name| match name {
                Subject::Iri(iri) => vocabulary.iri(iri).map(Subject::Iri),
                Subject::Blank(blank) => vocabulary.blank_id(blank).map(Subject::Blank),
            })
    }
}
fn expect_iri<N>(id: &N::Iri, iri: Iri<'_>, vocabulary: &N) -> bool
where
N: Vocabulary,
@ -35,13 +101,219 @@ where
vocabulary.iri(id).expect("Id in vocabulary")
}
/// Resolves `id` through `vocabulary` and returns its textual form: the IRI
/// string for an IRI, the blank node label for a blank node.
///
/// # Panics
///
/// Panics with "Id in vocabulary" when `vocabulary` has no entry for `id`.
fn get_subject<'a, N>(id: &'a QuadSubject<N>, vocabulary: &'a N) -> &'a str
where
    N: Vocabulary,
{
    match id {
        Subject::Blank(blank) => {
            let resolved = vocabulary.blank_id(blank).expect("Id in vocabulary");
            resolved.as_str()
        }
        Subject::Iri(iri) => {
            let resolved = vocabulary.iri(iri).expect("Id in vocabulary");
            resolved.as_str()
        }
    }
}
/// Error type returned by `rdf_to_json_ld` and `rdf_to_object`.
///
/// It carries no payload, so callers only learn that the conversion failed.
// NOTE(review): presumably raised when a literal typed as JSON cannot be
// parsed; the code that constructs it is outside this view, so confirm.
#[derive(Debug)]
pub struct InvalidJson;
pub fn rdf_to_json_ld<N, M>(
rdf_dataset: Vec<NormalizingQuad<N>>,
meta: M,
ordered: bool,
rdf_direction: Option<RdfDirection>,
use_native_types: bool,
use_rdf_type: bool,
vocabulary: &N,
) -> Result<json_ld_syntax::Value<M>, InvalidJson>
where
N: Vocabulary,
N::Iri: Hash + Eq + Clone,
N::BlankId: Hash + Eq + Clone,
M: Eq + Clone,
{
let input_dataset = InputDataset::<N>::new(rdf_dataset);
// step 1
let mut default_graph: HashMap<String, HashMap<String, String>> = HashMap::new();
// step 2
let mut graph_map = HashMap::new();
graph_map.insert(String::from("@default"), HashMap::new());
// step 3
let mut referenced_once = HashMap::new();
// step 4
let mut compound_literal_subjects = HashMap::new();
// step 5
let graphs = input_dataset.graphs();
for (graph, triples) in graphs {
// step 5.1
let name = if let Some(graph) = graph {
get_subject(graph, vocabulary)
} else {
"@default"
};
// step 5.2
let node_map = graph_map.entry(String::from(name)).or_default();
// step 5.3
let compound_map: &mut HashMap<_, _> = compound_literal_subjects
.entry(String::from(name))
.or_default();
// step 5.4
if graph.is_some() {
default_graph
.entry(String::from(name))
.or_default()
.insert(String::from("@id"), String::from(name));
}
// step 5.5 no-op: get node_map
// step 5.6 no-op: get compound_map
// step 5.7
for triple in triples {
let subject = get_subject(triple.subject(), vocabulary);
let predicate = get_subject(triple.predicate(), vocabulary);
// step 5.7.1
let node: &mut json_ld_syntax::Value<M> = node_map
.entry(String::from(subject))
.or_insert_with(|| json_ld_syntax::Value::Object(json_ld_syntax::Object::new()));
if let Some(map) = node.as_object_mut() {
map.push(
Meta(From::from("@id"), meta.clone()),
Meta(json_ld_syntax::Value::String(subject.into()), meta.clone()),
);
}
// step 5.7.2 no-op: get node
// step 5.7.3
if rdf_direction == Some(RdfDirection::CompoundLiteral) && predicate == "rdf:direction"
{
compound_map.insert(String::from(subject), true);
}
// step 5.7.4
if let json_ld::rdf::Value::Reference(object) = triple.object() {
let object = get_subject(object, vocabulary);
if let Some(map) = node_map
.entry(String::from(object))
.or_insert_with(|| json_ld_syntax::Value::Object(json_ld_syntax::Object::new()))
.as_object_mut()
{
map.push(
Meta(From::from("@d"), meta.clone()),
Meta(json_ld_syntax::Value::String(object.into()), meta.clone()),
);
}
}
// step 5.7.5
if predicate == "rdf:type" && !use_rdf_type {
if let json_ld::rdf::Value::Reference(object) = triple.object() {
let object = get_subject(object, vocabulary);
if let Some(map) = node.as_object_mut() {
if map.get("@value").count() == 0 {
map.push(
Meta("@value".into(), meta.clone()),
Meta(
json_ld_syntax::Value::Array(vec![Meta(
json_ld_syntax::Value::String(object.into()),
meta.clone(),
)]),
meta.clone(),
),
);
} else {
for obj in map.get_mut("@value") {
if let Some(arr) = obj.as_array_mut() {
arr.push(Meta(
json_ld_syntax::Value::String(object.into()),
meta.clone(),
));
break;
}
}
}
}
}
}
// step 5.7.6
let value = do_thing::<M>();
// step 5.7.7
if let Some(map) = node.as_object_mut() {
if map.get(predicate).count() == 0 {
map.push(
Meta(predicate.into(), meta.clone()),
Meta(json_ld_syntax::Value::Array(Vec::new()), meta.clone()),
);
}
}
// step 5.7.8
if let Some(map) = node.as_object_mut() {
let predicate_value = Meta(value.into(), meta.clone());
for obj in map.get_mut(predicate) {
if let Some(arr) = obj.as_array_mut() {
if arr
.into_iter()
.find(|meta| **meta == predicate_value)
.is_none()
{
arr.push(predicate_value);
}
break;
}
}
}
// step 5.7.9
if let json_ld::rdf::Value::Reference(object) = triple.object() {
let object = get_subject(object, vocabulary);
if object == "rdf:nil" {
// step 5.7.9.1
let mut usages = node_map
.entry(String::from(object))
.or_insert_with(|| json_ld_syntax::Value::Array(Vec::new()));
// step 5.7.9.2
if let Some(arr) = usages.as_array_mut() {
let mut map = json_ld_syntax::Object::new();
map.push(Meta("node".into(), meta.clone()), Meta(/* node */, meta.clone()));
map.push(Meta("property".into(), meta.clone()), Meta(/* predicate */, meta.clone()));
map.push(Meta("value".into(), meta.clone()), Meta(/* value */, meta.clone()));
}
}
}
}
}
todo!()
}
/// Placeholder for the value that `rdf_to_json_ld` computes at its step 5.7.6.
///
/// # Panics
///
/// Always panics: the body is still `todo!()`.
// NOTE(review): presumably this will turn the triple's object into a JSON-LD
// value (cf. `rdf_to_object`); it takes no arguments yet, so confirm the
// intended signature.
fn do_thing<M>() -> json_ld_syntax::Value<M> {
todo!()
}
pub fn rdf_to_object<N, M>(
value: QuadValue<N>,
meta: M,
rdf_direction: RdfDirection,
rdf_direction: Option<RdfDirection>,
use_native_types: bool,
vocabulary: &N,
) -> Result<SerializedObject<N, M>, InvalidJson>
@ -76,7 +348,7 @@ where
let new_value = value.as_str().to_string();
Value::Literal(
Literal::String(LiteralString::Inferred(new_value)),
Literal::String(LiteralString::Expanded(new_value.into())),
Some(value_ty),
)
}
@ -94,7 +366,7 @@ where
let new_value = value.as_str().to_string();
Value::Literal(
Literal::String(LiteralString::Inferred(new_value)),
Literal::String(LiteralString::Expanded(new_value.into())),
Some(value_ty),
)
}
@ -114,7 +386,7 @@ where
let new_value = value.as_str().to_string();
Value::Literal(
Literal::String(LiteralString::Inferred(new_value)),
Literal::String(LiteralString::Expanded(new_value.into())),
Some(value_ty),
)
}
@ -135,7 +407,7 @@ where
if get_iri(&value_ty, vocabulary)
.as_str()
.starts_with("https://www.w3.org/ns/i18n#")
&& rdf_direction == RdfDirection::I18nDatatype =>
&& rdf_direction == Some(RdfDirection::I18nDatatype) =>
{
// TODO: un-clone this line
let new_value = value.as_str().to_string();
@ -163,7 +435,11 @@ where
};
// step 2.6.1
match LangString::new(LiteralString::Inferred(new_value), language, direction) {
match LangString::new(
LiteralString::Expanded(new_value.into()),
language,
direction,
) {
Ok(lang_string) => Value::LangString(lang_string),
Err(literal_string) => {
Value::Literal(Literal::String(literal_string), None)
@ -176,7 +452,7 @@ where
let new_value = value.as_str().to_string();
match LangString::new(
LiteralString::Inferred(new_value),
LiteralString::Expanded(new_value.into()),
Some(language_tag_buf.into()),
None,
) {
@ -195,7 +471,7 @@ where
let new_value = value.as_str().to_string();
Value::Literal(
Literal::String(LiteralString::Inferred(new_value)),
Literal::String(LiteralString::Expanded(new_value.into())),
Some(value_ty),
)
}
@ -205,13 +481,19 @@ where
// TODO: un-clone this line
let new_value = value.as_str().to_string();
Value::Literal(Literal::String(LiteralString::Inferred(new_value)), None)
Value::Literal(
Literal::String(LiteralString::Expanded(new_value.into())),
None,
)
}
rdf_types::Literal::String(value) => {
// TODO: un-clone this line
let new_value = value.as_str().to_string();
Value::Literal(Literal::String(LiteralString::Inferred(new_value)), None)
Value::Literal(
Literal::String(LiteralString::Expanded(new_value.into())),
None,
)
}
};