Clean up files a bit
This commit is contained in:
parent
6d3c929c18
commit
e906063432
|
@ -1,9 +1,9 @@
|
|||
use contextual::WithContext;
|
||||
use iref::{Iri, IriBuf};
|
||||
use json_ld::{
|
||||
syntax::{parse::MetaError, Parse, Value},
|
||||
Flatten, JsonLdProcessor, Loader, RemoteDocument,
|
||||
};
|
||||
use json_ld_normalization::quad_to_string;
|
||||
use locspan::{Location, Meta, Span};
|
||||
use rdf_types::{generator::Blank, vocabulary::Index, IriVocabulary, IriVocabularyMut};
|
||||
use reqwest::Client;
|
||||
|
@ -15,6 +15,80 @@ use std::{
|
|||
sync::{Arc, RwLock},
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), AnyError> {
|
||||
let cache = Cache::new();
|
||||
let client = Client::builder()
|
||||
.user_agent("json-ld-playground")
|
||||
.build()
|
||||
.expect("Successful client");
|
||||
|
||||
let iris = [
|
||||
iri!("https://relay.asonix.dog/actor"),
|
||||
iri!("https://masto.asonix.dog/actor"),
|
||||
iri!("https://masto.asonix.dog/users/asonix"),
|
||||
iri!("https://masto.asonix.dog/users/kumu"),
|
||||
iri!("https://yiff.life/users/6my"),
|
||||
iri!("https://meow.social/users/6my"),
|
||||
];
|
||||
|
||||
for iri in iris {
|
||||
let document = client
|
||||
.get(iri.as_str())
|
||||
.header("accept", "application/activity+json")
|
||||
.send()
|
||||
.await?
|
||||
.text()
|
||||
.await?;
|
||||
|
||||
normalize_document(cache.clone(), client.clone(), iri, &document).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn normalize_document(
|
||||
cache: Cache,
|
||||
client: Client,
|
||||
iri: Iri<'static>,
|
||||
document: &str,
|
||||
) -> Result<(), AnyError> {
|
||||
let mut vocabulary: rdf_types::IndexVocabulary = rdf_types::IndexVocabulary::new();
|
||||
|
||||
let iri_index = vocabulary.insert(iri);
|
||||
|
||||
let input = RemoteDocument::new(
|
||||
Some(iri_index.clone()),
|
||||
Some("application/activity+json".parse()?),
|
||||
Value::parse_str(document, |span| Location::new(iri_index.clone(), span))
|
||||
.expect("Failed to parse"),
|
||||
);
|
||||
|
||||
let mut loader = ReqwestLoader::with_default_parser(cache, client);
|
||||
|
||||
let expanded = input
|
||||
.expand_with(&mut vocabulary, &mut loader)
|
||||
.await
|
||||
.expect("Failed to expand");
|
||||
|
||||
let mut pre_gen = Blank::new().with_metadata(Location::new(iri_index.clone(), Span::default()));
|
||||
|
||||
let flattened = expanded
|
||||
.flatten_with(&mut vocabulary, &mut pre_gen, true)
|
||||
.expect("Failed to flatten");
|
||||
|
||||
let output_document = json_ld_normalization::normalize::<_, _, _, sha2::Sha256>(
|
||||
&mut vocabulary,
|
||||
iri_index,
|
||||
flattened.0,
|
||||
true,
|
||||
)?;
|
||||
|
||||
println!("{}", output_document.with(&vocabulary));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
const ACTIVITYSTREAMS: &'static str = "https://www.w3.org/ns/activitystreams";
|
||||
const SECURITY: &'static str = "https://w3id.org/security/v1";
|
||||
|
||||
|
@ -161,82 +235,3 @@ impl<I: Send + Sync, T: Send, M: Send, E> Loader<I, M> for ReqwestLoader<I, M, T
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), AnyError> {
|
||||
let cache = Cache::new();
|
||||
let client = Client::builder()
|
||||
.user_agent("json-ld-playground")
|
||||
.build()
|
||||
.expect("Successful client");
|
||||
|
||||
let iris = [
|
||||
iri!("https://relay.asonix.dog/actor"),
|
||||
iri!("https://masto.asonix.dog/actor"),
|
||||
iri!("https://masto.asonix.dog/users/asonix"),
|
||||
];
|
||||
|
||||
for iri in iris {
|
||||
let document = client
|
||||
.get(iri.as_str())
|
||||
.header("accept", "application/activity+json")
|
||||
.send()
|
||||
.await?
|
||||
.text()
|
||||
.await?;
|
||||
|
||||
do_the_thing(cache.clone(), client.clone(), iri, &document).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn do_the_thing(
|
||||
cache: Cache,
|
||||
client: Client,
|
||||
iri: Iri<'static>,
|
||||
document: &str,
|
||||
) -> Result<(), AnyError> {
|
||||
let mut vocabulary: rdf_types::IndexVocabulary = rdf_types::IndexVocabulary::new();
|
||||
|
||||
let iri_index = vocabulary.insert(iri);
|
||||
|
||||
let input = RemoteDocument::new(
|
||||
Some(iri_index.clone()),
|
||||
Some("application/activity+json".parse()?),
|
||||
Value::parse_str(document, |span| Location::new(iri_index.clone(), span))
|
||||
.expect("Failed to parse"),
|
||||
);
|
||||
|
||||
let mut loader = ReqwestLoader::with_default_parser(cache, client);
|
||||
|
||||
let expanded = input
|
||||
.expand_with(&mut vocabulary, &mut loader)
|
||||
.await
|
||||
.expect("Failed to expand");
|
||||
|
||||
let mut pre_gen = Blank::new().with_metadata(Location::new(iri_index.clone(), Span::default()));
|
||||
|
||||
let flattened = expanded
|
||||
.flatten_with(&mut vocabulary, &mut pre_gen, true)
|
||||
.expect("Failed to flatten");
|
||||
|
||||
let output_document = json_ld_normalization::normalize::<_, _, _, sha2::Sha256>(
|
||||
&mut vocabulary,
|
||||
iri_index,
|
||||
flattened.0,
|
||||
true,
|
||||
)?;
|
||||
|
||||
let mut strings = output_document
|
||||
.quads
|
||||
.iter()
|
||||
.map(|quad| quad_to_string(quad, &vocabulary))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
strings.sort();
|
||||
|
||||
println!("{}", strings.join(""));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -51,7 +51,7 @@ where
|
|||
subject.into_owned(),
|
||||
predicate.into_owned(),
|
||||
object,
|
||||
graph.map(|graph| graph.clone()),
|
||||
graph.cloned(),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
|
100
src/issuer.rs
Normal file
100
src/issuer.rs
Normal file
|
@ -0,0 +1,100 @@
|
|||
use indexmap::IndexMap;
|
||||
use json_ld::ValidId as Subject;
|
||||
use rdf_types::{generator::Blank, BlankIdVocabularyMut, Vocabulary};
|
||||
use std::hash::Hash;
|
||||
|
||||
pub(crate) struct Issuer<B> {
|
||||
// Identifier Prefix and Identifier Counter
|
||||
blank_node_generator: Blank,
|
||||
// Issued Identifier List
|
||||
issued_identifier_list: IndexMap<B, B>,
|
||||
}
|
||||
|
||||
impl<B> Issuer<B> {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self {
|
||||
blank_node_generator: canonicalization_node_generator(),
|
||||
issued_identifier_list: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn new_with_prefix(prefix: &str) -> Self {
|
||||
Self {
|
||||
blank_node_generator: make_issuer(prefix),
|
||||
issued_identifier_list: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn iter(&self) -> impl Iterator<Item = (&B, &B)> {
|
||||
self.issued_identifier_list.iter()
|
||||
}
|
||||
|
||||
pub(crate) fn get(&self, identifier: &B) -> Option<&B>
|
||||
where
|
||||
B: Eq + Hash,
|
||||
{
|
||||
self.issued_identifier_list.get(identifier)
|
||||
}
|
||||
|
||||
pub(crate) fn contains(&self, identifier: &B) -> bool
|
||||
where
|
||||
B: Eq + Hash,
|
||||
{
|
||||
self.issued_identifier_list.contains_key(identifier)
|
||||
}
|
||||
|
||||
pub(crate) fn issue_identifier<N>(&mut self, identifier: B, vocabulary: &mut N) -> N::BlankId
|
||||
where
|
||||
N: Vocabulary<BlankId = B> + BlankIdVocabularyMut,
|
||||
B: Eq + Hash + Clone,
|
||||
{
|
||||
use rdf_types::Generator;
|
||||
|
||||
// step 1
|
||||
if let Some(blank) = self.get(&identifier) {
|
||||
return blank.clone();
|
||||
}
|
||||
|
||||
// step 2 and 4
|
||||
let blank = match self.blank_node_generator.next(vocabulary) {
|
||||
Subject::Blank(blank) => blank,
|
||||
Subject::Iri(_) => unreachable!("Blank ID generators should only generate blank IDs"),
|
||||
};
|
||||
|
||||
// step 3
|
||||
self.issued_identifier_list
|
||||
.insert(identifier, blank.clone());
|
||||
|
||||
// step 5
|
||||
blank
|
||||
}
|
||||
}
|
||||
|
||||
fn canonicalization_node_generator() -> Blank {
|
||||
make_issuer("c14n")
|
||||
}
|
||||
|
||||
fn make_issuer(prefix: &str) -> Blank {
|
||||
Blank::new_with_prefix(String::from(prefix))
|
||||
}
|
||||
|
||||
impl<B> Clone for Issuer<B>
|
||||
where
|
||||
B: Clone,
|
||||
{
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
blank_node_generator: Blank::new_full(
|
||||
self.blank_node_generator.prefix().to_string(),
|
||||
self.blank_node_generator.count(),
|
||||
),
|
||||
issued_identifier_list: self.issued_identifier_list.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<B> Default for Issuer<B> {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
692
src/lib.rs
692
src/lib.rs
|
@ -1,5 +1,4 @@
|
|||
use contextual::WithContext;
|
||||
use indexmap::IndexMap;
|
||||
use itertools::Itertools;
|
||||
use json_ld::{rdf::Value, RdfQuads, ValidId as Subject};
|
||||
use locspan::{Location, Span};
|
||||
|
@ -11,10 +10,15 @@ use std::{
|
|||
};
|
||||
|
||||
mod input_dataset;
|
||||
mod issuer;
|
||||
mod output_dataset;
|
||||
#[cfg(feature = "rustcrypto")]
|
||||
mod sha2_impls;
|
||||
|
||||
use input_dataset::{InputDataset, NormalizingQuad, Position, QuadSubject, QuadValue};
|
||||
use issuer::Issuer;
|
||||
|
||||
pub use output_dataset::OutputDataset;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Security;
|
||||
|
@ -28,27 +32,13 @@ pub trait Sha256 {
|
|||
fn finalize_hex_and_reset(&mut self) -> HexHash;
|
||||
}
|
||||
|
||||
pub struct OutputDataset<N>
|
||||
where
|
||||
N: Vocabulary,
|
||||
{
|
||||
pub quads: Vec<NormalizingQuad<N>>,
|
||||
}
|
||||
|
||||
pub struct Issuer<B> {
|
||||
// Identifier Prefix and Identifier Counter
|
||||
blank_node_generator: Blank,
|
||||
// Issued Identifier List
|
||||
issued_identifier_list: IndexMap<B, B>,
|
||||
}
|
||||
|
||||
pub struct CanonicalizationState<'a, N, S>
|
||||
where
|
||||
N: Vocabulary,
|
||||
{
|
||||
sha256: S,
|
||||
|
||||
vocabulary: &'a mut N,
|
||||
canonical_vocabulary: &'a mut N,
|
||||
|
||||
canonical_issuer: Issuer<N::BlankId>,
|
||||
|
||||
|
@ -77,78 +67,18 @@ where
|
|||
)
|
||||
}
|
||||
|
||||
impl<B> Issuer<B> {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
blank_node_generator: canonicalization_node_generator(),
|
||||
issued_identifier_list: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn new_with_prefix(prefix: &str) -> Self {
|
||||
Self {
|
||||
blank_node_generator: make_issuer(prefix),
|
||||
issued_identifier_list: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn iter(&self) -> impl Iterator<Item = (&B, &B)> {
|
||||
self.issued_identifier_list.iter()
|
||||
}
|
||||
|
||||
fn get(&self, identifier: &B) -> Option<&B>
|
||||
where
|
||||
B: Eq + Hash,
|
||||
{
|
||||
self.issued_identifier_list.get(identifier)
|
||||
}
|
||||
|
||||
fn contains(&self, identifier: &B) -> bool
|
||||
where
|
||||
B: Eq + Hash,
|
||||
{
|
||||
self.issued_identifier_list.contains_key(identifier)
|
||||
}
|
||||
|
||||
fn issue_identifier<N>(&mut self, identifier: B, vocabulary: &mut N) -> N::BlankId
|
||||
where
|
||||
N: Vocabulary<BlankId = B> + BlankIdVocabularyMut,
|
||||
B: Eq + Hash + Clone,
|
||||
{
|
||||
use rdf_types::Generator;
|
||||
|
||||
// step 1
|
||||
if let Some(blank) = self.get(&identifier) {
|
||||
return blank.clone();
|
||||
}
|
||||
|
||||
// step 2 and 4
|
||||
let blank = match self.blank_node_generator.next(vocabulary) {
|
||||
Subject::Blank(blank) => blank,
|
||||
Subject::Iri(_) => unreachable!("Blank ID generators should only generate blank IDs"),
|
||||
};
|
||||
|
||||
// step 3
|
||||
self.issued_identifier_list
|
||||
.insert(identifier, blank.clone());
|
||||
|
||||
// step 5
|
||||
blank
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, N, S> CanonicalizationState<'a, N, S>
|
||||
where
|
||||
N: Vocabulary,
|
||||
{
|
||||
/// Step 1
|
||||
fn new(vocabulary: &'a mut N) -> Self
|
||||
fn new(canonical_vocabulary: &'a mut N) -> Self
|
||||
where
|
||||
S: Default,
|
||||
{
|
||||
Self {
|
||||
sha256: S::default(),
|
||||
vocabulary,
|
||||
canonical_vocabulary,
|
||||
canonical_issuer: Issuer::default(),
|
||||
blank_node_to_quads: Default::default(),
|
||||
hash_to_blank_nodes: Default::default(),
|
||||
|
@ -195,7 +125,7 @@ where
|
|||
{
|
||||
let mut pre_gen = Blank::new().with_metadata(Location::new(document_id, Span::default()));
|
||||
|
||||
InputDataset::from_rdf_quads(rdf_quads, &mut self.vocabulary, &mut pre_gen)
|
||||
InputDataset::from_rdf_quads(rdf_quads, self.canonical_vocabulary, &mut pre_gen)
|
||||
}
|
||||
|
||||
// Step 2
|
||||
|
@ -211,7 +141,7 @@ where
|
|||
quad.graph().and_then(subject_as_blank_id::<N>),
|
||||
]
|
||||
.into_iter()
|
||||
.filter_map(|opt| opt);
|
||||
.flatten();
|
||||
|
||||
for blank_id in iter {
|
||||
self.blank_node_to_quads
|
||||
|
@ -230,8 +160,8 @@ where
|
|||
N: VocabularyMut + BlankIdVocabularyMut,
|
||||
{
|
||||
// step 3
|
||||
let mut non_normalized_identifiers: HashSet<&N::BlankId> =
|
||||
self.blank_node_to_quads.keys().collect();
|
||||
let mut non_normalized_identifiers: HashSet<N::BlankId> =
|
||||
self.blank_node_to_quads.keys().cloned().collect();
|
||||
|
||||
// step 4
|
||||
let mut simple = true;
|
||||
|
@ -247,13 +177,7 @@ where
|
|||
// step 5.3
|
||||
for identifier in non_normalized_identifiers.iter() {
|
||||
// step 5.3.1
|
||||
let hash = hash_first_degree_quads(
|
||||
&self.blank_node_to_quads,
|
||||
self.vocabulary,
|
||||
(*identifier).clone(),
|
||||
input_dataset,
|
||||
&mut self.sha256,
|
||||
);
|
||||
let hash = self.hash_first_degree_quads((*identifier).clone(), input_dataset);
|
||||
|
||||
// step 5.3.2
|
||||
self.hash_to_blank_nodes
|
||||
|
@ -277,7 +201,7 @@ where
|
|||
|
||||
// step 5.4.2
|
||||
self.canonical_issuer
|
||||
.issue_identifier(identifier, self.vocabulary);
|
||||
.issue_identifier(identifier, self.canonical_vocabulary);
|
||||
}
|
||||
|
||||
// step 5.4.5
|
||||
|
@ -300,8 +224,7 @@ where
|
|||
N::BlankId: Clone + Eq + Hash,
|
||||
S: Sha256,
|
||||
{
|
||||
let hash_to_blank_nodes =
|
||||
std::mem::replace(&mut self.hash_to_blank_nodes, Default::default());
|
||||
let hash_to_blank_nodes = std::mem::take(&mut self.hash_to_blank_nodes);
|
||||
|
||||
for (_, identifier_list) in hash_to_blank_nodes {
|
||||
// step 6.1
|
||||
|
@ -327,15 +250,11 @@ where
|
|||
temporary_issuer.issue_identifier(identifier.clone(), &mut temporary_vocabulary);
|
||||
|
||||
// step 6.2.4
|
||||
let hash = hash_n_degree_quads(
|
||||
&self.blank_node_to_quads,
|
||||
let hash = self.hash_n_degree_quads(
|
||||
&mut temporary_vocabulary,
|
||||
&self.vocabulary,
|
||||
&mut temporary_issuer,
|
||||
&self.canonical_issuer,
|
||||
identifier,
|
||||
input_dataset,
|
||||
&mut self.sha256,
|
||||
);
|
||||
|
||||
hash_path_list.push((hash, temporary_issuer));
|
||||
|
@ -346,7 +265,7 @@ where
|
|||
// step 6.3.1
|
||||
for (existing_identifier, _) in temporary_issuer.iter() {
|
||||
self.canonical_issuer
|
||||
.issue_identifier(existing_identifier.clone(), self.vocabulary);
|
||||
.issue_identifier(existing_identifier.clone(), self.canonical_vocabulary);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -380,7 +299,7 @@ where
|
|||
})
|
||||
.collect();
|
||||
|
||||
OutputDataset { quads }
|
||||
OutputDataset::new(quads, self.canonical_vocabulary)
|
||||
}
|
||||
|
||||
fn translate_object(&self, object: &QuadValue<N>) -> Option<QuadValue<N>>
|
||||
|
@ -406,168 +325,304 @@ where
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn hash_n_degree_quads<N, S>(
|
||||
blank_node_to_quads: &HashMap<N::BlankId, HashSet<Position>>,
|
||||
vocabulary: &mut N,
|
||||
canon_vocabulary: &N,
|
||||
issuer: &mut Issuer<N::BlankId>,
|
||||
canon_issuer: &Issuer<N::BlankId>,
|
||||
identifier: N::BlankId,
|
||||
input_dataset: &InputDataset<N>,
|
||||
sha256: &mut S,
|
||||
) -> HexHash
|
||||
where
|
||||
N: Vocabulary + VocabularyMut,
|
||||
N::BlankId: Clone + Eq + Hash,
|
||||
S: Sha256,
|
||||
{
|
||||
// step 1
|
||||
let mut hash_to_related_blank_nodes: HashMap<HexHash, HashSet<N::BlankId>> = HashMap::new();
|
||||
fn hash_n_degree_quads(
|
||||
&mut self,
|
||||
vocabulary: &mut N,
|
||||
issuer: &mut Issuer<N::BlankId>,
|
||||
identifier: N::BlankId,
|
||||
input_dataset: &InputDataset<N>,
|
||||
) -> HexHash
|
||||
where
|
||||
N: VocabularyMut,
|
||||
N::BlankId: Clone + Eq + Hash,
|
||||
S: Sha256,
|
||||
{
|
||||
// step 1
|
||||
let mut hash_to_related_blank_nodes: HashMap<HexHash, HashSet<N::BlankId>> = HashMap::new();
|
||||
|
||||
// step 2
|
||||
if let Some(quad_positions) = blank_node_to_quads.get(&identifier) {
|
||||
// step 3
|
||||
for quad_position in quad_positions {
|
||||
let quad = input_dataset
|
||||
.get(*quad_position)
|
||||
.expect("Positions are created from the input dataset");
|
||||
// step 2
|
||||
if let Some(quad_positions) = self.blank_node_to_quads.get(&identifier).cloned() {
|
||||
// step 3
|
||||
for quad_position in quad_positions {
|
||||
let quad = input_dataset
|
||||
.get(quad_position)
|
||||
.expect("Positions are created from the input dataset");
|
||||
|
||||
// step 3.1
|
||||
let iter = [
|
||||
("s", subject_as_blank_id::<N>(quad.subject())),
|
||||
("o", object_as_blank_id::<N>(quad.object())),
|
||||
("g", quad.graph().and_then(subject_as_blank_id::<N>)),
|
||||
]
|
||||
.into_iter()
|
||||
.filter_map(|(position, opt)| Some((position, opt?)))
|
||||
.filter(|(_, blank_id)| identifier != **blank_id);
|
||||
// step 3.1
|
||||
let iter = [
|
||||
("s", subject_as_blank_id::<N>(quad.subject())),
|
||||
("o", object_as_blank_id::<N>(quad.object())),
|
||||
("g", quad.graph().and_then(subject_as_blank_id::<N>)),
|
||||
]
|
||||
.into_iter()
|
||||
.filter_map(|(position, opt)| Some((position, opt?)))
|
||||
.filter(|(_, blank_id)| identifier != **blank_id);
|
||||
|
||||
for (position, related) in iter {
|
||||
// step 3.1.1
|
||||
let hash = hash_related_blank_node(
|
||||
blank_node_to_quads,
|
||||
canon_issuer,
|
||||
canon_vocabulary,
|
||||
issuer,
|
||||
vocabulary,
|
||||
related,
|
||||
quad,
|
||||
position,
|
||||
input_dataset,
|
||||
sha256,
|
||||
);
|
||||
for (position, related) in iter {
|
||||
// step 3.1.1
|
||||
let hash = self.hash_related_blank_node(
|
||||
issuer,
|
||||
vocabulary,
|
||||
related,
|
||||
quad,
|
||||
position,
|
||||
input_dataset,
|
||||
);
|
||||
|
||||
// step 3.1.2
|
||||
hash_to_related_blank_nodes
|
||||
.entry(hash)
|
||||
.or_default()
|
||||
.insert(related.clone());
|
||||
// step 3.1.2
|
||||
hash_to_related_blank_nodes
|
||||
.entry(hash)
|
||||
.or_default()
|
||||
.insert(related.clone());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
eprintln!("No quad positions");
|
||||
}
|
||||
} else {
|
||||
eprintln!("No quad positions");
|
||||
}
|
||||
|
||||
// step 4
|
||||
let mut data_to_hash = String::new();
|
||||
// step 4
|
||||
let mut data_to_hash = String::new();
|
||||
|
||||
// step 5
|
||||
for (related_hash, blank_node_list) in hash_to_related_blank_nodes {
|
||||
// step 5.1
|
||||
data_to_hash += &related_hash.0;
|
||||
// step 5
|
||||
for (related_hash, blank_node_list) in hash_to_related_blank_nodes {
|
||||
// step 5.1
|
||||
data_to_hash += &related_hash.0;
|
||||
|
||||
// step 5.2
|
||||
let mut chosen_path = String::new();
|
||||
// step 5.2
|
||||
let mut chosen_path = String::new();
|
||||
|
||||
// step 5.3
|
||||
let mut chosen_issuer = Default::default();
|
||||
// step 5.3
|
||||
let mut chosen_issuer = Default::default();
|
||||
|
||||
'permute: for permutation in permute(blank_node_list) {
|
||||
// step 5.4.1
|
||||
let mut issuer_copy = issuer.clone();
|
||||
// step 5.4.2
|
||||
let mut path = String::new();
|
||||
// step 5.4.3
|
||||
let mut recursion_list = HashSet::new();
|
||||
'permute: for permutation in permute(blank_node_list) {
|
||||
// step 5.4.1
|
||||
let mut issuer_copy = issuer.clone();
|
||||
// step 5.4.2
|
||||
let mut path = String::new();
|
||||
// step 5.4.3
|
||||
let mut recursion_list = HashSet::new();
|
||||
|
||||
// step 5.4.4
|
||||
for related in permutation {
|
||||
if let Some(blank) = canon_issuer.get(&related) {
|
||||
// step 5.4.4.1
|
||||
if let Some(blank_id) = canon_vocabulary.blank_id(blank) {
|
||||
path += &blank_id.to_string();
|
||||
// step 5.4.4
|
||||
for related in permutation {
|
||||
if let Some(blank) = self.canonical_issuer.get(&related) {
|
||||
// step 5.4.4.1
|
||||
if let Some(blank_id) = self.canonical_vocabulary.blank_id(blank) {
|
||||
path += blank_id;
|
||||
} else {
|
||||
eprintln!("No blank in vocabulary");
|
||||
}
|
||||
} else {
|
||||
// step 5.4.4.2
|
||||
// step 5.4.4.2.1
|
||||
recursion_list.insert(related.clone());
|
||||
// step 5.4.4.2.2
|
||||
issuer_copy.issue_identifier(related, vocabulary);
|
||||
}
|
||||
|
||||
// step 5.4.4.3
|
||||
if !chosen_path.is_empty()
|
||||
&& path.len() >= chosen_path.len()
|
||||
&& path > chosen_path
|
||||
{
|
||||
continue 'permute;
|
||||
}
|
||||
}
|
||||
|
||||
// step 5.4.5
|
||||
for related in recursion_list {
|
||||
// step 5.4.5.1
|
||||
let result = self.hash_n_degree_quads(
|
||||
vocabulary,
|
||||
&mut issuer_copy,
|
||||
related.clone(),
|
||||
input_dataset,
|
||||
);
|
||||
// step 5.4.5.2
|
||||
let new_blank = issuer_copy.issue_identifier(related, vocabulary);
|
||||
|
||||
if let Some(blank_id) = vocabulary.blank_id(&new_blank) {
|
||||
path += blank_id;
|
||||
|
||||
// step 5.4.5.3
|
||||
path += "<";
|
||||
path += result.0.as_str();
|
||||
path += ">";
|
||||
} else {
|
||||
eprintln!("No blank in vocabulary");
|
||||
}
|
||||
} else {
|
||||
// step 5.4.4.2
|
||||
// step 5.4.4.2.1
|
||||
recursion_list.insert(related.clone());
|
||||
// step 5.4.4.2.2
|
||||
issuer_copy.issue_identifier(related, vocabulary);
|
||||
|
||||
// step 5.4.5.4 is a no-op
|
||||
|
||||
// step 5.4.5.5
|
||||
if !chosen_path.is_empty()
|
||||
&& path.len() >= chosen_path.len()
|
||||
&& path > chosen_path
|
||||
{
|
||||
continue 'permute;
|
||||
}
|
||||
}
|
||||
|
||||
// step 5.4.4.3
|
||||
if !chosen_path.is_empty() && path.len() >= chosen_path.len() && path > chosen_path
|
||||
{
|
||||
continue 'permute;
|
||||
if chosen_path.is_empty() || path < chosen_path {
|
||||
chosen_path = path;
|
||||
chosen_issuer = issuer_copy;
|
||||
}
|
||||
}
|
||||
|
||||
// step 5.4.5
|
||||
for related in recursion_list {
|
||||
// step 5.4.5.1
|
||||
let result = hash_n_degree_quads(
|
||||
blank_node_to_quads,
|
||||
vocabulary,
|
||||
canon_vocabulary,
|
||||
&mut issuer_copy,
|
||||
canon_issuer,
|
||||
related.clone(),
|
||||
input_dataset,
|
||||
sha256,
|
||||
);
|
||||
// step 5.4.5.2
|
||||
let new_blank = issuer_copy.issue_identifier(related, vocabulary);
|
||||
// step 5.5
|
||||
data_to_hash += &chosen_path;
|
||||
|
||||
if let Some(blank_id) = vocabulary.blank_id(&new_blank) {
|
||||
path += &blank_id.to_string();
|
||||
// step 5.6
|
||||
std::mem::swap(issuer, &mut chosen_issuer);
|
||||
}
|
||||
|
||||
// step 5.4.5.3
|
||||
path += "<";
|
||||
path += result.0.as_str();
|
||||
path += ">";
|
||||
} else {
|
||||
eprintln!("No blank in vocabulary");
|
||||
}
|
||||
// step 6
|
||||
self.sha256.update(data_to_hash.as_bytes());
|
||||
self.sha256.finalize_hex_and_reset()
|
||||
}
|
||||
|
||||
// step 5.4.5.4 is a no-op
|
||||
fn hash_related_blank_node(
|
||||
&mut self,
|
||||
issuer: &Issuer<N::BlankId>,
|
||||
vocabulary: &N,
|
||||
related: &N::BlankId,
|
||||
quad: &NormalizingQuad<N>,
|
||||
position: &str,
|
||||
input_dataset: &InputDataset<N>,
|
||||
) -> HexHash
|
||||
where
|
||||
N::BlankId: Clone + Eq + Hash,
|
||||
S: Sha256,
|
||||
{
|
||||
// step 1
|
||||
let identifier = if let Some(blank_id) = self.canonical_issuer.get(related) {
|
||||
let blank = self
|
||||
.canonical_vocabulary
|
||||
.blank_id(blank_id)
|
||||
.expect("No blank in vocabulary");
|
||||
blank.to_string()
|
||||
} else if let Some(blank_id) = issuer.get(related) {
|
||||
let blank = vocabulary
|
||||
.blank_id(blank_id)
|
||||
.expect("No blank in vocabulary");
|
||||
blank.to_string()
|
||||
} else {
|
||||
self.hash_first_degree_quads(related.clone(), input_dataset)
|
||||
.0
|
||||
};
|
||||
|
||||
// step 5.4.5.5
|
||||
if !chosen_path.is_empty() && path.len() >= chosen_path.len() && path > chosen_path
|
||||
{
|
||||
continue 'permute;
|
||||
}
|
||||
}
|
||||
// step 2
|
||||
let mut input = String::from(position);
|
||||
|
||||
if chosen_path.is_empty() || path < chosen_path {
|
||||
chosen_path = path;
|
||||
chosen_issuer = issuer_copy;
|
||||
// step 3
|
||||
if position != "g" {
|
||||
input += "<";
|
||||
input += quad.predicate().with(&*self.canonical_vocabulary).as_str();
|
||||
input += ">";
|
||||
}
|
||||
|
||||
// step 4
|
||||
input += &identifier;
|
||||
|
||||
// step 5
|
||||
self.sha256.update(input.as_bytes());
|
||||
self.sha256.finalize_hex_and_reset()
|
||||
}
|
||||
|
||||
fn hash_first_degree_quads(
|
||||
&mut self,
|
||||
identifier: N::BlankId,
|
||||
input_dataset: &InputDataset<N>,
|
||||
) -> HexHash
|
||||
where
|
||||
N::BlankId: Eq + Hash + Clone,
|
||||
S: Sha256,
|
||||
{
|
||||
// Step 1
|
||||
let mut nquads = Vec::new();
|
||||
|
||||
// step 2
|
||||
if let Some(quad_positions) = self.blank_node_to_quads.get(&identifier).cloned() {
|
||||
// step 3
|
||||
for quad_position in quad_positions {
|
||||
let quad = input_dataset
|
||||
.get(quad_position)
|
||||
.expect("Positions are created from the input dataset");
|
||||
|
||||
// step 3.1, 3.1.1, and 3.1.1.1
|
||||
let serizlied = self.serialize_quad(&identifier, quad);
|
||||
|
||||
nquads.push(serizlied);
|
||||
}
|
||||
}
|
||||
|
||||
// step 5.5
|
||||
data_to_hash += &chosen_path;
|
||||
// step 4
|
||||
nquads.sort();
|
||||
|
||||
// step 5.6
|
||||
std::mem::swap(issuer, &mut chosen_issuer);
|
||||
// step 5
|
||||
let joined = nquads.join("");
|
||||
|
||||
self.sha256.update(joined.as_bytes());
|
||||
self.sha256.finalize_hex_and_reset()
|
||||
}
|
||||
|
||||
// step 6
|
||||
sha256.update(data_to_hash.as_bytes());
|
||||
sha256.finalize_hex_and_reset()
|
||||
fn serialize_quad(&self, identifier: &N::BlankId, quad: &NormalizingQuad<N>) -> String
|
||||
where
|
||||
N::BlankId: Clone + Eq,
|
||||
{
|
||||
let subject = self.serialize_subject(identifier, quad.subject());
|
||||
let predicate = quad
|
||||
.predicate()
|
||||
.with(&*self.canonical_vocabulary)
|
||||
.rdf_display()
|
||||
.to_string();
|
||||
let object = self.serialize_object(identifier, quad.object());
|
||||
let graph = quad
|
||||
.graph()
|
||||
.map(|graph| self.serialize_subject(identifier, graph));
|
||||
|
||||
if let Some(graph) = graph {
|
||||
format!("{subject} {predicate} {object} {graph} .\n")
|
||||
} else {
|
||||
format!("{subject} {predicate} {object} .\n")
|
||||
}
|
||||
}
|
||||
|
||||
fn serialize_subject(
|
||||
&'a self,
|
||||
identifier: &N::BlankId,
|
||||
subject: &'a QuadSubject<N>,
|
||||
) -> Cow<'a, str>
|
||||
where
|
||||
N::BlankId: Eq,
|
||||
{
|
||||
if subject.is_blank() && matches_identifier::<N>(identifier, subject) {
|
||||
Cow::Borrowed("_:a")
|
||||
} else if subject.is_blank() {
|
||||
Cow::Borrowed("_:z")
|
||||
} else {
|
||||
Cow::Owned(
|
||||
subject
|
||||
.with(&*self.canonical_vocabulary)
|
||||
.rdf_display()
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn serialize_object(&'a self, identifier: &N::BlankId, object: &'a QuadValue<N>) -> Cow<'a, str>
|
||||
where
|
||||
N::BlankId: Eq,
|
||||
{
|
||||
match object {
|
||||
Value::Literal(lit) => Cow::Owned(
|
||||
lit.with(&*self.canonical_vocabulary)
|
||||
.rdf_display()
|
||||
.to_string(),
|
||||
),
|
||||
Value::Reference(subject) => self.serialize_subject(identifier, subject),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn permute<B>(set: HashSet<B>) -> impl Iterator<Item = Vec<B>>
|
||||
|
@ -579,156 +634,6 @@ where
|
|||
set.into_iter().permutations(len)
|
||||
}
|
||||
|
||||
fn hash_related_blank_node<N, S>(
|
||||
blank_node_to_quads: &HashMap<N::BlankId, HashSet<Position>>,
|
||||
canon_issuer: &Issuer<N::BlankId>,
|
||||
canon_vocabulary: &N,
|
||||
issuer: &Issuer<N::BlankId>,
|
||||
vocabulary: &N,
|
||||
related: &N::BlankId,
|
||||
quad: &NormalizingQuad<N>,
|
||||
position: &str,
|
||||
input_dataset: &InputDataset<N>,
|
||||
sha256: &mut S,
|
||||
) -> HexHash
|
||||
where
|
||||
N: Vocabulary,
|
||||
N::BlankId: Clone + Eq + Hash,
|
||||
S: Sha256,
|
||||
{
|
||||
// step 1
|
||||
let identifier = if let Some(blank_id) = canon_issuer.get(related) {
|
||||
let blank = canon_vocabulary
|
||||
.blank_id(blank_id)
|
||||
.expect("No blank in vocabulary");
|
||||
blank.to_string()
|
||||
} else if let Some(blank_id) = issuer.get(related) {
|
||||
let blank = vocabulary
|
||||
.blank_id(blank_id)
|
||||
.expect("No blank in vocabulary");
|
||||
blank.to_string()
|
||||
} else {
|
||||
hash_first_degree_quads(
|
||||
blank_node_to_quads,
|
||||
canon_vocabulary,
|
||||
related.clone(),
|
||||
input_dataset,
|
||||
sha256,
|
||||
)
|
||||
.0
|
||||
};
|
||||
|
||||
// step 2
|
||||
let mut input = String::from(position);
|
||||
|
||||
// step 3
|
||||
if position != "g" {
|
||||
input += "<";
|
||||
input += &quad.predicate().with(canon_vocabulary).to_string();
|
||||
input += ">";
|
||||
}
|
||||
|
||||
// step 4
|
||||
input += &identifier;
|
||||
|
||||
// step 5
|
||||
sha256.update(input.as_bytes());
|
||||
sha256.finalize_hex_and_reset()
|
||||
}
|
||||
|
||||
fn hash_first_degree_quads<N, S>(
|
||||
blank_node_to_quads: &HashMap<N::BlankId, HashSet<Position>>,
|
||||
vocabulary: &N,
|
||||
identifier: N::BlankId,
|
||||
input_dataset: &InputDataset<N>,
|
||||
sha256: &mut S,
|
||||
) -> HexHash
|
||||
where
|
||||
N: Vocabulary,
|
||||
N::BlankId: Eq + Hash + Clone,
|
||||
S: Sha256,
|
||||
{
|
||||
// Step 1
|
||||
let mut nquads = Vec::new();
|
||||
|
||||
// step 2
|
||||
if let Some(quad_positions) = blank_node_to_quads.get(&identifier) {
|
||||
// step 3
|
||||
for quad_position in quad_positions {
|
||||
let quad = input_dataset
|
||||
.get(*quad_position)
|
||||
.expect("Positions are created from the input dataset");
|
||||
|
||||
// step 3.1, 3.1.1, and 3.1.1.1
|
||||
let serizlied = serialize_quad(&identifier, quad, vocabulary);
|
||||
|
||||
nquads.push(serizlied);
|
||||
}
|
||||
}
|
||||
|
||||
// step 4
|
||||
nquads.sort();
|
||||
|
||||
// step 5
|
||||
let joined = nquads.join("");
|
||||
|
||||
sha256.update(joined.as_bytes());
|
||||
|
||||
sha256.finalize_hex_and_reset()
|
||||
}
|
||||
|
||||
fn serialize_quad<N>(identifier: &N::BlankId, quad: &NormalizingQuad<N>, vocabulary: &N) -> String
|
||||
where
|
||||
N: Vocabulary,
|
||||
N::BlankId: Clone + Eq,
|
||||
{
|
||||
let subject = serialize_subject(identifier, quad.subject(), vocabulary);
|
||||
let predicate = quad.predicate().with(vocabulary).rdf_display().to_string();
|
||||
let object = serialize_object(identifier, quad.object(), vocabulary);
|
||||
let graph = quad
|
||||
.graph()
|
||||
.map(|graph| serialize_subject(identifier, graph, vocabulary));
|
||||
|
||||
if let Some(graph) = graph {
|
||||
format!("{subject} {predicate} {object} {graph} .\n")
|
||||
} else {
|
||||
format!("{subject} {predicate} {object} .\n")
|
||||
}
|
||||
}
|
||||
|
||||
fn serialize_subject<'a, N>(
|
||||
identifier: &N::BlankId,
|
||||
subject: &'a QuadSubject<N>,
|
||||
vocabulary: &'a N,
|
||||
) -> Cow<'a, str>
|
||||
where
|
||||
N: Vocabulary,
|
||||
N::BlankId: Eq,
|
||||
{
|
||||
if subject.is_blank() && matches_identifier::<N>(identifier, subject) {
|
||||
Cow::Borrowed("_:a")
|
||||
} else if subject.is_blank() {
|
||||
Cow::Borrowed("_:z")
|
||||
} else {
|
||||
Cow::Owned(subject.with(vocabulary).rdf_display().to_string())
|
||||
}
|
||||
}
|
||||
|
||||
fn serialize_object<'a, N>(
|
||||
identifier: &N::BlankId,
|
||||
object: &'a QuadValue<N>,
|
||||
vocabulary: &'a N,
|
||||
) -> Cow<'a, str>
|
||||
where
|
||||
N: Vocabulary,
|
||||
N::BlankId: Eq,
|
||||
{
|
||||
match object {
|
||||
Value::Literal(lit) => Cow::Owned(lit.with(vocabulary).rdf_display().to_string()),
|
||||
Value::Reference(subject) => serialize_subject(identifier, subject, vocabulary),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn quad_to_string<N>(quad: &NormalizingQuad<N>, vocabulary: &N) -> String
|
||||
where
|
||||
N: Vocabulary,
|
||||
|
@ -788,14 +693,6 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
fn canonicalization_node_generator() -> Blank {
|
||||
make_issuer("c14n")
|
||||
}
|
||||
|
||||
fn make_issuer(prefix: &str) -> Blank {
|
||||
Blank::new_with_prefix(String::from(prefix))
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Security {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "Aborted due to time complexity")
|
||||
|
@ -803,24 +700,3 @@ impl std::fmt::Display for Security {
|
|||
}
|
||||
|
||||
// Marker impl: `Display` and `Debug` are implemented elsewhere, so the
// default-provided `Error` methods are sufficient.
impl std::error::Error for Security {}
|
||||
|
||||
// Hand-written `Clone`: the blank-node generator is presumably not `Clone`
// itself, so an equivalent generator is rebuilt from its observable state
// (prefix + counter) — TODO(review): confirm that is why this isn't derived.
impl<B> Clone for Issuer<B>
where
    B: Clone,
{
    fn clone(&self) -> Self {
        Self {
            // Recreate the generator at the same prefix and count so the
            // clone continues issuing labels from the same point.
            blank_node_generator: Blank::new_full(
                self.blank_node_generator.prefix().to_string(),
                self.blank_node_generator.count(),
            ),
            issued_identifier_list: self.issued_identifier_list.clone(),
        }
    }
}
|
||||
|
||||
// `Default` simply delegates to the primary constructor.
impl<B> Default for Issuer<B> {
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
|
95
src/output_dataset.rs
Normal file
95
src/output_dataset.rs
Normal file
|
@ -0,0 +1,95 @@
|
|||
use crate::input_dataset::{NormalizingQuad, QuadValue};
|
||||
use contextual::DisplayWithContext;
|
||||
use contextual::WithContext;
|
||||
use json_ld::rdf::Value;
|
||||
use rdf_types::{RdfDisplay, Vocabulary};
|
||||
|
||||
/// The result of normalization: a set of quads held in a canonical order
/// (sorted by their N-Quads string form — see `OutputDataset::new`).
pub struct OutputDataset<N>
where
    N: Vocabulary,
{
    // Kept sorted by construction; accessors never reorder.
    quads: Vec<NormalizingQuad<N>>,
}
|
||||
|
||||
impl<N> OutputDataset<N>
|
||||
where
|
||||
N: Vocabulary,
|
||||
{
|
||||
pub(crate) fn new(mut quads: Vec<NormalizingQuad<N>>, vocabulary: &N) -> Self {
|
||||
quads.sort_by_cached_key(|quad| super::quad_to_string(quad, vocabulary));
|
||||
|
||||
Self { quads }
|
||||
}
|
||||
|
||||
pub fn quads(&self) -> &[NormalizingQuad<N>] {
|
||||
&self.quads
|
||||
}
|
||||
|
||||
pub fn into_quads(self) -> Vec<NormalizingQuad<N>> {
|
||||
self.quads
|
||||
}
|
||||
}
|
||||
|
||||
impl<N> DisplayWithContext<N> for OutputDataset<N>
|
||||
where
|
||||
N: Vocabulary,
|
||||
{
|
||||
fn fmt_with(&self, vocabulary: &N, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
for quad in &self.quads {
|
||||
quad.subject().with(vocabulary).rdf_fmt(f)?;
|
||||
write!(f, " ")?;
|
||||
quad.predicate().with(vocabulary).rdf_fmt(f)?;
|
||||
write!(f, " ")?;
|
||||
write_object(quad.object(), vocabulary, f)?;
|
||||
write!(f, " ")?;
|
||||
if let Some(graph) = quad.graph() {
|
||||
graph.with(vocabulary).rdf_fmt(f)?;
|
||||
write!(f, " ")?;
|
||||
}
|
||||
|
||||
writeln!(f, ".")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// Hand-written `Debug`: a derive would put a `Debug` bound on `N` itself,
// while only the associated `BlankId`/`Iri` types need it.
impl<N> std::fmt::Debug for OutputDataset<N>
where
    N: Vocabulary,
    N::BlankId: std::fmt::Debug,
    N::Iri: std::fmt::Debug,
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("OutputDataset")
            .field("quads", &self.quads)
            .finish()
    }
}
|
||||
|
||||
// Hand-written `Clone` for the same reason as `Debug`: a derive would
// require `N: Clone`, but only the associated types are actually cloned.
impl<N> Clone for OutputDataset<N>
where
    N: Vocabulary,
    N::BlankId: Clone,
    N::Iri: Clone,
{
    fn clone(&self) -> Self {
        Self {
            quads: self.quads.clone(),
        }
    }
}
|
||||
|
||||
fn write_object<'a, N>(
|
||||
object: &'a QuadValue<N>,
|
||||
vocabulary: &'a N,
|
||||
formatter: &mut std::fmt::Formatter,
|
||||
) -> std::fmt::Result
|
||||
where
|
||||
N: Vocabulary,
|
||||
{
|
||||
match object {
|
||||
Value::Literal(ref lit) => lit.with(vocabulary).rdf_fmt(formatter),
|
||||
Value::Reference(ref subject) => subject.with(vocabulary).rdf_fmt(formatter),
|
||||
}
|
||||
}
|
|
@ -1,4 +1,3 @@
|
|||
|
||||
use sha2::{Digest, Sha256};
|
||||
|
||||
impl super::Sha256 for Sha256 {
|
||||
|
@ -9,6 +8,6 @@ impl super::Sha256 for Sha256 {
|
|||
fn finalize_hex_and_reset(&mut self) -> crate::HexHash {
|
||||
let output = self.finalize_reset();
|
||||
|
||||
crate::HexHash(hex::encode(&output))
|
||||
crate::HexHash(hex::encode(output))
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue