Clean up files a bit

This commit is contained in:
asonix 2022-12-11 19:05:24 -06:00
parent 6d3c929c18
commit e906063432
6 changed files with 556 additions and 491 deletions

View file

@ -1,9 +1,9 @@
use contextual::WithContext;
use iref::{Iri, IriBuf};
use json_ld::{
syntax::{parse::MetaError, Parse, Value},
Flatten, JsonLdProcessor, Loader, RemoteDocument,
};
use json_ld_normalization::quad_to_string;
use locspan::{Location, Meta, Span};
use rdf_types::{generator::Blank, vocabulary::Index, IriVocabulary, IriVocabularyMut};
use reqwest::Client;
@ -15,6 +15,80 @@ use std::{
sync::{Arc, RwLock},
};
/// Entry point: fetches a fixed set of ActivityPub documents over HTTP and
/// runs each through the JSON-LD normalization pipeline.
#[tokio::main]
async fn main() -> Result<(), AnyError> {
    // Documents to fetch and normalize, one per loop iteration below.
    let targets = [
        iri!("https://relay.asonix.dog/actor"),
        iri!("https://masto.asonix.dog/actor"),
        iri!("https://masto.asonix.dog/users/asonix"),
        iri!("https://masto.asonix.dog/users/kumu"),
        iri!("https://yiff.life/users/6my"),
        iri!("https://meow.social/users/6my"),
    ];

    let cache = Cache::new();
    let client = Client::builder()
        .user_agent("json-ld-playground")
        .build()
        .expect("Successful client");

    for target in targets {
        // Ask the server for the ActivityStreams JSON representation.
        let response = client
            .get(target.as_str())
            .header("accept", "application/activity+json")
            .send()
            .await?;
        let body = response.text().await?;

        normalize_document(cache.clone(), client.clone(), target, &body).await?;
    }

    Ok(())
}
/// Parses, expands, flattens, and normalizes a single JSON-LD document, then
/// prints the normalized dataset to stdout.
///
/// `cache` and `client` feed the context loader; `iri` is both the document's
/// identity and the location metadata attached to parsed values.
async fn normalize_document(
    cache: Cache,
    client: Client,
    iri: Iri<'static>,
    document: &str,
) -> Result<(), AnyError> {
    let mut vocabulary: rdf_types::IndexVocabulary = rdf_types::IndexVocabulary::new();

    // Index the document IRI so it can be used as parse-location metadata.
    let iri_index = vocabulary.insert(iri);

    let content_type = "application/activity+json".parse()?;
    let parsed = Value::parse_str(document, |span| Location::new(iri_index.clone(), span))
        .expect("Failed to parse");
    let input = RemoteDocument::new(Some(iri_index.clone()), Some(content_type), parsed);

    // Expansion may need to fetch remote contexts; the loader handles that.
    let mut loader = ReqwestLoader::with_default_parser(cache, client);
    let expanded = input
        .expand_with(&mut vocabulary, &mut loader)
        .await
        .expect("Failed to expand");

    // Blank-node generator used while flattening the expanded document.
    let mut pre_gen = Blank::new().with_metadata(Location::new(iri_index.clone(), Span::default()));
    let flattened = expanded
        .flatten_with(&mut vocabulary, &mut pre_gen, true)
        .expect("Failed to flatten");

    let output_document = json_ld_normalization::normalize::<_, _, _, sha2::Sha256>(
        &mut vocabulary,
        iri_index,
        flattened.0,
        true,
    )?;

    println!("{}", output_document.with(&vocabulary));

    Ok(())
}
/// JSON-LD context IRI for the ActivityStreams vocabulary.
// `'static` is implied for `&str` consts; spelling it out trips clippy's
// `redundant_static_lifetimes` lint.
const ACTIVITYSTREAMS: &str = "https://www.w3.org/ns/activitystreams";
/// JSON-LD context IRI for the W3C security vocabulary (v1).
const SECURITY: &str = "https://w3id.org/security/v1";
@ -161,82 +235,3 @@ impl<I: Send + Sync, T: Send, M: Send, E> Loader<I, M> for ReqwestLoader<I, M, T
})
}
}
// NOTE(review): pre-cleanup entry point (the "removed" side of this diff);
// the replacement drives `normalize_document` instead of `do_the_thing`.
/// Entry point: fetches a fixed list of ActivityPub actors and normalizes each.
#[tokio::main]
async fn main() -> Result<(), AnyError> {
    let cache = Cache::new();
    let client = Client::builder()
        .user_agent("json-ld-playground")
        .build()
        .expect("Successful client");
    // Documents to fetch; each is processed independently below.
    let iris = [
        iri!("https://relay.asonix.dog/actor"),
        iri!("https://masto.asonix.dog/actor"),
        iri!("https://masto.asonix.dog/users/asonix"),
    ];
    for iri in iris {
        // Request the ActivityStreams JSON representation of the document.
        let document = client
            .get(iri.as_str())
            .header("accept", "application/activity+json")
            .send()
            .await?
            .text()
            .await?;
        do_the_thing(cache.clone(), client.clone(), iri, &document).await?;
    }
    Ok(())
}
/// Pre-cleanup normalization pipeline: parse, expand, flatten, normalize, then
/// print the sorted quad strings one per line.
async fn do_the_thing(
    cache: Cache,
    client: Client,
    iri: Iri<'static>,
    document: &str,
) -> Result<(), AnyError> {
    let mut vocabulary: rdf_types::IndexVocabulary = rdf_types::IndexVocabulary::new();
    // Index the document IRI; it doubles as parse-location metadata below.
    let iri_index = vocabulary.insert(iri);
    let input = RemoteDocument::new(
        Some(iri_index.clone()),
        Some("application/activity+json".parse()?),
        Value::parse_str(document, |span| Location::new(iri_index.clone(), span))
            .expect("Failed to parse"),
    );
    // The loader fetches any remote contexts needed during expansion.
    let mut loader = ReqwestLoader::with_default_parser(cache, client);
    let expanded = input
        .expand_with(&mut vocabulary, &mut loader)
        .await
        .expect("Failed to expand");
    // Blank-node generator used while flattening the expanded document.
    let mut pre_gen = Blank::new().with_metadata(Location::new(iri_index.clone(), Span::default()));
    let flattened = expanded
        .flatten_with(&mut vocabulary, &mut pre_gen, true)
        .expect("Failed to flatten");
    let output_document = json_ld_normalization::normalize::<_, _, _, sha2::Sha256>(
        &mut vocabulary,
        iri_index,
        flattened.0,
        true,
    )?;
    // Render each quad, sort for deterministic output, then print.
    let mut strings = output_document
        .quads
        .iter()
        .map(|quad| quad_to_string(quad, &vocabulary))
        .collect::<Vec<_>>();
    strings.sort();
    println!("{}", strings.join(""));
    Ok(())
}

View file

@ -51,7 +51,7 @@ where
subject.into_owned(),
predicate.into_owned(),
object,
graph.map(|graph| graph.clone()),
graph.cloned(),
)
})
.collect();

100
src/issuer.rs Normal file
View file

@ -0,0 +1,100 @@
use indexmap::IndexMap;
use json_ld::ValidId as Subject;
use rdf_types::{generator::Blank, BlankIdVocabularyMut, Vocabulary};
use std::hash::Hash;
/// Identifier issuer: hands out stable blank-node identifiers and remembers
/// which input identifier was mapped to which issued one (the "identifier
/// issuer" structure from the RDF dataset canonicalization algorithm).
pub(crate) struct Issuer<B> {
    // Identifier Prefix and Identifier Counter
    blank_node_generator: Blank,
    // Issued Identifier List
    issued_identifier_list: IndexMap<B, B>,
}
impl<B> Issuer<B> {
    /// Creates an issuer using the canonical "c14n" prefix.
    pub(crate) fn new() -> Self {
        Self {
            blank_node_generator: canonicalization_node_generator(),
            issued_identifier_list: Default::default(),
        }
    }

    /// Creates an issuer whose generated identifiers use `prefix`.
    pub(crate) fn new_with_prefix(prefix: &str) -> Self {
        Self {
            blank_node_generator: make_issuer(prefix),
            issued_identifier_list: Default::default(),
        }
    }

    /// Iterates `(input identifier, issued identifier)` pairs in issue order.
    pub(crate) fn iter(&self) -> impl Iterator<Item = (&B, &B)> {
        self.issued_identifier_list.iter()
    }

    /// Returns the identifier previously issued for `identifier`, if any.
    pub(crate) fn get(&self, identifier: &B) -> Option<&B>
    where
        B: Eq + Hash,
    {
        self.issued_identifier_list.get(identifier)
    }

    /// Whether an identifier has already been issued for `identifier`.
    pub(crate) fn contains(&self, identifier: &B) -> bool
    where
        B: Eq + Hash,
    {
        self.issued_identifier_list.contains_key(identifier)
    }

    /// Returns the issued identifier for `identifier`, generating and
    /// recording a fresh one on first use ("Issue Identifier" algorithm).
    pub(crate) fn issue_identifier<N>(&mut self, identifier: B, vocabulary: &mut N) -> N::BlankId
    where
        N: Vocabulary<BlankId = B> + BlankIdVocabularyMut,
        B: Eq + Hash + Clone,
    {
        use rdf_types::Generator;

        // step 1: reuse a previously issued identifier when one exists.
        if let Some(existing) = self.issued_identifier_list.get(&identifier) {
            return existing.clone();
        }

        // step 2 and 4: generate a fresh blank identifier.
        let issued = match self.blank_node_generator.next(vocabulary) {
            Subject::Blank(blank) => blank,
            Subject::Iri(_) => {
                unreachable!("Blank ID generators should only generate blank IDs")
            }
        };

        // step 3: record the mapping for future lookups.
        self.issued_identifier_list
            .insert(identifier, issued.clone());

        // step 5
        issued
    }
}
/// Generator for canonical identifiers (the "c14n" prefix used by the
/// canonicalization algorithm).
fn canonicalization_node_generator() -> Blank {
    make_issuer("c14n")
}

/// Builds a blank-node generator whose identifiers start with `prefix`.
fn make_issuer(prefix: &str) -> Blank {
    Blank::new_with_prefix(prefix.to_string())
}
// Manual Clone impl: presumably `Blank` does not implement `Clone`, so the
// generator is rebuilt from its prefix and counter — TODO confirm against
// rdf_types.
impl<B> Clone for Issuer<B>
where
    B: Clone,
{
    fn clone(&self) -> Self {
        Self {
            // Recreate the generator at the same prefix/count position so the
            // clone continues issuing from where the original left off.
            blank_node_generator: Blank::new_full(
                self.blank_node_generator.prefix().to_string(),
                self.blank_node_generator.count(),
            ),
            issued_identifier_list: self.issued_identifier_list.clone(),
        }
    }
}
impl<B> Default for Issuer<B> {
    /// Defaults to the canonical "c14n"-prefixed issuer (`Issuer::new`).
    fn default() -> Self {
        Self::new()
    }
}

View file

@ -1,5 +1,4 @@
use contextual::WithContext;
use indexmap::IndexMap;
use itertools::Itertools;
use json_ld::{rdf::Value, RdfQuads, ValidId as Subject};
use locspan::{Location, Span};
@ -11,10 +10,15 @@ use std::{
};
mod input_dataset;
mod issuer;
mod output_dataset;
#[cfg(feature = "rustcrypto")]
mod sha2_impls;
use input_dataset::{InputDataset, NormalizingQuad, Position, QuadSubject, QuadValue};
use issuer::Issuer;
pub use output_dataset::OutputDataset;
#[derive(Clone, Debug)]
pub struct Security;
@ -28,27 +32,13 @@ pub trait Sha256 {
fn finalize_hex_and_reset(&mut self) -> HexHash;
}
pub struct OutputDataset<N>
where
N: Vocabulary,
{
pub quads: Vec<NormalizingQuad<N>>,
}
pub struct Issuer<B> {
// Identifier Prefix and Identifier Counter
blank_node_generator: Blank,
// Issued Identifier List
issued_identifier_list: IndexMap<B, B>,
}
pub struct CanonicalizationState<'a, N, S>
where
N: Vocabulary,
{
sha256: S,
vocabulary: &'a mut N,
canonical_vocabulary: &'a mut N,
canonical_issuer: Issuer<N::BlankId>,
@ -77,78 +67,18 @@ where
)
}
impl<B> Issuer<B> {
fn new() -> Self {
Self {
blank_node_generator: canonicalization_node_generator(),
issued_identifier_list: Default::default(),
}
}
fn new_with_prefix(prefix: &str) -> Self {
Self {
blank_node_generator: make_issuer(prefix),
issued_identifier_list: Default::default(),
}
}
fn iter(&self) -> impl Iterator<Item = (&B, &B)> {
self.issued_identifier_list.iter()
}
fn get(&self, identifier: &B) -> Option<&B>
where
B: Eq + Hash,
{
self.issued_identifier_list.get(identifier)
}
fn contains(&self, identifier: &B) -> bool
where
B: Eq + Hash,
{
self.issued_identifier_list.contains_key(identifier)
}
fn issue_identifier<N>(&mut self, identifier: B, vocabulary: &mut N) -> N::BlankId
where
N: Vocabulary<BlankId = B> + BlankIdVocabularyMut,
B: Eq + Hash + Clone,
{
use rdf_types::Generator;
// step 1
if let Some(blank) = self.get(&identifier) {
return blank.clone();
}
// step 2 and 4
let blank = match self.blank_node_generator.next(vocabulary) {
Subject::Blank(blank) => blank,
Subject::Iri(_) => unreachable!("Blank ID generators should only generate blank IDs"),
};
// step 3
self.issued_identifier_list
.insert(identifier, blank.clone());
// step 5
blank
}
}
impl<'a, N, S> CanonicalizationState<'a, N, S>
where
N: Vocabulary,
{
/// Step 1
fn new(vocabulary: &'a mut N) -> Self
fn new(canonical_vocabulary: &'a mut N) -> Self
where
S: Default,
{
Self {
sha256: S::default(),
vocabulary,
canonical_vocabulary,
canonical_issuer: Issuer::default(),
blank_node_to_quads: Default::default(),
hash_to_blank_nodes: Default::default(),
@ -195,7 +125,7 @@ where
{
let mut pre_gen = Blank::new().with_metadata(Location::new(document_id, Span::default()));
InputDataset::from_rdf_quads(rdf_quads, &mut self.vocabulary, &mut pre_gen)
InputDataset::from_rdf_quads(rdf_quads, self.canonical_vocabulary, &mut pre_gen)
}
// Step 2
@ -211,7 +141,7 @@ where
quad.graph().and_then(subject_as_blank_id::<N>),
]
.into_iter()
.filter_map(|opt| opt);
.flatten();
for blank_id in iter {
self.blank_node_to_quads
@ -230,8 +160,8 @@ where
N: VocabularyMut + BlankIdVocabularyMut,
{
// step 3
let mut non_normalized_identifiers: HashSet<&N::BlankId> =
self.blank_node_to_quads.keys().collect();
let mut non_normalized_identifiers: HashSet<N::BlankId> =
self.blank_node_to_quads.keys().cloned().collect();
// step 4
let mut simple = true;
@ -247,13 +177,7 @@ where
// step 5.3
for identifier in non_normalized_identifiers.iter() {
// step 5.3.1
let hash = hash_first_degree_quads(
&self.blank_node_to_quads,
self.vocabulary,
(*identifier).clone(),
input_dataset,
&mut self.sha256,
);
let hash = self.hash_first_degree_quads((*identifier).clone(), input_dataset);
// step 5.3.2
self.hash_to_blank_nodes
@ -277,7 +201,7 @@ where
// step 5.4.2
self.canonical_issuer
.issue_identifier(identifier, self.vocabulary);
.issue_identifier(identifier, self.canonical_vocabulary);
}
// step 5.4.5
@ -300,8 +224,7 @@ where
N::BlankId: Clone + Eq + Hash,
S: Sha256,
{
let hash_to_blank_nodes =
std::mem::replace(&mut self.hash_to_blank_nodes, Default::default());
let hash_to_blank_nodes = std::mem::take(&mut self.hash_to_blank_nodes);
for (_, identifier_list) in hash_to_blank_nodes {
// step 6.1
@ -327,15 +250,11 @@ where
temporary_issuer.issue_identifier(identifier.clone(), &mut temporary_vocabulary);
// step 6.2.4
let hash = hash_n_degree_quads(
&self.blank_node_to_quads,
let hash = self.hash_n_degree_quads(
&mut temporary_vocabulary,
&self.vocabulary,
&mut temporary_issuer,
&self.canonical_issuer,
identifier,
input_dataset,
&mut self.sha256,
);
hash_path_list.push((hash, temporary_issuer));
@ -346,7 +265,7 @@ where
// step 6.3.1
for (existing_identifier, _) in temporary_issuer.iter() {
self.canonical_issuer
.issue_identifier(existing_identifier.clone(), self.vocabulary);
.issue_identifier(existing_identifier.clone(), self.canonical_vocabulary);
}
}
}
@ -380,7 +299,7 @@ where
})
.collect();
OutputDataset { quads }
OutputDataset::new(quads, self.canonical_vocabulary)
}
fn translate_object(&self, object: &QuadValue<N>) -> Option<QuadValue<N>>
@ -406,168 +325,304 @@ where
}
}
}
}
fn hash_n_degree_quads<N, S>(
blank_node_to_quads: &HashMap<N::BlankId, HashSet<Position>>,
vocabulary: &mut N,
canon_vocabulary: &N,
issuer: &mut Issuer<N::BlankId>,
canon_issuer: &Issuer<N::BlankId>,
identifier: N::BlankId,
input_dataset: &InputDataset<N>,
sha256: &mut S,
) -> HexHash
where
N: Vocabulary + VocabularyMut,
N::BlankId: Clone + Eq + Hash,
S: Sha256,
{
// step 1
let mut hash_to_related_blank_nodes: HashMap<HexHash, HashSet<N::BlankId>> = HashMap::new();
fn hash_n_degree_quads(
&mut self,
vocabulary: &mut N,
issuer: &mut Issuer<N::BlankId>,
identifier: N::BlankId,
input_dataset: &InputDataset<N>,
) -> HexHash
where
N: VocabularyMut,
N::BlankId: Clone + Eq + Hash,
S: Sha256,
{
// step 1
let mut hash_to_related_blank_nodes: HashMap<HexHash, HashSet<N::BlankId>> = HashMap::new();
// step 2
if let Some(quad_positions) = blank_node_to_quads.get(&identifier) {
// step 3
for quad_position in quad_positions {
let quad = input_dataset
.get(*quad_position)
.expect("Positions are created from the input dataset");
// step 2
if let Some(quad_positions) = self.blank_node_to_quads.get(&identifier).cloned() {
// step 3
for quad_position in quad_positions {
let quad = input_dataset
.get(quad_position)
.expect("Positions are created from the input dataset");
// step 3.1
let iter = [
("s", subject_as_blank_id::<N>(quad.subject())),
("o", object_as_blank_id::<N>(quad.object())),
("g", quad.graph().and_then(subject_as_blank_id::<N>)),
]
.into_iter()
.filter_map(|(position, opt)| Some((position, opt?)))
.filter(|(_, blank_id)| identifier != **blank_id);
// step 3.1
let iter = [
("s", subject_as_blank_id::<N>(quad.subject())),
("o", object_as_blank_id::<N>(quad.object())),
("g", quad.graph().and_then(subject_as_blank_id::<N>)),
]
.into_iter()
.filter_map(|(position, opt)| Some((position, opt?)))
.filter(|(_, blank_id)| identifier != **blank_id);
for (position, related) in iter {
// step 3.1.1
let hash = hash_related_blank_node(
blank_node_to_quads,
canon_issuer,
canon_vocabulary,
issuer,
vocabulary,
related,
quad,
position,
input_dataset,
sha256,
);
for (position, related) in iter {
// step 3.1.1
let hash = self.hash_related_blank_node(
issuer,
vocabulary,
related,
quad,
position,
input_dataset,
);
// step 3.1.2
hash_to_related_blank_nodes
.entry(hash)
.or_default()
.insert(related.clone());
// step 3.1.2
hash_to_related_blank_nodes
.entry(hash)
.or_default()
.insert(related.clone());
}
}
} else {
eprintln!("No quad positions");
}
} else {
eprintln!("No quad positions");
}
// step 4
let mut data_to_hash = String::new();
// step 4
let mut data_to_hash = String::new();
// step 5
for (related_hash, blank_node_list) in hash_to_related_blank_nodes {
// step 5.1
data_to_hash += &related_hash.0;
// step 5
for (related_hash, blank_node_list) in hash_to_related_blank_nodes {
// step 5.1
data_to_hash += &related_hash.0;
// step 5.2
let mut chosen_path = String::new();
// step 5.2
let mut chosen_path = String::new();
// step 5.3
let mut chosen_issuer = Default::default();
// step 5.3
let mut chosen_issuer = Default::default();
'permute: for permutation in permute(blank_node_list) {
// step 5.4.1
let mut issuer_copy = issuer.clone();
// step 5.4.2
let mut path = String::new();
// step 5.4.3
let mut recursion_list = HashSet::new();
'permute: for permutation in permute(blank_node_list) {
// step 5.4.1
let mut issuer_copy = issuer.clone();
// step 5.4.2
let mut path = String::new();
// step 5.4.3
let mut recursion_list = HashSet::new();
// step 5.4.4
for related in permutation {
if let Some(blank) = canon_issuer.get(&related) {
// step 5.4.4.1
if let Some(blank_id) = canon_vocabulary.blank_id(blank) {
path += &blank_id.to_string();
// step 5.4.4
for related in permutation {
if let Some(blank) = self.canonical_issuer.get(&related) {
// step 5.4.4.1
if let Some(blank_id) = self.canonical_vocabulary.blank_id(blank) {
path += blank_id;
} else {
eprintln!("No blank in vocabulary");
}
} else {
// step 5.4.4.2
// step 5.4.4.2.1
recursion_list.insert(related.clone());
// step 5.4.4.2.2
issuer_copy.issue_identifier(related, vocabulary);
}
// step 5.4.4.3
if !chosen_path.is_empty()
&& path.len() >= chosen_path.len()
&& path > chosen_path
{
continue 'permute;
}
}
// step 5.4.5
for related in recursion_list {
// step 5.4.5.1
let result = self.hash_n_degree_quads(
vocabulary,
&mut issuer_copy,
related.clone(),
input_dataset,
);
// step 5.4.5.2
let new_blank = issuer_copy.issue_identifier(related, vocabulary);
if let Some(blank_id) = vocabulary.blank_id(&new_blank) {
path += blank_id;
// step 5.4.5.3
path += "<";
path += result.0.as_str();
path += ">";
} else {
eprintln!("No blank in vocabulary");
}
} else {
// step 5.4.4.2
// step 5.4.4.2.1
recursion_list.insert(related.clone());
// step 5.4.4.2.2
issuer_copy.issue_identifier(related, vocabulary);
// step 5.4.5.4 is a no-op
// step 5.4.5.5
if !chosen_path.is_empty()
&& path.len() >= chosen_path.len()
&& path > chosen_path
{
continue 'permute;
}
}
// step 5.4.4.3
if !chosen_path.is_empty() && path.len() >= chosen_path.len() && path > chosen_path
{
continue 'permute;
if chosen_path.is_empty() || path < chosen_path {
chosen_path = path;
chosen_issuer = issuer_copy;
}
}
// step 5.4.5
for related in recursion_list {
// step 5.4.5.1
let result = hash_n_degree_quads(
blank_node_to_quads,
vocabulary,
canon_vocabulary,
&mut issuer_copy,
canon_issuer,
related.clone(),
input_dataset,
sha256,
);
// step 5.4.5.2
let new_blank = issuer_copy.issue_identifier(related, vocabulary);
// step 5.5
data_to_hash += &chosen_path;
if let Some(blank_id) = vocabulary.blank_id(&new_blank) {
path += &blank_id.to_string();
// step 5.6
std::mem::swap(issuer, &mut chosen_issuer);
}
// step 5.4.5.3
path += "<";
path += result.0.as_str();
path += ">";
} else {
eprintln!("No blank in vocabulary");
}
// step 6
self.sha256.update(data_to_hash.as_bytes());
self.sha256.finalize_hex_and_reset()
}
// step 5.4.5.4 is a no-op
fn hash_related_blank_node(
&mut self,
issuer: &Issuer<N::BlankId>,
vocabulary: &N,
related: &N::BlankId,
quad: &NormalizingQuad<N>,
position: &str,
input_dataset: &InputDataset<N>,
) -> HexHash
where
N::BlankId: Clone + Eq + Hash,
S: Sha256,
{
// step 1
let identifier = if let Some(blank_id) = self.canonical_issuer.get(related) {
let blank = self
.canonical_vocabulary
.blank_id(blank_id)
.expect("No blank in vocabulary");
blank.to_string()
} else if let Some(blank_id) = issuer.get(related) {
let blank = vocabulary
.blank_id(blank_id)
.expect("No blank in vocabulary");
blank.to_string()
} else {
self.hash_first_degree_quads(related.clone(), input_dataset)
.0
};
// step 5.4.5.5
if !chosen_path.is_empty() && path.len() >= chosen_path.len() && path > chosen_path
{
continue 'permute;
}
}
// step 2
let mut input = String::from(position);
if chosen_path.is_empty() || path < chosen_path {
chosen_path = path;
chosen_issuer = issuer_copy;
// step 3
if position != "g" {
input += "<";
input += quad.predicate().with(&*self.canonical_vocabulary).as_str();
input += ">";
}
// step 4
input += &identifier;
// step 5
self.sha256.update(input.as_bytes());
self.sha256.finalize_hex_and_reset()
}
fn hash_first_degree_quads(
&mut self,
identifier: N::BlankId,
input_dataset: &InputDataset<N>,
) -> HexHash
where
N::BlankId: Eq + Hash + Clone,
S: Sha256,
{
// Step 1
let mut nquads = Vec::new();
// step 2
if let Some(quad_positions) = self.blank_node_to_quads.get(&identifier).cloned() {
// step 3
for quad_position in quad_positions {
let quad = input_dataset
.get(quad_position)
.expect("Positions are created from the input dataset");
// step 3.1, 3.1.1, and 3.1.1.1
let serizlied = self.serialize_quad(&identifier, quad);
nquads.push(serizlied);
}
}
// step 5.5
data_to_hash += &chosen_path;
// step 4
nquads.sort();
// step 5.6
std::mem::swap(issuer, &mut chosen_issuer);
// step 5
let joined = nquads.join("");
self.sha256.update(joined.as_bytes());
self.sha256.finalize_hex_and_reset()
}
// step 6
sha256.update(data_to_hash.as_bytes());
sha256.finalize_hex_and_reset()
fn serialize_quad(&self, identifier: &N::BlankId, quad: &NormalizingQuad<N>) -> String
where
N::BlankId: Clone + Eq,
{
let subject = self.serialize_subject(identifier, quad.subject());
let predicate = quad
.predicate()
.with(&*self.canonical_vocabulary)
.rdf_display()
.to_string();
let object = self.serialize_object(identifier, quad.object());
let graph = quad
.graph()
.map(|graph| self.serialize_subject(identifier, graph));
if let Some(graph) = graph {
format!("{subject} {predicate} {object} {graph} .\n")
} else {
format!("{subject} {predicate} {object} .\n")
}
}
fn serialize_subject(
&'a self,
identifier: &N::BlankId,
subject: &'a QuadSubject<N>,
) -> Cow<'a, str>
where
N::BlankId: Eq,
{
if subject.is_blank() && matches_identifier::<N>(identifier, subject) {
Cow::Borrowed("_:a")
} else if subject.is_blank() {
Cow::Borrowed("_:z")
} else {
Cow::Owned(
subject
.with(&*self.canonical_vocabulary)
.rdf_display()
.to_string(),
)
}
}
fn serialize_object(&'a self, identifier: &N::BlankId, object: &'a QuadValue<N>) -> Cow<'a, str>
where
N::BlankId: Eq,
{
match object {
Value::Literal(lit) => Cow::Owned(
lit.with(&*self.canonical_vocabulary)
.rdf_display()
.to_string(),
),
Value::Reference(subject) => self.serialize_subject(identifier, subject),
}
}
}
fn permute<B>(set: HashSet<B>) -> impl Iterator<Item = Vec<B>>
@ -579,156 +634,6 @@ where
set.into_iter().permutations(len)
}
/// "Hash Related Blank Node" step of the canonicalization algorithm: produces
/// a hash for `related` as seen from `quad`, preferring the canonical
/// identifier, then the temporary issuer's identifier, and falling back to a
/// first-degree hash.
///
/// `position` is "s", "o", or "g" depending on where `related` appears in the
/// quad; the hash input is `position` + (predicate, unless graph) + identifier.
fn hash_related_blank_node<N, S>(
    blank_node_to_quads: &HashMap<N::BlankId, HashSet<Position>>,
    canon_issuer: &Issuer<N::BlankId>,
    canon_vocabulary: &N,
    issuer: &Issuer<N::BlankId>,
    vocabulary: &N,
    related: &N::BlankId,
    quad: &NormalizingQuad<N>,
    position: &str,
    input_dataset: &InputDataset<N>,
    sha256: &mut S,
) -> HexHash
where
    N: Vocabulary,
    N::BlankId: Clone + Eq + Hash,
    S: Sha256,
{
    // step 1: pick the best available identifier for `related`.
    let identifier = if let Some(blank_id) = canon_issuer.get(related) {
        // Canonical identifier already issued — use it directly.
        let blank = canon_vocabulary
            .blank_id(blank_id)
            .expect("No blank in vocabulary");
        blank.to_string()
    } else if let Some(blank_id) = issuer.get(related) {
        // Otherwise fall back to the temporary issuer's identifier.
        let blank = vocabulary
            .blank_id(blank_id)
            .expect("No blank in vocabulary");
        blank.to_string()
    } else {
        // No identifier issued yet: hash the node's first-degree quads.
        hash_first_degree_quads(
            blank_node_to_quads,
            canon_vocabulary,
            related.clone(),
            input_dataset,
            sha256,
        )
        .0
    };
    // step 2
    let mut input = String::from(position);
    // step 3: the predicate participates unless `related` is the graph label.
    if position != "g" {
        input += "<";
        input += &quad.predicate().with(canon_vocabulary).to_string();
        input += ">";
    }
    // step 4
    input += &identifier;
    // step 5
    sha256.update(input.as_bytes());
    sha256.finalize_hex_and_reset()
}
/// "Hash First Degree Quads": hashes the canonical serialization of every
/// quad in which `identifier` appears.
fn hash_first_degree_quads<N, S>(
    blank_node_to_quads: &HashMap<N::BlankId, HashSet<Position>>,
    vocabulary: &N,
    identifier: N::BlankId,
    input_dataset: &InputDataset<N>,
    sha256: &mut S,
) -> HexHash
where
    N: Vocabulary,
    N::BlankId: Eq + Hash + Clone,
    S: Sha256,
{
    // Steps 1-3 (incl. 3.1, 3.1.1, 3.1.1.1): serialize every quad mentioning
    // this identifier; `serialize_quad` replaces the identifier itself with
    // "_:a" and every other blank node with "_:z".
    let mut nquads: Vec<String> = blank_node_to_quads
        .get(&identifier)
        .into_iter()
        .flatten()
        .map(|quad_position| {
            let quad = input_dataset
                .get(*quad_position)
                .expect("Positions are created from the input dataset");
            serialize_quad(&identifier, quad, vocabulary)
        })
        .collect();

    // step 4: sort so the hash is independent of insertion order.
    nquads.sort();

    // step 5: hash the concatenated serializations.
    sha256.update(nquads.join("").as_bytes());
    sha256.finalize_hex_and_reset()
}
/// Serializes one quad in N-Quads-like form for first-degree hashing.
/// Blank nodes are replaced by fixed placeholders via `serialize_subject` /
/// `serialize_object`, so the output depends only on the quad's structure.
fn serialize_quad<N>(identifier: &N::BlankId, quad: &NormalizingQuad<N>, vocabulary: &N) -> String
where
    N: Vocabulary,
    N::BlankId: Clone + Eq,
{
    let subject = serialize_subject(identifier, quad.subject(), vocabulary);
    let predicate = quad.predicate().with(vocabulary).rdf_display().to_string();
    let object = serialize_object(identifier, quad.object(), vocabulary);
    // The graph label is optional; when present it serializes like a subject.
    let graph = quad
        .graph()
        .map(|graph| serialize_subject(identifier, graph, vocabulary));
    if let Some(graph) = graph {
        format!("{subject} {predicate} {object} {graph} .\n")
    } else {
        format!("{subject} {predicate} {object} .\n")
    }
}
/// Serializes a quad subject (or graph label) for first-degree hashing.
/// Blank nodes collapse to placeholders: "_:a" when the subject is the node
/// being hashed, "_:z" for any other blank node.
fn serialize_subject<'a, N>(
    identifier: &N::BlankId,
    subject: &'a QuadSubject<N>,
    vocabulary: &'a N,
) -> Cow<'a, str>
where
    N: Vocabulary,
    N::BlankId: Eq,
{
    if !subject.is_blank() {
        // Non-blank subjects are rendered through the vocabulary as-is.
        return Cow::Owned(subject.with(vocabulary).rdf_display().to_string());
    }

    if matches_identifier::<N>(identifier, subject) {
        Cow::Borrowed("_:a")
    } else {
        Cow::Borrowed("_:z")
    }
}
/// Serializes a quad object for first-degree hashing. Literals render through
/// the vocabulary; references reuse the subject serialization (including its
/// blank-node placeholder logic).
fn serialize_object<'a, N>(
    identifier: &N::BlankId,
    object: &'a QuadValue<N>,
    vocabulary: &'a N,
) -> Cow<'a, str>
where
    N: Vocabulary,
    N::BlankId: Eq,
{
    match object {
        Value::Literal(literal) => {
            let rendered = literal.with(vocabulary).rdf_display().to_string();
            Cow::Owned(rendered)
        }
        Value::Reference(reference) => serialize_subject(identifier, reference, vocabulary),
    }
}
pub fn quad_to_string<N>(quad: &NormalizingQuad<N>, vocabulary: &N) -> String
where
N: Vocabulary,
@ -788,14 +693,6 @@ where
}
}
/// Generator for canonical ("c14n"-prefixed) blank-node identifiers.
fn canonicalization_node_generator() -> Blank {
    make_issuer("c14n")
}
/// Builds a blank-node generator using `prefix` for issued identifiers.
fn make_issuer(prefix: &str) -> Blank {
    Blank::new_with_prefix(String::from(prefix))
}
impl std::fmt::Display for Security {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Aborted due to time complexity")
@ -803,24 +700,3 @@ impl std::fmt::Display for Security {
}
impl std::error::Error for Security {}
// Manual Clone impl: presumably `Blank` does not implement `Clone`, so the
// generator is rebuilt from its prefix and counter — TODO confirm against
// rdf_types.
impl<B> Clone for Issuer<B>
where
    B: Clone,
{
    fn clone(&self) -> Self {
        Self {
            // Recreate the generator at the same prefix/count position so the
            // clone continues issuing from where the original left off.
            blank_node_generator: Blank::new_full(
                self.blank_node_generator.prefix().to_string(),
                self.blank_node_generator.count(),
            ),
            issued_identifier_list: self.issued_identifier_list.clone(),
        }
    }
}
impl<B> Default for Issuer<B> {
    /// Defaults to the canonical "c14n"-prefixed issuer (`Issuer::new`).
    fn default() -> Self {
        Self::new()
    }
}

95
src/output_dataset.rs Normal file
View file

@ -0,0 +1,95 @@
use crate::input_dataset::{NormalizingQuad, QuadValue};
use contextual::DisplayWithContext;
use contextual::WithContext;
use json_ld::rdf::Value;
use rdf_types::{RdfDisplay, Vocabulary};
/// Result of dataset normalization: the normalized quads, held in the sorted
/// order established by [`OutputDataset::new`].
pub struct OutputDataset<N>
where
    N: Vocabulary,
{
    // Sorted by each quad's serialized string form at construction time.
    quads: Vec<NormalizingQuad<N>>,
}
impl<N> OutputDataset<N>
where
    N: Vocabulary,
{
    /// Builds the dataset, sorting quads by their serialized string form so
    /// output is deterministic. `sort_by_cached_key` serializes each quad
    /// only once despite the O(n log n) comparisons.
    pub(crate) fn new(mut quads: Vec<NormalizingQuad<N>>, vocabulary: &N) -> Self {
        quads.sort_by_cached_key(|quad| super::quad_to_string(quad, vocabulary));
        Self { quads }
    }

    /// Borrows the sorted quads.
    pub fn quads(&self) -> &[NormalizingQuad<N>] {
        &self.quads
    }

    /// Consumes the dataset, returning the sorted quads.
    pub fn into_quads(self) -> Vec<NormalizingQuad<N>> {
        self.quads
    }
}
impl<N> DisplayWithContext<N> for OutputDataset<N>
where
    N: Vocabulary,
{
    /// Writes the dataset one quad per line ("subject predicate object
    /// [graph] ."), resolving indices through `vocabulary`.
    fn fmt_with(&self, vocabulary: &N, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        for quad in &self.quads {
            quad.subject().with(vocabulary).rdf_fmt(f)?;
            write!(f, " ")?;
            quad.predicate().with(vocabulary).rdf_fmt(f)?;
            write!(f, " ")?;
            write_object(quad.object(), vocabulary, f)?;
            write!(f, " ")?;
            // The graph label only appears for named-graph quads.
            if let Some(graph) = quad.graph() {
                graph.with(vocabulary).rdf_fmt(f)?;
                write!(f, " ")?;
            }
            writeln!(f, ".")?;
        }
        Ok(())
    }
}
// Manual Debug impl: bounds on `N::BlankId`/`N::Iri` rather than on `N`
// itself, which a derive would require.
impl<N> std::fmt::Debug for OutputDataset<N>
where
    N: Vocabulary,
    N::BlankId: std::fmt::Debug,
    N::Iri: std::fmt::Debug,
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("OutputDataset")
            .field("quads", &self.quads)
            .finish()
    }
}
// Manual Clone impl for the same reason as Debug above would apply: clone is
// bounded on `N::BlankId`/`N::Iri` instead of requiring `N: Clone`.
impl<N> Clone for OutputDataset<N>
where
    N: Vocabulary,
    N::BlankId: Clone,
    N::Iri: Clone,
{
    fn clone(&self) -> Self {
        Self {
            quads: self.quads.clone(),
        }
    }
}
/// Writes a quad object — literal or node reference — in RDF display form.
///
/// Fixes two lints from the original: the explicit `'a` lifetime was needless
/// (no output borrows the inputs), and the `ref` patterns were redundant —
/// matching through `&QuadValue` already binds by reference (default binding
/// modes).
fn write_object<N>(
    object: &QuadValue<N>,
    vocabulary: &N,
    formatter: &mut std::fmt::Formatter,
) -> std::fmt::Result
where
    N: Vocabulary,
{
    match object {
        Value::Literal(lit) => lit.with(vocabulary).rdf_fmt(formatter),
        Value::Reference(subject) => subject.with(vocabulary).rdf_fmt(formatter),
    }
}

View file

@ -1,4 +1,3 @@
use sha2::{Digest, Sha256};
impl super::Sha256 for Sha256 {
@ -9,6 +8,6 @@ impl super::Sha256 for Sha256 {
fn finalize_hex_and_reset(&mut self) -> crate::HexHash {
let output = self.finalize_reset();
crate::HexHash(hex::encode(&output))
crate::HexHash(hex::encode(output))
}
}