Implement hash_first_degree_quads
This commit is contained in:
parent
0cfa8376e9
commit
b4bd3d6781
165
src/lib.rs
165
src/lib.rs
|
@ -1,3 +1,4 @@
|
|||
use contextual::WithContext;
|
||||
use indexmap::IndexMap;
|
||||
use iref::IriBuf;
|
||||
use json_ld::{ExpandedDocument, ValidId as Subject};
|
||||
|
@ -27,10 +28,6 @@ pub trait Sha256 {
|
|||
fn finalize_hex_and_reset(&mut self) -> HexHash;
|
||||
}
|
||||
|
||||
/// A value that can expose itself as a byte slice when given a `Context`.
pub trait Hashable<Context> {
|
||||
/// Returns a byte slice borrowed from `self`; `context` is taken by value.
fn as_bytes_with(&self, context: Context) -> &[u8];
|
||||
}
|
||||
|
||||
type Expanded<N> = Meta<
|
||||
ExpandedDocument<
|
||||
<N as IriVocabulary>::Iri,
|
||||
|
@ -49,10 +46,12 @@ where
|
|||
pub quads: Vec<NormalizingQuad<N>>,
|
||||
}
|
||||
|
||||
pub struct CanonicalizationState<N>
|
||||
pub struct CanonicalizationState<N, S>
|
||||
where
|
||||
N: Vocabulary,
|
||||
{
|
||||
sha256: S,
|
||||
|
||||
// Identifier Prefix and Identifier Counter
|
||||
blank_node_generator: Blank,
|
||||
vocabulary: N,
|
||||
|
@ -64,13 +63,17 @@ where
|
|||
hash_to_blank_nodes: BTreeMap<HexHash, HashSet<N::BlankId>>,
|
||||
}
|
||||
|
||||
impl<N> CanonicalizationState<N>
|
||||
impl<N, S> CanonicalizationState<N, S>
|
||||
where
|
||||
N: Vocabulary,
|
||||
{
|
||||
/// Step 1
|
||||
pub fn new(vocabulary: N) -> Self {
|
||||
pub fn new(vocabulary: N) -> Self
|
||||
where
|
||||
S: Default,
|
||||
{
|
||||
Self {
|
||||
sha256: S::default(),
|
||||
blank_node_generator: canonicalization_node_generator(),
|
||||
vocabulary,
|
||||
issued_identifier_list: Default::default(),
|
||||
|
@ -81,6 +84,7 @@ where
|
|||
|
||||
pub fn normalize(&mut self, document_id: IriBuf, expanded: Expanded<N>) -> OutputDataset<N>
|
||||
where
|
||||
S: Sha256,
|
||||
N: VocabularyMut + Default,
|
||||
N::Iri: Clone + Eq + Hash + Send + Sync,
|
||||
N::BlankId: Clone + Eq + Hash + Send + Sync + for<'a> Borrow<&'a N::BlankId>,
|
||||
|
@ -91,7 +95,7 @@ where
|
|||
self.find_blank_nodes(&input_dataset);
|
||||
|
||||
// Step 3, 4, and 5
|
||||
self.issue_simple_canonical_identifiers();
|
||||
self.issue_simple_canonical_identifiers(&input_dataset);
|
||||
|
||||
// Step 6
|
||||
self.issue_complex_canonical_identifiers();
|
||||
|
@ -131,8 +135,9 @@ where
|
|||
}
|
||||
|
||||
// Step 3, 4, and 5
|
||||
fn issue_simple_canonical_identifiers(&mut self)
|
||||
fn issue_simple_canonical_identifiers(&mut self, input_dataset: &InputDataset<N>)
|
||||
where
|
||||
S: Sha256,
|
||||
N::BlankId: Clone + Eq + Hash,
|
||||
N: VocabularyMut + BlankIdVocabularyMut,
|
||||
{
|
||||
|
@ -154,7 +159,13 @@ where
|
|||
// step 5.3
|
||||
for identifier in non_normalized_identifiers.iter() {
|
||||
// step 5.3.1
|
||||
let hash = self.hash_first_degree_quads((*identifier).clone());
|
||||
let hash = hash_first_degree_quads(
|
||||
&self.blank_node_to_quads,
|
||||
&self.vocabulary,
|
||||
(*identifier).clone(),
|
||||
input_dataset,
|
||||
&mut self.sha256,
|
||||
);
|
||||
|
||||
// step 5.3.2
|
||||
self.hash_to_blank_nodes
|
||||
|
@ -296,10 +307,6 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
/// Unimplemented stub: the body is `todo!()`, so any call panics.
/// `identifier` is not used yet.
fn hash_first_degree_quads(&self, identifier: N::BlankId) -> HexHash {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn hash_n_degree_quads(
|
||||
&self,
|
||||
identifier: N::BlankId,
|
||||
|
@ -311,6 +318,91 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
fn hash_first_degree_quads<N, S>(
|
||||
blank_node_to_quads: &HashMap<N::BlankId, HashSet<Position>>,
|
||||
vocabulary: &N,
|
||||
identifier: N::BlankId,
|
||||
input_dataset: &InputDataset<N>,
|
||||
sha256: &mut S,
|
||||
) -> HexHash
|
||||
where
|
||||
N: Vocabulary,
|
||||
N::BlankId: Eq + Hash + Clone,
|
||||
S: Sha256,
|
||||
{
|
||||
// Step 1
|
||||
let mut nquads = Vec::new();
|
||||
|
||||
// step 2
|
||||
if let Some(quads) = blank_node_to_quads.get(&identifier) {
|
||||
// step 3
|
||||
for quad_position in quads {
|
||||
let quad = input_dataset
|
||||
.get(*quad_position)
|
||||
.expect("Positions are created from the input dataset");
|
||||
|
||||
// step 3.1, 3.1.1, and 3.1.1.1
|
||||
let serizlied = serialize_quad(&identifier, quad, vocabulary);
|
||||
|
||||
nquads.push(serizlied);
|
||||
}
|
||||
}
|
||||
|
||||
// step 4
|
||||
nquads.sort();
|
||||
|
||||
// step 5
|
||||
let joined = nquads.join("");
|
||||
|
||||
sha256.update(joined.as_bytes());
|
||||
|
||||
sha256.finalize_hex_and_reset()
|
||||
}
|
||||
|
||||
fn serialize_quad<N>(identifier: &N::BlankId, quad: &NormalizingQuad<N>, vocabulary: &N) -> String
|
||||
where
|
||||
N: Vocabulary,
|
||||
N::BlankId: Clone + Eq,
|
||||
{
|
||||
let subject = serialize_subject(identifier, quad.subject(), vocabulary);
|
||||
let predicate = serialize_subject(identifier, quad.predicate(), vocabulary);
|
||||
let object = quad.object().with(vocabulary);
|
||||
let graph = quad
|
||||
.graph()
|
||||
.map(|graph| serialize_subject(identifier, graph, vocabulary));
|
||||
|
||||
if let Some(graph) = graph {
|
||||
format!("{subject} {predicate} {object} {graph}")
|
||||
} else {
|
||||
format!("{subject} {predicate} {object}")
|
||||
}
|
||||
}
|
||||
|
||||
fn serialize_subject<N>(identifier: &N::BlankId, subject: &QuadSubject<N>, vocabulary: &N) -> String
|
||||
where
|
||||
N: Vocabulary,
|
||||
N::BlankId: Eq,
|
||||
{
|
||||
if subject.is_blank() && matches_identifier::<N>(identifier, subject) {
|
||||
String::from("_:a")
|
||||
} else if subject.is_blank() {
|
||||
String::from("_:z")
|
||||
} else {
|
||||
format!("{}", subject.with(vocabulary))
|
||||
}
|
||||
}
|
||||
|
||||
fn matches_identifier<N>(identifier: &N::BlankId, subject: &QuadSubject<N>) -> bool
|
||||
where
|
||||
N: Vocabulary,
|
||||
N::BlankId: Eq,
|
||||
{
|
||||
match subject {
|
||||
Subject::Blank(blank) => blank == identifier,
|
||||
Subject::Iri(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn issue_identifier_algorithm<N>(
|
||||
identifier: N::BlankId,
|
||||
generator: &mut Blank,
|
||||
|
@ -348,48 +440,3 @@ fn canonicalization_node_generator() -> Blank {
|
|||
fn make_issuer(prefix: &str) -> Blank {
|
||||
Blank::new_with_prefix(String::from(prefix))
|
||||
}
|
||||
|
||||
impl<'a, T, Context> Hashable<Context> for &'a T
|
||||
where
|
||||
T: Hashable<Context>,
|
||||
{
|
||||
fn as_bytes_with(&self, context: Context) -> &[u8] {
|
||||
T::as_bytes_with(self, context)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T, Context> Hashable<Context> for &'a mut T
|
||||
where
|
||||
T: Hashable<Context>,
|
||||
{
|
||||
fn as_bytes_with(&self, context: Context) -> &[u8] {
|
||||
T::as_bytes_with(self, context)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, Context> Hashable<Context> for Box<T>
|
||||
where
|
||||
T: Hashable<Context>,
|
||||
{
|
||||
fn as_bytes_with(&self, context: Context) -> &[u8] {
|
||||
T::as_bytes_with(self, context)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, Context> Hashable<Context> for std::rc::Rc<T>
|
||||
where
|
||||
T: Hashable<Context>,
|
||||
{
|
||||
fn as_bytes_with(&self, context: Context) -> &[u8] {
|
||||
T::as_bytes_with(self, context)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, Context> Hashable<Context> for std::sync::Arc<T>
|
||||
where
|
||||
T: Hashable<Context>,
|
||||
{
|
||||
fn as_bytes_with(&self, context: Context) -> &[u8] {
|
||||
T::as_bytes_with(self, context)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue