aboutsummaryrefslogtreecommitdiff
path: root/lib/fragment_graph/rdf_fragment_graph.ml
diff options
context:
space:
mode:
Diffstat (limited to 'lib/fragment_graph/rdf_fragment_graph.ml')
-rw-r--r--lib/fragment_graph/rdf_fragment_graph.ml390
1 files changed, 251 insertions, 139 deletions
diff --git a/lib/fragment_graph/rdf_fragment_graph.ml b/lib/fragment_graph/rdf_fragment_graph.ml
index 09821a2..bc2d99e 100644
--- a/lib/fragment_graph/rdf_fragment_graph.ml
+++ b/lib/fragment_graph/rdf_fragment_graph.ml
@@ -5,11 +5,6 @@
* SPDX-License-Identifier: AGPL-3.0-or-later
*)
-(* TODO this implementation uses CCMultiMaps. This is not so good as CCMultiMaps
- adds duplicate values. This must be manually checked when entering statements
- (see `add_statemen_unique`). Find a better solution.
-*)
-
module type HASH = sig
(*** Signature of a hash that is used to compute identifiers. *)
@@ -17,21 +12,30 @@ module type HASH = sig
(** [hash v] returns the hash of [v] as an iri. *)
end
-module type T = sig
+module type S = sig
(** RDF Fragment Graph *)
+ (** {1 Predicate and Object} *)
+
+ (** Fragment Graphs can not reuse the {!module:Rdf.Triple.Predicate} and
+ {!module:Rdf.Triple.Object} modules as they do not allow Blank Nodes and in
+ addition to IRIs, references to fragments are allowed.*)
+
module Predicate : sig
type t =
| FragmentReference of string
| Iri of Rdf.Iri.t
(** Predicate that can appear in a Fragment Graph *)
- val of_iri : Rdf.Iri.t -> t
+ val of_iri : ?base_subject:Rdf.Iri.t -> Rdf.Iri.t -> t
(** [of_iri iri] creates an Iri predicate *)
val make_fragment_reference : string -> t
(** [make_fragment_reference fragment] creates a reference to [fragment] *)
+ val pp : t Fmt.t
+ [@@ocaml.toplevel_printer]
+
end
module Object : sig
@@ -41,7 +45,7 @@ module type T = sig
| Literal of Rdf.Literal.t
(** Object that can appear in a Fragment Graph *)
- val of_iri : Rdf.Iri.t -> t
+ val of_iri : ?base_subject:Rdf.Iri.t -> Rdf.Iri.t -> t
(** [of_iri iri] creates an iri object *)
val of_literal : Rdf.Literal.t -> t
@@ -49,42 +53,59 @@ module type T = sig
val make_fragment_reference : string -> t
(** [make_fragment_reference fragment] creates a reference to [fragment] *)
+
+ val pp : t Fmt.t
+ [@@ocaml.toplevel_printer]
end
type t
(** Type of a Fragment Graph *)
+ (** {1 Constructors} *)
+
val empty : t
(** [empty] is the empty fragment graph *)
- val equal : t -> t -> bool
- (** [equal a b] returns true if [a] is the same Fragment Graph as [b]. *)
-
val add_statement : Predicate.t -> Object.t -> t -> t
(** [add_statement fragment_graph predicate object'] adds a statement to the fragment graph *)
val add_fragment_statement : string -> Predicate.t -> Object.t -> t -> t
(** [add_fragment_statement] adds a fragment statement to the fragment graph *)
- val statements : t -> (Predicate.t * Object.t) list
+ val of_triples : Rdf.Triple.t Seq.t -> (Rdf.Iri.t * t) Seq.t
+ (** [of_triples triples] returns a sequence of fragment graphs for each base
+ subject appearing in [triples]. *)
+
+ (** {1 Accessors} *)
+
+ val statements : t -> (Predicate.t * Object.t) Seq.t
(** [statements fragment_graph] returns a list of statements in
[fragment_graph]. *)
- val fragment_statements : t -> (string * Predicate.t * Object.t) list
+ val fragment_statements : t -> (string * Predicate.t * Object.t) Seq.t
(** [fragment_statements fragment_graph] returns a list of fragment statements
in [fragment_graph]. *)
+ val to_triples : t -> Rdf.Triple.t Seq.t
+ (** [to_triples fragment_graph] returns a list of statements in [fragment_graph] as triples. *)
+
+
+ (** {1 Content-addressing} *)
+
val base_subject : t -> Rdf.Iri.t
(** [base_subject fragment_graph] returns the base subject of the fragment graph. *)
+ val equal : t -> t -> bool
+ (** [equal a b] returns true if [a] is the same Fragment Graph as [b].
+ Equality is implemnted by checking the base subject.*)
+
val canonical : t -> string
(** [canonical fragment_graph] returns the canonical serialization of the fragment graph. *)
val of_canonical : string -> (t, string) result
(** [of_canonical s] attempts to decode the fragment graph from the canonical serialization in [s]*)
- val to_triples : t -> Rdf.Triple.t list
- (** [to_triples fragment_graph] returns a list of statements in [fragment_graph] as triples. *)
+ (** {1 (Pretty) Printing} *)
val pp : t Fmt.t
[@@ocaml.toplevel_printer]
@@ -93,12 +114,26 @@ end
module Make(H: HASH) = struct
+
+ (* Helper to get the base subject of an IRI *)
+ let get_base_subject iri =
+ Rdf.Iri.with_fragment iri None
+
module Predicate = struct
type t =
| FragmentReference of string
| Iri of Rdf.Iri.t
- let of_iri iri = Iri iri
+ let of_iri ?base_subject iri =
+ match base_subject with
+ | None -> Iri iri
+ | Some base_subject ->
+ if Rdf.Iri.equal base_subject (get_base_subject iri) then
+ Rdf.Iri.fragment iri
+ |> Option.value ~default:""
+ |> (fun f -> FragmentReference f)
+ else
+ Iri iri
let make_fragment_reference id =
FragmentReference id
@@ -118,11 +153,11 @@ module Make(H: HASH) = struct
iri
|> Rdf.Triple.Predicate.of_iri
- (* let pp ppf = function
- * | FragmentReference id ->
- * Fmt.pf ppf "%s" id
- * | Iri iri ->
- * Fmt.pf ppf "%a" Rdf.Iri.pp iri *)
+ let pp ppf = function
+ | FragmentReference id ->
+ Fmt.pf ppf "@[<2><f %s>@]" id
+ | Iri iri ->
+ Fmt.pf ppf "@[<4><iri %a>@]" Rdf.Iri.pp iri
end
module Object = struct
@@ -131,7 +166,16 @@ module Make(H: HASH) = struct
| Iri of Rdf.Iri.t
| Literal of Rdf.Literal.t
- let of_iri iri = Iri iri
+ let of_iri ?base_subject iri =
+ match base_subject with
+ | None -> Iri iri
+ | Some base_subject ->
+ if Rdf.Iri.equal base_subject (get_base_subject iri) then
+ Rdf.Iri.fragment iri
+ |> Option.value ~default:""
+ |> (fun f -> FragmentReference f)
+ else
+ Iri iri
let of_literal literal = Literal literal
@@ -158,82 +202,144 @@ module Make(H: HASH) = struct
literal
|> Rdf.Triple.Object.of_literal
- (* let pp ppf = function
- * | FragmentReference id ->
- * Fmt.pf ppf "%s" id
- * | Iri iri ->
- * Fmt.pf ppf "%a" Rdf.Iri.pp iri
- * | Literal literal ->
- * Fmt.pf ppf "%a" Rdf.Literal.pp literal *)
+ let pp ppf = function
+ | FragmentReference id ->
+ Fmt.pf ppf "@[<2><f %s>@]" id
+ | Iri iri ->
+ Fmt.pf ppf "@[<4><iri %a>@]" Rdf.Iri.pp iri
+ | Literal lit ->
+ Fmt.pf ppf "@[<4><lit %a>@]" Rdf.Literal.pp lit
end
- module StatementMap = CCMultiMap.Make(Predicate)(Object)
- module FragmentStatementsMap = CCHashTrie.Make(struct
- type t = string
- let equal = String.equal
- let hash = Hashtbl.hash
- end)
+ module FragmentMap = Map.Make(String)
+ module PredicateMap = Map.Make(Predicate)
+ module ObjectSet = Set.Make(Object)
type t =
- { statements : StatementMap.t;
- fragment_statements : StatementMap.t FragmentStatementsMap.t
+ { statements : ObjectSet.t PredicateMap.t;
+ fragment_statements : ObjectSet.t PredicateMap.t FragmentMap.t
}
let empty =
- { statements = StatementMap.empty;
- fragment_statements = FragmentStatementsMap.empty
+ { statements = PredicateMap.empty;
+ fragment_statements = FragmentMap.empty
}
- (* Helper to return list of statements *)
- let list_of_statements statements =
- let l = ref [] in
- StatementMap.iter statements
- (fun p o -> l := (p,o) :: !l) ;
- !l
-
- (* Adds statement if it is not already present. *)
- let add_statement_unique predicate object' statements =
- let open StatementMap in
- match find statements predicate with
- | [] -> add statements predicate object'
- | objs ->
- if List.mem object' objs then
- statements
- else
- add statements predicate object'
+ let statements_signleton p o =
+ PredicateMap.add
+ p (ObjectSet.singleton o)
+ PredicateMap.empty
+
+ let statements_union a b =
+ PredicateMap.union
+ (fun _predicate objects_a objects_b ->
+ ObjectSet.union objects_a objects_b
+ |> Option.some)
+ a b
let add_statement predicate object' fragment_graph =
- {fragment_graph with
- statements =
- add_statement_unique
- predicate object'
- fragment_graph.statements
+ { fragment_graph with
+ statements = statements_signleton predicate object'
+ |> statements_union fragment_graph.statements
}
- let statements fragment_graph =
- list_of_statements fragment_graph.statements
-
- let add_fragment_statement fragment_reference predicate object' fragment_graph =
- {fragment_graph with
- fragment_statements = FragmentStatementsMap.update fragment_reference
- ~f:(function
- | Some statements ->
- Some (add_statement_unique predicate object' statements)
- | None ->
- Some (StatementMap.add StatementMap.empty
- predicate object'))
- fragment_graph.fragment_statements
+ let fragment_statements_singleton fragment predicate object' =
+ FragmentMap.add
+ fragment
+ (statements_signleton predicate object')
+ FragmentMap.empty
+
+ let fragment_statement_union a b =
+ FragmentMap.union
+ (fun _fragment statements_a statements_b ->
+ Some (statements_union statements_a statements_b)
+ )
+ a b
+
+ let add_fragment_statement fragment predicate object' fragment_graph =
+ { fragment_graph with
+ fragment_statements = fragment_statements_singleton fragment predicate object'
+ |> fragment_statement_union fragment_graph.fragment_statements
}
- let fragment_statements fragment_graph =
- FragmentStatementsMap.fold
- ~f:(fun fs fragment_id statements ->
- List.(append fs
- (list_of_statements statements
- |> map (fun (o,p) -> (fragment_id, o, p)))))
- ~x:[]
- fragment_graph.fragment_statements
+
+ (* Constructor from triples *)
+
+ module IriMap = Map.Make(Rdf.Iri)
+
+ let of_triples triples=
+ triples
+ |> Seq.fold_left
+ (fun fgs (triple:Rdf.Triple.t) ->
+
+ (* extract the subject as IRI *)
+ let subject_iri =
+ Rdf.Triple.Subject.map triple.subject
+ (fun iri -> iri)
+ (fun _bnode -> failwith "Blank Nodes are not allowed in Fragment Graphs.")
+ in
+
+ (* compute the base subject *)
+ let base_subject = get_base_subject subject_iri in
+
+ (* extract Predicate as Predicate *)
+ let predicate =
+ Rdf.Triple.Predicate.map triple.predicate
+ (fun iri -> Predicate.of_iri ~base_subject iri)
+ in
+
+ (* extract object as Object *)
+ let object' =
+ Rdf.Triple.Object.map triple.object'
+ (fun iri -> Object.of_iri ~base_subject iri)
+ (fun _bnode -> failwith "Blank Nodes are not allowed in Fragment Graphs.")
+ Object.of_literal
+ in
+
+
+ match Rdf.Iri.fragment subject_iri with
+
+ (* subject is not a fragment -> add triple as statement *)
+ | None ->
+ IriMap.update base_subject
+ (fun fg_opt ->
+ fg_opt
+ |> Option.value ~default:empty
+ |> add_statement predicate object'
+ |> Option.some)
+ fgs
+
+ (* subject is a fragment of base_subject -> add as fragment statement *)
+ | Some fragment_id ->
+ IriMap.update base_subject
+ (fun fg_opt ->
+ fg_opt
+ |> Option.value ~default:empty
+ |> add_fragment_statement fragment_id predicate object'
+ |> Option.some)
+ fgs
+ )
+ IriMap.empty
+ |> IriMap.to_seq
+
+ let statements_to_seq statements =
+ PredicateMap.to_seq statements
+ |> Seq.flat_map (fun (predicate, os) ->
+ ObjectSet.to_seq os
+ |> Seq.map (fun o -> predicate,o))
+
+ let statements fg =
+ fg.statements
+ |> statements_to_seq
+
+ let fragment_statements fg =
+ fg.fragment_statements
+ |> FragmentMap.to_seq
+ |> Seq.flat_map (fun (fragment, statements) ->
+ statements_to_seq statements
+ |> Seq.map (fun (p,o) -> (fragment, p, o))
+ )
module Serialization = struct
@@ -256,35 +362,33 @@ module Make(H: HASH) = struct
(Cbor.encode a)
(Cbor.encode b)
- let cbor_of_statements fragment_graph =
- StatementMap.fold fragment_graph.statements []
- (fun acc predicate object' ->
+ let cbor_of_statements fg =
+ fg
+ |> statements
+ |> Seq.map
+ (fun (predicate, object') ->
Cbor.Array [
cbor_of_predicate predicate;
cbor_of_object object'
- ] :: acc)
+ ] )
let cbor_of_fragment_statements fragment_graph =
- let cbor_of_statements fragment_id statements =
- StatementMap.fold statements []
- (fun acc predicate object' ->
- Cbor.Array [
- Cbor.TextString fragment_id;
- cbor_of_predicate predicate;
- cbor_of_object object'
- ] :: acc)
- in
- FragmentStatementsMap.fold
- ~x:[]
- ~f:(fun acc fragment_id statements ->
- List.append (cbor_of_statements fragment_id statements) acc)
- fragment_graph.fragment_statements
-
- let cbor_of_fragment_graph fragment_graph =
- List.append
- (cbor_of_statements fragment_graph)
- (cbor_of_fragment_statements fragment_graph)
- |> List.sort compare
+ fragment_graph
+ |> fragment_statements
+ |> Seq.map (fun (f, p, o) ->
+ Cbor.Array [
+ Cbor.TextString f;
+ cbor_of_predicate p;
+ cbor_of_object o
+ ]
+ )
+
+ let cbor_of_fragment_graph fg =
+ Seq.append
+ (cbor_of_statements fg)
+ (cbor_of_fragment_statements fg)
+ |> List.of_seq
+ |> List.sort_uniq compare
|> (fun s -> Cbor.Array s)
let encode fragment_graph =
@@ -369,45 +473,53 @@ module Make(H: HASH) = struct
|> canonical
|> H.hash
- (* TODO: equality is broken as CCMultiMap cares about order of inseration.
- This equality just uses the canonicalization to test equality. Think about
- if there is better way. *)
+
+ (* Order of insertion matters so we use *)
let equal a b =
Rdf.Iri.equal
(base_subject a)
(base_subject b)
- let fold_statements base_iri subject statements acc f =
- StatementMap.fold statements acc
- (fun acc predicate object' ->
- f acc @@
- Rdf.Triple.make subject
- (Predicate.expand base_iri predicate)
- (Object.expand base_iri object'))
-
- let fold fragment_graph acc f =
- let base_iri = base_subject fragment_graph in
- let subject = base_iri
- |> Rdf.Triple.Subject.of_iri
- in
- FragmentStatementsMap.fold
- ~f:(fun acc fragment_id statements ->
- let subject = Rdf.Iri.with_fragment base_iri (Some fragment_id)
- |> Rdf.Triple.Subject.of_iri
- in
- fold_statements base_iri subject statements acc f)
- ~x:(fold_statements base_iri subject
- fragment_graph.statements
- acc f)
- fragment_graph.fragment_statements
-
-
let to_triples fragment_graph =
- fold fragment_graph []
- (fun triples triple -> triple :: triples)
+ let base_subject = base_subject fragment_graph in
+ let subject = base_subject |> Rdf.Triple.Subject.of_iri in
+ Seq.append
+ (fragment_graph
+ |> statements
+ |> Seq.map
+ (fun (p, o) ->
+ Rdf.Triple.make
+ subject
+ (Predicate.expand base_subject p)
+ (Object.expand base_subject o)))
+ (fragment_graph
+ |> fragment_statements
+ |> Seq.map
+ (fun (f,p,o) ->
+ let subject = Rdf.Iri.with_fragment base_subject (Some f)
+ |> Rdf.Triple.Subject.of_iri
+ in
+ Rdf.Triple.make
+ subject
+ (Predicate.expand base_subject p)
+ (Object.expand base_subject o)))
+
+
+ let pp_statement ppf (p,o) =
+ Fmt.pf ppf "@[<2><s %a %a>@]"
+ Predicate.pp p
+ Object.pp o
+
+ let pp_fragment_statement ppf (f,p,o) =
+ Fmt.pf ppf "@[<3><fs %s %a %a>@]"
+ f
+ Predicate.pp p
+ Object.pp o
- (* TODO nicer pp *)
let pp ppf fragment_graph =
- Fmt.pf ppf "%a" Cbor.pp
- @@ Serialization.cbor_of_fragment_graph fragment_graph
+ Fmt.pf ppf "@[<v>%a@]@[<v>%a@]"
+ Fmt.(seq pp_statement)
+ (statements fragment_graph)
+ Fmt.(seq pp_fragment_statement)
+ (fragment_statements fragment_graph)
end