aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorarie <arie@alleycat.cc>2021-06-09 13:12:47 +0200
committerarie <arie@alleycat.cc>2021-06-28 21:03:33 +0200
commit862a3d9eb439452f8903384da421d543064168cc (patch)
treea6faf40b30293d7b40a73143137f7aa5be06a8db
parentbbb98e4944c3c0a2438aff3c6feae93f749880c9 (diff)
Rdf turtle:
Reorganise modules into different files.
-rw-r--r--lib/turtle/ast.ml340
-rw-r--r--lib/turtle/parser.ml272
-rw-r--r--lib/turtle/rdf_turtle.ml621
-rw-r--r--lib/turtle/rdf_turtle.mli37
-rw-r--r--test/alcotest/rdf_alcotest.ml32
-rw-r--r--test/alcotest/rdf_alcotest.mli32
-rw-r--r--test/turtle/main.ml2
7 files changed, 666 insertions, 670 deletions
diff --git a/lib/turtle/ast.ml b/lib/turtle/ast.ml
new file mode 100644
index 0000000..b1d9c6c
--- /dev/null
+++ b/lib/turtle/ast.ml
@@ -0,0 +1,340 @@
+(*
+* SPDX-FileCopyrightText: 2021 alleycat <info@alleycat.cc>
+* SPDX-FileCopyrightText: 2021 petites singularit├ęs <ps-dream@lesoiseaux.io>
+* SPDX-FileCopyrightText: 2021 pukkamustard <pukkamustard@posteo.net>
+*
+* SPDX-License-Identifier: AGPL-3.0-or-later
+*)
+
+(* From ocaml 4.12. Move to util functions or something? *)
+let rec list_equal eq l1 l2 =
+ match l1, l2 with
+ | [], [] -> true
+ | [], _::_ | _::_, [] -> false
+ | a1::l1, a2::l2 -> eq a1 a2 && list_equal eq l1 l2
+
+module Ordered_string = struct
+
+ type t = string
+
+ let compare = String.compare
+
+end
+
+module SMap = Map.Make (Ordered_string)
+
+module Iriref = struct
+
+ type t = string
+
+ let of_string s = s
+
+ let to_string s = s
+
+ let equal a b =
+ String.equal
+ (to_string a)
+ (to_string b)
+
+ let pp ppf iriref =
+ Fmt.pf ppf "@[<h 1><iriref@ %s>@]" (to_string iriref)
+
+end
+
+module Language = struct
+
+ type t = string
+
+ let of_string s = s
+
+ let to_string s = s
+
+ let equal a b =
+ String.equal
+ (to_string a)
+ (to_string b)
+
+ let pp ppf language =
+ Fmt.pf ppf "@[<h 1><language@ %s>@]" (to_string language)
+
+end
+
+module Prefixed_name = struct
+
+ type t = string * string
+
+ let of_strings s1 s2 = (s1, s2)
+
+ let equal a b =
+ String.equal (fst a) (fst b)
+ &&
+ String.equal (snd a) (snd b)
+
+ (* TODO check whether this is nice *)
+ let pp ppf prefixed_name =
+ Fmt.pf ppf "@[<h 1><prefixed_name fst@ %s snd@ %s>@]"
+ (fst prefixed_name)
+ (snd prefixed_name)
+
+end
+
+module Blank_node = struct
+
+ type t = string
+
+ let of_string s = s
+
+ let to_string s = s
+
+ let equal a b =
+ String.equal
+ (to_string a)
+ (to_string b)
+
+ (* TODO check whether this is nice (for all the pp functions)*)
+ let pp ppf blank_node =
+ Fmt.pf ppf "@[<h 1><blank_node@ %s>@]" (to_string blank_node)
+
+end
+
+module Iri = struct
+
+ type t = Iriref of Iriref.t | Prefixed_name of Prefixed_name.t
+
+ let of_iriref ref = Iriref ref
+
+ let of_prefixed_name pname = Prefixed_name pname
+
+ let equal a b =
+ match (a, b) with
+ | (Iriref refa), (Iriref refb) -> Iriref.equal refa refb
+ | (Prefixed_name pa), (Prefixed_name pb) -> Prefixed_name.equal pa pb
+ | _ -> false
+
+ let pp ppf = function
+ | Iriref iriref -> Iriref.pp ppf iriref
+ | Prefixed_name pname -> Prefixed_name.pp ppf pname
+
+end
+
+module Literal = struct
+ type t = {
+ value: string;
+ language: string option;
+ datatype: Iri.t;
+ }
+
+ let make value ?language datatype =
+ { value; datatype; language }
+
+ let canonical literal =
+ literal.value
+
+ let language literal =
+ literal.language
+
+ let datatype literal =
+ literal.datatype
+
+ let equal a b =
+ (String.equal a.value b.value)
+ &&
+ (Option.equal String.equal a.language b.language)
+ &&
+ (Iri.equal a.datatype b.datatype)
+
+ let pp ppf literal =
+ (* TODO print the datatype and language *)
+ Fmt.pf ppf
+ "@[<8><literal@ value@ %s@ iri %a>@]"
+ (canonical literal)
+ Iri.pp (datatype literal)
+
+end
+
+module Predicate = struct
+
+ type t = Pred_iri of Iri.t | Pred_a
+
+ let of_iri iri = Pred_iri iri
+ let a = Pred_a
+
+ let equal a b =
+ match (a, b) with
+ | (Pred_a, Pred_a) -> true
+ | (Pred_iri ia, Pred_iri ib) -> Iri.equal ia ib
+ | _ -> false
+
+ let pp ppf = function
+ | Pred_a -> Fmt.pf ppf "@<pred_a>@]"
+ | Pred_iri i -> Fmt.pf ppf "@[<4><predicate@ %a>@]" Iri.pp i
+
+end
+
+type object' =
+ Obj_iri of Iri.t
+ | Obj_blank_node of Blank_node.t
+ | Obj_literal of Literal.t
+ | Obj_coll of collection
+ | Obj_BnodPs of bnodps
+and collection =
+ Collection of object' list
+and subject =
+ Sub_iri of Iri.t
+ | Sub_blank_node of Blank_node.t
+ | Sub_coll of collection
+and bnodps = BnodPs of predobjs
+and predobjs = (Predicate.t * object' list) list
+
+let rec object_equal a b =
+ match a, b with
+ | Obj_iri ia, Obj_iri ib -> Iri.equal ia ib
+ | Obj_blank_node ba, Obj_blank_node bb -> Blank_node.equal ba bb
+ | Obj_literal la, Obj_literal lb -> Literal.equal la lb
+ | Obj_coll ca, Obj_coll cb -> collection_equal ca cb
+ | Obj_BnodPs bas, Obj_BnodPs bbs -> bnodps_equal bas bbs
+ | _ -> false
+and collection_equal (Collection obsa) (Collection obsb) =
+ list_equal object_equal obsa obsb
+and subject_equal a b =
+ match a, b with
+ | Sub_iri ia, Sub_iri ib -> Iri.equal ia ib
+ | Sub_blank_node ba, Sub_blank_node bb -> Blank_node.equal ba bb
+ | Sub_coll ca, Sub_coll cb -> collection_equal ca cb
+ | _ -> false
+and bnodps_equal (BnodPs a) (BnodPs b) =
+ predobjs_equal a b
+and predobjs_equal a b =
+ list_equal
+ (fun (p, obsa) (q, obsb) -> Predicate.equal p q && list_equal object_equal obsa obsb)
+ a b
+
+let rec object_pp ppf = function
+ | Obj_iri iri -> Fmt.pf ppf "@[<h 1><obj_iri@ %a>@]" Iri.pp iri
+ | Obj_blank_node bnode -> Fmt.pf ppf "@[<h 1><obj_blank_node@ %a>@]" Blank_node.pp bnode
+ | Obj_literal lit -> Fmt.pf ppf "@[<h 1><obj_literal@ %a>@]" Literal.pp lit
+ | Obj_coll coll -> Fmt.pf ppf "@[<h 1><obj_coll@ %a>@]" collection_pp coll
+ | Obj_BnodPs bnodps -> Fmt.pf ppf "@[<h 1><obj_bnodps@ %a>@]" bnodps_pp bnodps
+and collection_pp ppf (Collection objs) =
+ Fmt.pf ppf "@[<h 1><collection obj@ %a>@]" (Fmt.list object_pp) objs
+and subject_pp ppf = function
+ | Sub_iri iri ->Fmt.pf ppf "@[<h 1><obj_iri@ %a>@]" Iri.pp iri
+ | Sub_blank_node bnode -> Fmt.pf ppf "@[<h 1><obj_blank_node@ %a>@]" Blank_node.pp bnode
+ | Sub_coll collection -> Fmt.pf ppf "@[<h 1><obj_collection@ %a>@]" collection_pp collection
+and bnodps_pp ppf (BnodPs predobjs) =
+ Fmt.pf ppf "@[<h 1><obj_bnodps@ %a>@]" predobjs_pp predobjs
+and predobjs_pp ppf l =
+ let objlist_pp ppf l =
+ List.iter
+ (fun o -> Fmt.pf ppf "@[<h 1><object@ %a>@]" object_pp o)
+ l in
+ List.iter
+ (fun (p, objlist) ->
+ Fmt.pf ppf "@[<h 1><predicate %a object_list %a>@"
+ Predicate.pp p
+ objlist_pp objlist)
+ l
+
+
+module Triples = struct
+
+ type t = SubjPredObjs of subject * predobjs
+ | BnodPs of bnodps
+
+ let of_subject_and_predobjs s p =
+ SubjPredObjs (s, p)
+
+ let of_bnodps b =
+ BnodPs b
+
+ let equal a b =
+ match a, b with
+ | SubjPredObjs (asub, ap), SubjPredObjs (bsub, bp) ->
+ subject_equal asub bsub && predobjs_equal ap bp
+ | BnodPs ab, BnodPs bb ->
+ bnodps_equal ab bb
+ | _, _ -> false
+
+ let pp ppf = function
+ | SubjPredObjs (s, p) ->
+ Fmt.pf ppf "@[<h 1><triples@ subject %a@ predicate %a>@]"
+ subject_pp s
+ predobjs_pp p
+ | BnodPs b ->
+ Fmt.pf ppf "@[<h 1><triples@ bnodps %a>@]" bnodps_pp b
+
+end
+
+module Directive = struct
+
+ type t = PrefixID of string * Iriref.t | Base of Iriref.t
+
+ let of_string_and_iriref s i =
+ PrefixID (s, i)
+
+ let of_iriref i =
+ Base i
+
+ let equal a b =
+ match a, b with
+ | PrefixID (ast, ai), PrefixID (bst, bi) ->
+ String.equal ast bst && Iriref.equal ai bi
+ | Base ai, Base bi ->
+ Iriref.equal ai bi
+ | _, _ -> false
+
+ let pp ppf = function
+ | PrefixID (s, i) ->
+ Fmt.pf ppf "@[<h 1><directive@ prefixID %a@ iriref %a@]"
+ Fmt.string s
+ Iriref.pp i
+ | Base i ->
+ Fmt.pf ppf "@[<h 1><directive@ base %a>@]" Iriref.pp i
+end
+
+module Statement = struct
+
+ type t = Directive of Directive.t | Triples of Triples.t
+
+ let of_directive d =
+ Directive d
+
+ let of_triples t =
+ Triples t
+
+ let equal a b =
+ match a, b with
+ | Directive a, Directive b -> Directive.equal a b
+ | Triples a, Triples b -> Triples.equal a b
+ | _, _ -> false
+
+ let pp ppf = function
+ | Directive d ->
+ Fmt.pf ppf "@[<h 1><statement@ directive@ %a@]" Directive.pp d
+ | Triples t ->
+ Fmt.pf ppf "@[<h 1><statement@ triples %a>@]" Triples.pp t
+
+end
+
+module Turtle = struct
+
+ type t = Statement.t list
+
+ let of_statement_lst lst =
+ lst
+
+ let equal a b =
+ list_equal Statement.equal a b
+
+ let pp ppf l =
+ Fmt.pf ppf "@[<h 1><turtle@ statement list %a>@]" (Fmt.list Statement.pp) l
+
+end
+
+type parser_state = {
+ base_uri : Iri.t;
+ namespaces : Iri.t SMap.t;
+ bnode_labels : Blank_node.t SMap.t;
+ cur_subject: subject;
+ cur_predicate: Predicate.t;
+}
diff --git a/lib/turtle/parser.ml b/lib/turtle/parser.ml
new file mode 100644
index 0000000..c0e2017
--- /dev/null
+++ b/lib/turtle/parser.ml
@@ -0,0 +1,272 @@
+open Angstrom
+
+let whitespace_lst = ['\x20'; '\x0a'; '\x0d'; '\x09'; ]
+
+let is_whitespace = function
+ | '\x20' | '\x0a' | '\x0d' | '\x09' -> true
+ | _ -> false
+
+let is_not_whitespace c = c |> is_whitespace |> not
+
+let char_is_not_equal_to lst d =
+ List.for_all (fun x -> x != d) lst
+
+let whitespace =
+ many @@ choice [string " "; string "\n"; string "\t"]
+ >>| ignore
+
+let delimiters c1 c2 =
+ char c1
+ *> take_while (fun d -> not @@ Char.equal c2 d)
+ <* char c2
+
+let iriref =
+ lift
+ Ast.Iriref.of_string
+ (delimiters '<' '>')
+
+let prefixed_name =
+ lift2
+ Ast.Prefixed_name.of_strings
+ (take_while (char_is_not_equal_to ([':'] @ whitespace_lst))
+ <* char ':')
+ (take_while (char_is_not_equal_to ([']'] @ whitespace_lst)))
+
+let language =
+ lift
+ Ast.Language.of_string
+ (char '@'
+ *> take_while is_not_whitespace)
+
+let blank_node =
+ lift
+ Ast.Blank_node.of_string
+ (char '_'
+ *> char ':'
+ *> take_while is_not_whitespace)
+
+let iri =
+ (lift Ast.Iri.of_iriref iriref)
+ <|>
+ (lift Ast.Iri.of_prefixed_name prefixed_name)
+
+(* TODO the iri of the literal defaults to xds:string. This is the case,
+ * according to the spec, but it can also happen elsewhere.
+ * Moreover: an absent language results in an empty string. This could also be different. *)
+let literal =
+ lift3
+ (fun value lang_opt iri -> match lang_opt with
+ | "" -> Ast.Literal.make value iri
+ | lang -> Ast.Literal.make value ~language:lang iri)
+ (delimiters '"' '"')
+ (option
+ ""
+ (char '@' *> take_while ( char_is_not_equal_to ( [':'] @ whitespace_lst)))
+ )
+ (option
+ (Ast.Iri.of_prefixed_name @@ Ast.Prefixed_name.of_strings "xsd" "string")
+ (string "^^" *> iri))
+
+let predicate =
+ choice [
+ lift (fun _ -> Ast.Predicate.a) (char 'a' <* whitespace);
+ lift Ast.Predicate.of_iri iri
+ ]
+
+(* TODO if you change the order of literal and iri, the tests won't work anymore. *)
+(* This is because the prefixed name parser accepts, for example: *)
+(* "\"That Seventies Show\"^^xsd:string" *)
+(* Same thing for blank_node! *)
+let object_ collection bnodps =
+ choice [
+ (lift
+ (fun collection ->
+ Ast.Obj_coll collection)
+ collection
+ );
+ (lift
+ (fun bnodps ->
+ Ast.Obj_BnodPs bnodps)
+ bnodps
+ );
+ (lift
+ (fun literal ->
+ Ast.Obj_literal literal)
+ literal
+ );
+ (lift
+ (fun blank_node ->
+ Ast.Obj_blank_node blank_node)
+ blank_node
+ );
+ (lift
+ (fun iri ->
+ Ast.Obj_iri iri)
+ iri);
+ ]
+
+let collection_ bnodps =
+ fix (fun collection ->
+ let object' = object_ collection bnodps in
+ (lift
+ (fun collection -> Ast.Collection collection)
+ (
+ char '('
+ *> whitespace
+ *>
+ (sep_by whitespace object')
+ <* whitespace
+ <* char ')'
+ )
+ )
+ )
+
+let subject_ bnodps =
+ let collection = collection_ bnodps in
+ choice [
+ (lift
+ (fun collection -> Ast.Sub_coll collection)
+ collection
+ );
+ (lift
+ (fun blank_node -> Ast.Sub_blank_node blank_node)
+ blank_node
+ );
+ (lift
+ (fun iri -> Ast.Sub_iri iri)
+ iri
+ );
+ ]
+
+let bnodps predobjs =
+ lift
+ (fun predobjs -> Ast.BnodPs predobjs)
+ (
+ char '['
+ *> whitespace
+ *> predobjs
+ <* whitespace
+ <* char ']'
+ )
+
+let predobjs =
+ let semicolon =
+ whitespace
+ *> char ';'
+ <* whitespace
+ in
+ let comma =
+ whitespace
+ *> char ','
+ <* whitespace
+ in
+ fix (fun predobjs ->
+ let bnodps = bnodps predobjs in
+ let collection = collection_ bnodps in
+ let object' = object_ collection bnodps in
+ sep_by1 semicolon (
+ lift2
+ (fun p objs -> (p, objs))
+ (whitespace *> predicate <* whitespace)
+ (sep_by1 comma (whitespace *> object' <* whitespace))
+ )
+ )
+
+let bnodps = bnodps predobjs
+let subject = subject_ bnodps
+let collection = collection_ bnodps
+let object' = object_ collection bnodps
+
+let triples =
+ choice [
+ (lift2
+ (fun subject predobjs -> Ast.Triples.SubjPredObjs (subject, predobjs))
+ subject
+ predobjs);
+ (lift
+ (fun bnodps -> Ast.Triples.BnodPs bnodps)
+ bnodps
+ )
+ ]
+
+let directive =
+ choice [
+ lift2
+ (fun str iriref -> Ast.Directive.PrefixID (str, iriref))
+ (string "@prefix"
+ *> whitespace
+ *> (take_while (char_is_not_equal_to ([':'] @ whitespace_lst)))
+ <* char ':'
+ <* whitespace)
+ (iriref
+ <* whitespace
+ <* char '.'
+ <* whitespace
+ )
+ ;
+ lift
+ (fun iriref -> Ast.Directive.Base iriref)
+ (string "@base"
+ *> whitespace
+ *> iriref
+ <* whitespace
+ <* char '.'
+ )
+ ]
+
+let statement =
+ choice [
+ lift
+ (fun directive -> Ast.Statement.Directive directive)
+ (directive)
+ ;
+ lift
+ (fun triples -> Ast.Statement.Triples triples)
+ (triples
+ <* whitespace
+ <* char '.'
+ <* whitespace
+ )
+ ]
+
+let turtle =
+ many statement
+
+
+ (*
+ This is the idea for mutually recursive parsers (because Angstrom doesn't have a 'fix_poly'. The 'fix' function allows recursion, but the definition of the function can only depend on itself, not on other functions. That's why we have
+let a b c d = ...
+We just make a function a that depends on b, c and d, and later we can compute a if the variables b, c and d are available.
+In principle, you have as many parsers as you like, but it becomes rather lengthy at some point.
+ Note that a plays two different roles here, namely that of the function that sends the parsers b, c and d to the parser a, and the parser a itself.
+In the implementation, we chose a_ (or for example object_) for the function.
+Because some of the types did not depend on all the other types, our functions are simpler (but it is harder to see the general pattern).
+
+ let a b c d = fix (fun a -> (* definition of `a` in terms of `a` and `b` and `c` and `d`*))
+
+ let b c d =
+ fix (fun b ->
+ let a = a b c d in
+ (* definition of b in terms of `a` and `b` and `c` and `d`*)
+
+ let c d =
+ fix (fun c ->
+ let b = b c d in
+ let a = a b c d in
+ (* definition of c in terms of `a` and `b` and `c` and `d`*)
+ )
+
+ let d =
+ fix (fun d ->
+ let c = c d in
+ let b = b c d in
+ let a = a b c d in
+ (* definition of c in terms of `a` and `b` and `c` and `d`*)
+ )
+
+ let c = c d
+
+ let b = b c d
+
+ let a = a b c d
+ *)
diff --git a/lib/turtle/rdf_turtle.ml b/lib/turtle/rdf_turtle.ml
index 41875c2..966cdb7 100644
--- a/lib/turtle/rdf_turtle.ml
+++ b/lib/turtle/rdf_turtle.ml
@@ -1,621 +1,4 @@
-(*
-* SPDX-FileCopyrightText: 2021 alleycat <info@alleycat.cc>
-* SPDX-FileCopyrightText: 2021 petites singularit├ęs <ps-dream@lesoiseaux.io>
-* SPDX-FileCopyrightText: 2021 pukkamustard <pukkamustard@posteo.net>
-*
-* SPDX-License-Identifier: AGPL-3.0-or-later
-*)
+module Ast = Ast
-(* From ocaml 4.12. Move to util functions or something? *)
-let rec list_equal eq l1 l2 =
- match l1, l2 with
- | [], [] -> true
- | [], _::_ | _::_, [] -> false
- | a1::l1, a2::l2 -> eq a1 a2 && list_equal eq l1 l2
-
-module Ordered_string = struct
-
- type t = string
-
- let compare = String.compare
-
-end
-
-module SMap = Map.Make (Ordered_string)
-
-module AST = struct
-
- module Iriref = struct
-
- type t = string
-
- let of_string s = s
-
- let to_string s = s
-
- let equal a b =
- String.equal
- (to_string a)
- (to_string b)
-
- let pp ppf iriref =
- Fmt.pf ppf "@[<h 1><iriref@ %s>@]" (to_string iriref)
-
- end
-
- module Language = struct
-
- type t = string
-
- let of_string s = s
-
- let to_string s = s
-
- let equal a b =
- String.equal
- (to_string a)
- (to_string b)
-
- let pp ppf language =
- Fmt.pf ppf "@[<h 1><language@ %s>@]" (to_string language)
-
- end
-
- module Prefixed_name = struct
-
- type t = string * string
-
- let of_strings s1 s2 = (s1, s2)
-
- let equal a b =
- String.equal (fst a) (fst b)
- &&
- String.equal (snd a) (snd b)
-
-(* TODO check whether this is nice *)
- let pp ppf prefixed_name =
- Fmt.pf ppf "@[<h 1><prefixed_name fst@ %s snd@ %s>@]"
- (fst prefixed_name)
- (snd prefixed_name)
-
- end
-
- module Blank_node = struct
-
- type t = string
-
- let of_string s = s
-
- let to_string s = s
-
- let equal a b =
- String.equal
- (to_string a)
- (to_string b)
-
-(* TODO check whether this is nice (for all the pp functions)*)
- let pp ppf blank_node =
- Fmt.pf ppf "@[<h 1><blank_node@ %s>@]" (to_string blank_node)
-
- end
-
- module Iri = struct
-
- type t = Iriref of Iriref.t | Prefixed_name of Prefixed_name.t
-
- let of_iriref ref = Iriref ref
-
- let of_prefixed_name pname = Prefixed_name pname
-
- let equal a b =
- match (a, b) with
- | (Iriref refa), (Iriref refb) -> Iriref.equal refa refb
- | (Prefixed_name pa), (Prefixed_name pb) -> Prefixed_name.equal pa pb
- | _ -> false
-
- let pp ppf = function
- | Iriref iriref -> Iriref.pp ppf iriref
- | Prefixed_name pname -> Prefixed_name.pp ppf pname
-
- end
-
- module Literal = struct
- type t = {
- value: string;
- language: string option;
- datatype: Iri.t;
- }
-
- let make value ?language datatype =
- { value; datatype; language }
-
- let canonical literal =
- literal.value
-
- let language literal =
- literal.language
-
- let datatype literal =
- literal.datatype
-
- let equal a b =
- (String.equal a.value b.value)
- &&
- (Option.equal String.equal a.language b.language)
- &&
- (Iri.equal a.datatype b.datatype)
-
- let pp ppf literal =
- (* TODO print the datatype and language *)
- Fmt.pf ppf
- "@[<8><literal@ value@ %s@ iri %a>@]"
- (canonical literal)
- Iri.pp (datatype literal)
-
- end
-
- module Predicate = struct
-
- type t = Pred_iri of Iri.t | Pred_a
-
- let of_iri iri = Pred_iri iri
- let a = Pred_a
-
- let equal a b =
- match (a, b) with
- | (Pred_a, Pred_a) -> true
- | (Pred_iri ia, Pred_iri ib) -> Iri.equal ia ib
- | _ -> false
-
- let pp ppf = function
- | Pred_a -> Fmt.pf ppf "@<pred_a>@]"
- | Pred_iri i -> Fmt.pf ppf "@[<4><predicate@ %a>@]" Iri.pp i
-
- end
-
- type object' =
- Obj_iri of Iri.t
- | Obj_blank_node of Blank_node.t
- | Obj_literal of Literal.t
- | Obj_coll of collection
- | Obj_BnodPs of bnodps
- and collection =
- Collection of object' list
- and subject =
- Sub_iri of Iri.t
- | Sub_blank_node of Blank_node.t
- | Sub_coll of collection
- and bnodps = BnodPs of predobjs
- and predobjs = (Predicate.t * object' list) list
-
- let rec object_equal a b =
- match a, b with
- | Obj_iri ia, Obj_iri ib -> Iri.equal ia ib
- | Obj_blank_node ba, Obj_blank_node bb -> Blank_node.equal ba bb
- | Obj_literal la, Obj_literal lb -> Literal.equal la lb
- | Obj_coll ca, Obj_coll cb -> collection_equal ca cb
- | Obj_BnodPs bas, Obj_BnodPs bbs -> bnodps_equal bas bbs
- | _ -> false
- and collection_equal (Collection obsa) (Collection obsb) =
- list_equal object_equal obsa obsb
- and subject_equal a b =
- match a, b with
- | Sub_iri ia, Sub_iri ib -> Iri.equal ia ib
- | Sub_blank_node ba, Sub_blank_node bb -> Blank_node.equal ba bb
- | Sub_coll ca, Sub_coll cb -> collection_equal ca cb
- | _ -> false
- and bnodps_equal (BnodPs a) (BnodPs b) =
- predobjs_equal a b
- and predobjs_equal a b =
- list_equal
- (fun (p, obsa) (q, obsb) -> Predicate.equal p q && list_equal object_equal obsa obsb)
- a b
-
- let rec object_pp ppf = function
- | Obj_iri iri -> Fmt.pf ppf "@[<h 1><obj_iri@ %a>@]" Iri.pp iri
- | Obj_blank_node bnode -> Fmt.pf ppf "@[<h 1><obj_blank_node@ %a>@]" Blank_node.pp bnode
- | Obj_literal lit -> Fmt.pf ppf "@[<h 1><obj_literal@ %a>@]" Literal.pp lit
- | Obj_coll coll -> Fmt.pf ppf "@[<h 1><obj_coll@ %a>@]" collection_pp coll
- | Obj_BnodPs bnodps -> Fmt.pf ppf "@[<h 1><obj_bnodps@ %a>@]" bnodps_pp bnodps
- and collection_pp ppf (Collection objs) =
- Fmt.pf ppf "@[<h 1><collection obj@ %a>@]" (Fmt.list object_pp) objs
- and subject_pp ppf = function
- | Sub_iri iri ->Fmt.pf ppf "@[<h 1><obj_iri@ %a>@]" Iri.pp iri
- | Sub_blank_node bnode -> Fmt.pf ppf "@[<h 1><obj_blank_node@ %a>@]" Blank_node.pp bnode
- | Sub_coll collection -> Fmt.pf ppf "@[<h 1><obj_collection@ %a>@]" collection_pp collection
- and bnodps_pp ppf (BnodPs predobjs) =
- Fmt.pf ppf "@[<h 1><obj_bnodps@ %a>@]" predobjs_pp predobjs
- and predobjs_pp ppf l =
- let objlist_pp ppf l =
- List.iter
- (fun o -> Fmt.pf ppf "@[<h 1><object@ %a>@]" object_pp o)
- l in
- List.iter
- (fun (p, objlist) ->
- Fmt.pf ppf "@[<h 1><predicate %a object_list %a>@"
- Predicate.pp p
- objlist_pp objlist)
- l
-
-
- module Triples = struct
-
- type t = SubjPredObjs of subject * predobjs
- | BnodPs of bnodps
-
- let of_subject_and_predobjs s p =
- SubjPredObjs (s, p)
-
- let of_bnodps b =
- BnodPs b
-
- let equal a b =
- match a, b with
- | SubjPredObjs (asub, ap), SubjPredObjs (bsub, bp) ->
- subject_equal asub bsub && predobjs_equal ap bp
- | BnodPs ab, BnodPs bb ->
- bnodps_equal ab bb
- | _, _ -> false
-
- let pp ppf = function
- | SubjPredObjs (s, p) ->
- Fmt.pf ppf "@[<h 1><triples@ subject %a@ predicate %a>@]"
- subject_pp s
- predobjs_pp p
- | BnodPs b ->
- Fmt.pf ppf "@[<h 1><triples@ bnodps %a>@]" bnodps_pp b
-
- end
-
- module Directive = struct
-
- type t = PrefixID of string * Iriref.t | Base of Iriref.t
-
- let of_string_and_iriref s i =
- PrefixID (s, i)
-
- let of_iriref i =
- Base i
-
- let equal a b =
- match a, b with
- | PrefixID (ast, ai), PrefixID (bst, bi) ->
- String.equal ast bst && Iriref.equal ai bi
- | Base ai, Base bi ->
- Iriref.equal ai bi
- | _, _ -> false
-
- let pp ppf = function
- | PrefixID (s, i) ->
- Fmt.pf ppf "@[<h 1><directive@ prefixID %a@ iriref %a@]"
- Fmt.string s
- Iriref.pp i
- | Base i ->
- Fmt.pf ppf "@[<h 1><directive@ base %a>@]" Iriref.pp i
- end
-
- module Statement = struct
-
- type t = Directive of Directive.t | Triples of Triples.t
-
- let of_directive d =
- Directive d
-
- let of_triples t =
- Triples t
-
- let equal a b =
- match a, b with
- | Directive a, Directive b -> Directive.equal a b
- | Triples a, Triples b -> Triples.equal a b
- | _, _ -> false
-
- let pp ppf = function
- | Directive d ->
- Fmt.pf ppf "@[<h 1><statement@ directive@ %a@]" Directive.pp d
- | Triples t ->
- Fmt.pf ppf "@[<h 1><statement@ triples %a>@]" Triples.pp t
-
- end
-
- module Turtle = struct
-
- type t = Statement.t list
-
- let of_statement_lst lst =
- lst
-
- let equal a b =
- list_equal Statement.equal a b
-
- let pp ppf l =
- Fmt.pf ppf "@[<h 1><turtle@ statement list %a>@]" (Fmt.list Statement.pp) l
-
-end
-
- type parser_state = {
- base_uri : Iri.t;
- namespaces : Iri.t SMap.t;
- bnode_labels : Blank_node.t SMap.t;
- cur_subject: subject;
- cur_predicate: Predicate.t;
- }
-
-end
-
-module Parser = struct
- open Angstrom
-
- let whitespace_lst = ['\x20'; '\x0a'; '\x0d'; '\x09'; ]
-
- let is_whitespace = function
- | '\x20' | '\x0a' | '\x0d' | '\x09' -> true
- | _ -> false
-
- let is_not_whitespace c = c |> is_whitespace |> not
-
- let char_is_not_equal_to lst d =
- List.for_all (fun x -> x != d) lst
-
- let whitespace =
- many @@ choice [string " "; string "\n"; string "\t"]
- >>| ignore
-
- let delimiters c1 c2 =
- char c1
- *> take_while (fun d -> not @@ Char.equal c2 d)
- <* char c2
-
- let iriref =
- lift
- AST.Iriref.of_string
- (delimiters '<' '>')
-
- let prefixed_name =
- lift2
- AST.Prefixed_name.of_strings
- (take_while (char_is_not_equal_to ([':'] @ whitespace_lst))
- <* char ':')
- (take_while (char_is_not_equal_to ([']'] @ whitespace_lst)))
-
- let language =
- lift
- AST.Language.of_string
- (char '@'
- *> take_while is_not_whitespace)
-
- let blank_node =
- lift
- AST.Blank_node.of_string
- (char '_'
- *> char ':'
- *> take_while is_not_whitespace)
-
- let iri =
- (lift AST.Iri.of_iriref iriref)
- <|>
- (lift AST.Iri.of_prefixed_name prefixed_name)
-
- (* TODO the iri of the literal defaults to xds:string. This is the case,
- * according to the spec, but it can also happen elsewhere.
- * Moreover: an absent language results in an empty string. This could also be different. *)
- let literal =
- lift3
- (fun value lang_opt iri -> match lang_opt with
- | "" -> AST.Literal.make value iri
- | lang -> AST.Literal.make value ~language:lang iri)
- (delimiters '"' '"')
- (option
- ""
- (char '@' *> take_while ( char_is_not_equal_to ( [':'] @ whitespace_lst)))
- )
- (option
- (AST.Iri.of_prefixed_name @@ AST.Prefixed_name.of_strings "xsd" "string")
- (string "^^" *> iri))
-
- let predicate =
- choice [
- lift (fun _ -> AST.Predicate.a) (char 'a' <* whitespace);
- lift AST.Predicate.of_iri iri
- ]
-
-(* TODO if you change the order of literal and iri, the tests won't work anymore. *)
-(* This is because the prefixed name parser accepts, for example: *)
-(* "\"That Seventies Show\"^^xsd:string" *)
-(* Same thing for blank_node! *)
- let object_ collection bnodps =
- choice [
- (lift
- (fun collection ->
- AST.Obj_coll collection)
- collection
- );
- (lift
- (fun bnodps ->
- AST.Obj_BnodPs bnodps)
- bnodps
- );
- (lift
- (fun literal ->
- AST.Obj_literal literal)
- literal
- );
- (lift
- (fun blank_node ->
- AST.Obj_blank_node blank_node)
- blank_node
- );
- (lift
- (fun iri ->
- AST.Obj_iri iri)
- iri);
- ]
-
- let collection_ bnodps =
- fix (fun collection ->
- let object' = object_ collection bnodps in
- (lift
- (fun collection -> AST.Collection collection)
- (
- char '('
- *> whitespace
- *>
- (sep_by whitespace object')
- <* whitespace
- <* char ')'
- )
- )
- )
-
- let subject_ bnodps =
- let collection = collection_ bnodps in
- choice [
- (lift
- (fun collection -> AST.Sub_coll collection)
- collection
- );
- (lift
- (fun blank_node -> AST.Sub_blank_node blank_node)
- blank_node
- );
- (lift
- (fun iri -> AST.Sub_iri iri)
- iri
- );
- ]
-
- let bnodps predobjs =
- lift
- (fun predobjs -> AST.BnodPs predobjs)
- (
- char '['
- *> whitespace
- *> predobjs
- <* whitespace
- <* char ']'
- )
-
- let predobjs =
- let semicolon =
- whitespace
- *> char ';'
- <* whitespace
- in
- let comma =
- whitespace
- *> char ','
- <* whitespace
- in
- fix (fun predobjs ->
- let bnodps = bnodps predobjs in
- let collection = collection_ bnodps in
- let object' = object_ collection bnodps in
- sep_by1 semicolon (
- lift2
- (fun p objs -> (p, objs))
- (whitespace *> predicate <* whitespace)
- (sep_by1 comma (whitespace *> object' <* whitespace))
- )
- )
-
- let bnodps = bnodps predobjs
- let subject = subject_ bnodps
- let collection = collection_ bnodps
- let object' = object_ collection bnodps
-
- let triples =
- choice [
- (lift2
- (fun subject predobjs -> AST.Triples.SubjPredObjs (subject, predobjs))
- subject
- predobjs);
- (lift
- (fun bnodps -> AST.Triples.BnodPs bnodps)
- bnodps
- )
- ]
-
- let directive =
- choice [
- lift2
- (fun str iriref -> AST.Directive.PrefixID (str, iriref))
- (string "@prefix"
- *> whitespace
- *> (take_while (char_is_not_equal_to ([':'] @ whitespace_lst)))
- <* char ':'
- <* whitespace)
- (iriref
- <* whitespace
- <* char '.'
- <* whitespace
- )
- ;
- lift
- (fun iriref -> AST.Directive.Base iriref)
- (string "@base"
- *> whitespace
- *> iriref
- <* whitespace
- <* char '.'
- )
- ]
-
- let statement =
- choice [
- lift
- (fun directive -> AST.Statement.Directive directive)
- (directive)
- ;
- lift
- (fun triples -> AST.Statement.Triples triples)
- (triples
- <* whitespace
- <* char '.'
- <* whitespace
- )
- ]
-
- let turtle =
- many statement
-
-
- (*
- This is the idea for mutually recursive parsers (because Angstrom doesn't have a 'fix_poly'. The 'fix' function allows recursion, but the definition of the function can only depend on itself, not on other functions. That's why we have
-let a b c d = ...
-We just make a function a that depends on b, c and d, and later we can compute a if the variables b, c and d are available.
-In principle, you have as many parsers as you like, but it becomes rather lengthy at some point.
- Note that a plays two different roles here, namely that of the function that sends the parsers b, c and d to the parser a, and the parser a itself.
-In the implementation, we chose a_ (or for example object_) for the function.
-Because some of the types did not depend on all the other types, our functions are simpler (but it is harder to see the general pattern).
-
- let a b c d = fix (fun a -> (* definition of `a` in terms of `a` and `b` and `c` and `d`*))
-
- let b c d =
- fix (fun b ->
- let a = a b c d in
- (* definition of b in terms of `a` and `b` and `c` and `d`*)
-
- let c d =
- fix (fun c ->
- let b = b c d in
- let a = a b c d in
- (* definition of c in terms of `a` and `b` and `c` and `d`*)
- )
-
- let d =
- fix (fun d ->
- let c = c d in
- let b = b c d in
- let a = a b c d in
- (* definition of c in terms of `a` and `b` and `c` and `d`*)
- )
-
- let c = c d
-
- let b = b c d
-
- let a = a b c d
- *)
-
-end
+module Parser = Parser
diff --git a/lib/turtle/rdf_turtle.mli b/lib/turtle/rdf_turtle.mli
index 1e41686..c81a983 100644
--- a/lib/turtle/rdf_turtle.mli
+++ b/lib/turtle/rdf_turtle.mli
@@ -6,9 +6,10 @@
* SPDX-License-Identifier: AGPL-3.0-or-later
*)
-module SMap : Map.S with type key = string
-module AST : sig
+module Ast : sig
+
+ module SMap : Map.S with type key = string
module Iriref : sig
@@ -236,36 +237,36 @@ module Parser : sig
val whitespace : unit Angstrom.t
- val iriref : AST.Iriref.t Angstrom.t
+ val iriref : Ast.Iriref.t Angstrom.t
- val prefixed_name : AST.Prefixed_name.t Angstrom.t
+ val prefixed_name : Ast.Prefixed_name.t Angstrom.t
- val language : AST.Language.t Angstrom.t
+ val language : Ast.Language.t Angstrom.t
- val blank_node : AST.Blank_node.t Angstrom.t
+ val blank_node : Ast.Blank_node.t Angstrom.t
- val iri : AST.Iri.t Angstrom.t
+ val iri : Ast.Iri.t Angstrom.t
- val literal : AST.Literal.t Angstrom.t
+ val literal : Ast.Literal.t Angstrom.t
- val predicate : AST.Predicate.t Angstrom.t
+ val predicate : Ast.Predicate.t Angstrom.t
- val predobjs : AST.predobjs Angstrom.t
+ val predobjs : Ast.predobjs Angstrom.t
- val bnodps : AST.bnodps Angstrom.t
+ val bnodps : Ast.bnodps Angstrom.t
- val subject : AST.subject Angstrom.t
+ val subject : Ast.subject Angstrom.t
- val collection : AST.collection Angstrom.t
+ val collection : Ast.collection Angstrom.t
- val object' : AST.object' Angstrom.t
+ val object' : Ast.object' Angstrom.t
- val triples : AST.Triples.t Angstrom.t
+ val triples : Ast.Triples.t Angstrom.t
- val directive : AST.Directive.t Angstrom.t
+ val directive : Ast.Directive.t Angstrom.t
- val statement : AST.Statement.t Angstrom.t
+ val statement : Ast.Statement.t Angstrom.t
- val turtle : AST.Turtle.t Angstrom.t
+ val turtle : Ast.Turtle.t Angstrom.t
end
diff --git a/test/alcotest/rdf_alcotest.ml b/test/alcotest/rdf_alcotest.ml
index 973072b..b281027 100644
--- a/test/alcotest/rdf_alcotest.ml
+++ b/test/alcotest/rdf_alcotest.ml
@@ -26,49 +26,49 @@ let graph =
Alcotest.testable Rdf.Graph.pp Rdf.Graph.equal
let ast_iriref =
- Alcotest.testable Rdf_turtle.AST.Iriref.pp Rdf_turtle.AST.Iriref.equal
+ Alcotest.testable Rdf_turtle.Ast.Iriref.pp Rdf_turtle.Ast.Iriref.equal
let ast_language =
- Alcotest.testable Rdf_turtle.AST.Language.pp Rdf_turtle.AST.Language.equal
+ Alcotest.testable Rdf_turtle.Ast.Language.pp Rdf_turtle.Ast.Language.equal
let ast_prefixed_name =
- Alcotest.testable Rdf_turtle.AST.Prefixed_name.pp Rdf_turtle.AST.Prefixed_name.equal
+ Alcotest.testable Rdf_turtle.Ast.Prefixed_name.pp Rdf_turtle.Ast.Prefixed_name.equal
let ast_blank_node =
- Alcotest.testable Rdf_turtle.AST.Blank_node.pp Rdf_turtle.AST.Blank_node.equal
+ Alcotest.testable Rdf_turtle.Ast.Blank_node.pp Rdf_turtle.Ast.Blank_node.equal
let ast_iri =
- Alcotest.testable Rdf_turtle.AST.Iri.pp Rdf_turtle.AST.Iri.equal
+ Alcotest.testable Rdf_turtle.Ast.Iri.pp Rdf_turtle.Ast.Iri.equal
let ast_literal =
- Alcotest.testable Rdf_turtle.AST.Literal.pp Rdf_turtle.AST.Literal.equal
+ Alcotest.testable Rdf_turtle.Ast.Literal.pp Rdf_turtle.Ast.Literal.equal
let ast_predicate =
- Alcotest.testable Rdf_turtle.AST.Predicate.pp Rdf_turtle.AST.Predicate.equal
+ Alcotest.testable Rdf_turtle.Ast.Predicate.pp Rdf_turtle.Ast.Predicate.equal
let ast_object =
- Alcotest.testable Rdf_turtle.AST.object_pp Rdf_turtle.AST.object_equal
+ Alcotest.testable Rdf_turtle.Ast.object_pp Rdf_turtle.Ast.object_equal
let ast_collection =
- Alcotest.testable Rdf_turtle.AST.collection_pp Rdf_turtle.AST.collection_equal
+ Alcotest.testable Rdf_turtle.Ast.collection_pp Rdf_turtle.Ast.collection_equal
let ast_subject =
- Alcotest.testable Rdf_turtle.AST.subject_pp Rdf_turtle.AST.subject_equal
+ Alcotest.testable Rdf_turtle.Ast.subject_pp Rdf_turtle.Ast.subject_equal
let ast_bnodps =
- Alcotest.testable Rdf_turtle.AST.bnodps_pp Rdf_turtle.AST.bnodps_equal
+ Alcotest.testable Rdf_turtle.Ast.bnodps_pp Rdf_turtle.Ast.bnodps_equal
let ast_predobjs =
- Alcotest.testable Rdf_turtle.AST.predobjs_pp Rdf_turtle.AST.predobjs_equal
+ Alcotest.testable Rdf_turtle.Ast.predobjs_pp Rdf_turtle.Ast.predobjs_equal
let ast_triples =
- Alcotest.testable Rdf_turtle.AST.Triples.pp Rdf_turtle.AST.Triples.equal
+ Alcotest.testable Rdf_turtle.Ast.Triples.pp Rdf_turtle.Ast.Triples.equal
let ast_directive =
- Alcotest.testable Rdf_turtle.AST.Directive.pp Rdf_turtle.AST.Directive.equal
+ Alcotest.testable Rdf_turtle.Ast.Directive.pp Rdf_turtle.Ast.Directive.equal
let ast_statement =
- Alcotest.testable Rdf_turtle.AST.Statement.pp Rdf_turtle.AST.Statement.equal
+ Alcotest.testable Rdf_turtle.Ast.Statement.pp Rdf_turtle.Ast.Statement.equal
let ast_turtle =
- Alcotest.testable Rdf_turtle.AST.Turtle.pp Rdf_turtle.AST.Turtle.equal
+ Alcotest.testable Rdf_turtle.Ast.Turtle.pp Rdf_turtle.Ast.Turtle.equal
diff --git a/test/alcotest/rdf_alcotest.mli b/test/alcotest/rdf_alcotest.mli
index d0d6395..0e6bb33 100644
--- a/test/alcotest/rdf_alcotest.mli
+++ b/test/alcotest/rdf_alcotest.mli
@@ -18,35 +18,35 @@ val triple : Rdf.Triple.t Alcotest.testable
val graph : Rdf.Graph.t Alcotest.testable
-val ast_iriref : Rdf_turtle.AST.Iriref.t Alcotest.testable
+val ast_iriref : Rdf_turtle.Ast.Iriref.t Alcotest.testable
-val ast_language : Rdf_turtle.AST.Language.t Alcotest.testable
+val ast_language : Rdf_turtle.Ast.Language.t Alcotest.testable
-val ast_prefixed_name : Rdf_turtle.AST.Prefixed_name.t Alcotest.testable
+val ast_prefixed_name : Rdf_turtle.Ast.Prefixed_name.t Alcotest.testable
-val ast_blank_node : Rdf_turtle.AST.Blank_node.t Alcotest.testable
+val ast_blank_node : Rdf_turtle.Ast.Blank_node.t Alcotest.testable
-val ast_iri: Rdf_turtle.AST.Iri.t Alcotest.testable
+val ast_iri: Rdf_turtle.Ast.Iri.t Alcotest.testable
-val ast_literal : Rdf_turtle.AST.Literal.t Alcotest.testable
+val ast_literal : Rdf_turtle.Ast.Literal.t Alcotest.testable
-val ast_predicate : Rdf_turtle.AST.Predicate.t Alcotest.testable
+val ast_predicate : Rdf_turtle.Ast.Predicate.t Alcotest.testable
-val ast_object : Rdf_turtle.AST.object' Alcotest.testable
+val ast_object : Rdf_turtle.Ast.object' Alcotest.testable
-val ast_collection : Rdf_turtle.AST.collection Alcotest.testable
+val ast_collection : Rdf_turtle.Ast.collection Alcotest.testable
-val ast_subject : Rdf_turtle.AST.subject Alcotest.testable
+val ast_subject : Rdf_turtle.Ast.subject Alcotest.testable
-val ast_bnodps : Rdf_turtle.AST.bnodps Alcotest.testable
+val ast_bnodps : Rdf_turtle.Ast.bnodps Alcotest.testable
-val ast_predobjs : Rdf_turtle.AST.predobjs Alcotest.testable
+val ast_predobjs : Rdf_turtle.Ast.predobjs Alcotest.testable
-val ast_triples : Rdf_turtle.AST.Triples.t Alcotest.testable
+val ast_triples : Rdf_turtle.Ast.Triples.t Alcotest.testable
-val ast_directive : Rdf_turtle.AST.Directive.t Alcotest.testable
+val ast_directive : Rdf_turtle.Ast.Directive.t Alcotest.testable
-val ast_statement : Rdf_turtle.AST.Statement.t Alcotest.testable
+val ast_statement : Rdf_turtle.Ast.Statement.t Alcotest.testable
-val ast_turtle : Rdf_turtle.AST.Turtle.t Alcotest.testable
+val ast_turtle : Rdf_turtle.Ast.Turtle.t Alcotest.testable
diff --git a/test/turtle/main.ml b/test/turtle/main.ml
index 3c70f28..0973a17 100644
--- a/test/turtle/main.ml
+++ b/test/turtle/main.ml
@@ -1,5 +1,5 @@
module Parser = Rdf_turtle.Parser
-module Turtle = Rdf_turtle.AST
+module Turtle = Rdf_turtle.Ast
open Turtle
open Alcotest