aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorarie <arie@alleycat.cc>2021-06-17 14:13:56 +0200
committerarie <arie@alleycat.cc>2021-06-28 21:03:35 +0200
commit9b771ca73327985f1d88cb2bb5a8402dc89814fc (patch)
treeb948586af4b1724637ddd89970f4a27dd6cd313e
parente585d8bdd3bd491b84c9400170b674ba46c58b59 (diff)
Rdf-turtle
Improve pp functions and parsers. The pp_functions print a turtle doc to a certain satisfaction now, and the parsers work likewise. Still, there are things that can happen unexpectedly. The code is a bit cleaner now. An issue is: how do we test this properly?
-rw-r--r--lib/turtle/ast.ml71
-rw-r--r--lib/turtle/parser.ml180
-rw-r--r--lib/turtle/rdf_turtle.mli4
-rw-r--r--test/turtle/main.ml2
4 files changed, 133 insertions, 124 deletions
diff --git a/lib/turtle/ast.ml b/lib/turtle/ast.ml
index c9a4fe3..d6dd013 100644
--- a/lib/turtle/ast.ml
+++ b/lib/turtle/ast.ml
@@ -219,7 +219,7 @@ and predobjs_equal a b =
let rec object_pp ppf = function
| Obj_iri iri -> Fmt.pf ppf "@[<hov 1>%a@]" Iri.pp iri
| Obj_blank_node bnode -> Fmt.pf ppf "@[<hov 1>%a@]" Blank_node.pp bnode
- | Obj_literal lit -> Fmt.pf ppf "@[<hov 1>%a>@]" Literal.pp lit
+ | Obj_literal lit -> Fmt.pf ppf "@[<hov 1>%a@]" Literal.pp lit
| Obj_coll coll -> Fmt.pf ppf "@[<hov 1>%a@]" collection_pp coll
| Obj_bnodps bnodps -> Fmt.pf ppf "@[<hov 1>%a@]" bnodps_pp bnodps
and collection_pp ppf (Collection objs) =
@@ -233,32 +233,15 @@ and subject_pp ppf = function
and bnodps_pp ppf (BnodPs predobjs) =
Fmt.pf ppf "@[<hov 1>[%a]@]" predobjs_pp predobjs
and predobjs_pp ppf predobjs =
+ let fmt_break_and_semi_colon = Fmt.any " ;@. " in
+ let fmt_space = Fmt.any " " in
+(* let fmt_comma = Fmt.any " ," in *)
let objlist_pp ppf objs =
- match objs with
- | [obj] -> object_pp ppf obj
- | head :: tail ->
- object_pp ppf head;
- (List.iter
- (fun o -> Fmt.pf ppf "@[<hov 1>,%a@]" object_pp o) tail)
- | _ -> raise @@ Invalid_argument "An empty list should not be possible"
+ Fmt.pf ppf "@[<hov 1>%a@]" (Fmt.list ~sep:Fmt.comma object_pp) objs
in
- match predobjs with
- | [(p, objs)] ->
- Fmt.pf ppf "@[<hov 1>%a %a@]"
- Predicate.pp p
- objlist_pp objs
- | (p, objs) :: tail ->
- Fmt.pf ppf "@[<hov 1>%a %a@]"
- Predicate.pp p
- objlist_pp objs;
- List.iter
- (fun (p, objs) ->
- Fmt.pf ppf "@[<hov 1> ;\n %a %a@]"
- Predicate.pp p
- objlist_pp objs)
- tail
- | _ -> raise @@ Invalid_argument "An empty list should not be possible"
-
+ Fmt.pf ppf "@[<hov 1>%a@]"
+ (Fmt.list ~sep:fmt_break_and_semi_colon (Fmt.pair ~sep:fmt_space Predicate.pp objlist_pp))
+ predobjs
module Triples = struct
@@ -281,11 +264,11 @@ module Triples = struct
let pp ppf = function
| SubjPredObjs (s, ps) ->
- Fmt.pf ppf "@[<h 1><triples@ subject %a@ predicate %a>@]"
+ Fmt.pf ppf "@[<hov 1>%a@ %a@]"
subject_pp s
predobjs_pp ps
| BnodPs bs ->
- Fmt.pf ppf "@[<h 1><triples@ bnodps %a >@]"
+ Fmt.pf ppf "@[<hov 1>%a@]"
bnodps_pp bs
end
@@ -310,11 +293,11 @@ module Directive = struct
let pp ppf = function
| PrefixID (s, i) ->
- Fmt.pf ppf "@[<h 1><directive@ prefixID %a@ iriref %a@]"
+ Fmt.pf ppf "@[<hov 1>@prefix %a: %a@]"
Fmt.string s
Iriref.pp i
| Base i ->
- Fmt.pf ppf "@[<h 1><directive@ base %a>@]" Iriref.pp i
+ Fmt.pf ppf "@[<hov 1>@base %a@]" Iriref.pp i
end
module Statement = struct
@@ -335,9 +318,9 @@ module Statement = struct
let pp ppf = function
| Directive d ->
- Fmt.pf ppf "@[<h 1><statement@ directive@ %a@]" Directive.pp d
+ Fmt.pf ppf "@[<hov 1>%a@]" Directive.pp d
| Triples t ->
- Fmt.pf ppf "@[<h 1><statement@ triples %a>@]" Triples.pp t
+ Fmt.pf ppf "@[<hov 1>%a@]" Triples.pp t
end
@@ -348,11 +331,33 @@ module Turtle = struct
let of_statement_lst lst =
lst
+ let directives_triples lst =
+ List.fold_right
+ (fun x (dirs, triples) ->
+ match x with
+ | Statement.Triples y -> (dirs, Statement.of_triples y :: triples)
+ | Statement.Directive y -> (Statement.of_directive y :: dirs, triples)
+ )
+ lst
+ ([], [])
+
let equal a b =
list_equal Statement.equal a b
- let pp ppf l =
- Fmt.pf ppf "@[<h 1><turtle@ statement list %a>@]" (Fmt.list Statement.pp) l
+ let pp ppf lst =
+ let fmt_break = Fmt.any "@.@." in
+ let statement_pp_directives ppf statement =
+ Fmt.pf ppf "%a ." Statement.pp statement
+ in
+ let statement_pp_triples ppf statement =
+ Fmt.pf ppf "%a .@." Statement.pp statement
+ in
+ Fmt.pf ppf "@[<hov 1>%a@]"
+ (Fmt.pair
+ ~sep:fmt_break
+ (Fmt.list statement_pp_directives)
+ (Fmt.list statement_pp_triples))
+ (directives_triples lst)
end
diff --git a/lib/turtle/parser.ml b/lib/turtle/parser.ml
index a586c55..fd6de8a 100644
--- a/lib/turtle/parser.ml
+++ b/lib/turtle/parser.ml
@@ -1,32 +1,31 @@
open Angstrom
-let string_starts_with s subs =
- if String.length subs > String.length s then
- false
- else if subs <> String.sub s 0 (String.length subs) then
- false
- else
- true
-
let whitespace_lst = ['\x20'; '\x0a'; '\x0d'; '\x09'; ]
let is_whitespace = function
| '\x20' | '\x0a' | '\x0d' | '\x09' -> true
| _ -> false
-let is_not_whitespace c = c |> is_whitespace |> not
-
let char_is_not_equal_to lst d =
List.for_all (fun x -> x != d) lst
-let whitespace =
- many @@ choice [string " "; string "\n"; string "\t"]
+let comment =
+ char '#'
+ *> take_while (char_is_not_equal_to ['\n'; '\r'])
>>| ignore
-let whitespace1 =
+let whitespace =
many1 @@ choice [string " "; string "\n"; string "\t"]
>>| ignore
+let whitespace_or_comment =
+ many @@ choice [comment; whitespace]
+ >>| ignore
+
+let whitespace_or_comment1 =
+ many1 @@ choice [comment; whitespace]
+ >>| ignore
+
let delimiters c1 c2 =
char c1
*> take_while (fun d -> not @@ Char.equal c2 d)
@@ -43,33 +42,40 @@ let prefixed_name =
(peek_char >>= function
| Some '_' -> fail "A prefixed_name can not start with _"
| _ ->
- (take_while (char_is_not_equal_to ([':'] @ whitespace_lst))
+ (take_while (char_is_not_equal_to (
+ [':'] @ whitespace_lst))
<* char ':'))
- (take_while (char_is_not_equal_to ([']'; ')'; '('] @ whitespace_lst)))
+ (take_while (char_is_not_equal_to (
+ [']'; ')'; '('] @ whitespace_lst)))
let language =
lift
Ast.Language.of_string
(char '@'
- *> take_while (char_is_not_equal_to ([']'; ')'; '('] @ whitespace_lst)))
+ *> take_while (char_is_not_equal_to (
+ [']'; ')'; '('] @ whitespace_lst)))
let blank_node =
lift
Ast.Blank_node.of_string
- (char '_'
- *> char ':'
- *> take_while is_not_whitespace)
+ (string "_:"
+ *> take_while (char_is_not_equal_to (
+ [']'; ')'; '('; ';'; ','] @ whitespace_lst)))
let iri =
(lift Ast.Iri.of_iriref iriref)
<|>
(lift Ast.Iri.of_prefixed_name prefixed_name)
-(* TODO the iri of the literal defaults to xds:string. This is the case,
- * according to the spec, but it can also happen elsewhere.
- * Moreover: an absent language results in an empty string. This could also be different. *)
+(* TODO the iri of the literal defaults to xds:string. This is the case, *)
+(* according to the spec, but it can also happen elsewhere. *)
+(* TODO include literals with more quotation marks? *)
+(* TODO include literal-ints? *)
let literal =
-let datatype_str = Ast.Iri.of_prefixed_name (Ast.Prefixed_name.of_strings "xsd" "string") in
+ let datatype_str =
+ Ast.Prefixed_name.of_strings "xsd" "string"
+ |> Ast.Iri.of_prefixed_name
+ in
choice ~failure_msg:"None of the parsers worked for Literal"
[
lift2
@@ -99,11 +105,13 @@ let datatype_str = Ast.Iri.of_prefixed_name (Ast.Prefixed_name.of_strings "xsd"
let predicate =
choice [
- lift (fun _ ->
- Fmt.pr "PRED_A:@.";
- Ast.Predicate.a) (char 'a' <* whitespace1);
- lift (fun iri ->
- Fmt.pr "PRED_IRI: %a@." Ast.Iri.pp iri;
+ lift
+ (fun _ ->
+ Ast.Predicate.a)
+ (char 'a' <* whitespace_or_comment1)
+ ;
+ lift
+ (fun iri ->
Ast.Predicate.of_iri iri)
iri
]
@@ -112,34 +120,27 @@ let predicate =
(* This is because the prefixed name parser accepts, for example: *)
(* "\"That Seventies Show\"^^xsd:string" *)
(* Same thing for blank_node! *)
+(* Note: this happens for a lot of parsers.. Not sure what the best solution is: to play with the order until it's right, or to make parsers more specific (e.g. fail when you encounter this or that character) *)
let object_ collection bnodps =
choice [
(lift
- (fun collection ->
- Ast.Obj_coll collection)
+ (fun collection -> Ast.Obj_coll collection)
collection
);
(lift
- (fun bnodps ->
- Ast.Obj_bnodps bnodps)
+ (fun bnodps -> Ast.Obj_bnodps bnodps)
bnodps
);
(lift
- (fun literal ->
- Fmt.pr "OBJ_LITERAL %a" Ast.Literal.pp literal;
- Ast.Obj_literal literal)
+ (fun literal -> Ast.Obj_literal literal)
literal
);
(lift
- (fun blank_node ->
- Fmt.pr "OBJ_BNODE %a" Ast.Blank_node.pp blank_node;
- Ast.Obj_blank_node blank_node)
+ (fun blank_node -> Ast.Obj_blank_node blank_node)
blank_node
);
(lift
- (fun iri ->
- Fmt.pr "OBJ_IRI %a@." Ast.Iri.pp iri;
- Ast.Obj_iri iri)
+ (fun iri -> Ast.Obj_iri iri)
iri);
]
@@ -148,16 +149,13 @@ let collection_ bnodps =
let object' = object_ collection bnodps in
(lift
(fun collection -> Ast.Collection collection)
- (
- char '('
- *> whitespace
- *>
- (sep_by whitespace object')
- <* whitespace
- <* char ')'
- )
- )
- )
+ (char '('
+ *> whitespace_or_comment
+ *> sep_by
+ whitespace_or_comment
+ object'
+ <* whitespace_or_comment
+ <* char ')')))
let subject_ bnodps =
let collection = collection_ bnodps in
@@ -174,44 +172,43 @@ let subject_ bnodps =
(fun iri -> Ast.Sub_iri iri)
iri
);
+ (lift
+ (fun bnodps -> Ast.Sub_bnodps bnodps)
+ bnodps)
]
let bnodps predobjs =
lift
- (fun predobjs ->
- Fmt.pr "BNODPS";
- Ast.BnodPs predobjs)
- (
- char '['
- *> whitespace
+ (fun predobjs -> Ast.BnodPs predobjs)
+ (char '['
+ *> whitespace_or_comment
*> predobjs
- <* whitespace
- <* char ']'
- )
+ <* whitespace_or_comment
+ <* char ']')
let predobjs =
let semicolon =
- whitespace
+ whitespace_or_comment
*> char ';'
- <* whitespace
+ <* whitespace_or_comment
in
let comma =
- whitespace
+ whitespace_or_comment
*> char ','
- <* whitespace
+ <* whitespace_or_comment
in
fix (fun predobjs ->
- Fmt.pr "PREDOBJS";
let bnodps = bnodps predobjs in
let collection = collection_ bnodps in
let object' = object_ collection bnodps in
- sep_by semicolon (
- lift2
+ sep_by
+ semicolon
+ (lift2
(fun p objs -> (p, objs))
- (whitespace *> predicate <* whitespace)
- (sep_by comma (whitespace *> object' <* whitespace))
- )
- )
+ (whitespace_or_comment *> predicate <* whitespace_or_comment)
+ (sep_by
+ comma
+ (whitespace_or_comment *> object' <* whitespace_or_comment))))
let bnodps = bnodps predobjs
let subject = subject_ bnodps
@@ -222,36 +219,36 @@ let triples =
choice [
(lift2
Ast.Triples.of_subject_and_predobjs
- (subject <* whitespace)
- predobjs);
+ (whitespace_or_comment *> subject <* whitespace_or_comment)
+ predobjs)
+ ;
(lift
Ast.Triples.of_bnodps
- bnodps
- )
+ bnodps)
+ ;
]
let directive =
choice [
lift2
Ast.Directive.of_string_and_iriref
- (string "@prefix"
- *> whitespace
+ (whitespace_or_comment
+ *> string "@prefix"
+ *> whitespace_or_comment
*> (take_while (char_is_not_equal_to ([':'] @ whitespace_lst)))
<* char ':'
- <* whitespace)
+ <* whitespace_or_comment)
(iriref
- <* whitespace
- <* char '.'
- <* whitespace
+ <* whitespace_or_comment
)
;
lift
Ast.Directive.of_iriref
- (string "@base"
- *> whitespace
+ (whitespace_or_comment
+ *> string "@base"
+ *> whitespace_or_comment
*> iriref
- <* whitespace
- <* char '.'
+ <* whitespace_or_comment
)
]
@@ -259,19 +256,22 @@ let statement =
choice [
lift
Ast.Statement.of_directive
- (directive)
+ (directive
+ <* whitespace_or_comment
+ <* char '.'
+ <* whitespace_or_comment)
;
lift
Ast.Statement.of_triples
(triples
- <* whitespace
+ <* whitespace_or_comment
<* char '.'
- <* whitespace
- )
+ <* whitespace_or_comment)
]
let turtle =
- many statement
+ many1
+ statement
(*
diff --git a/lib/turtle/rdf_turtle.mli b/lib/turtle/rdf_turtle.mli
index 36d28f8..5c75058 100644
--- a/lib/turtle/rdf_turtle.mli
+++ b/lib/turtle/rdf_turtle.mli
@@ -217,6 +217,8 @@ module Ast : sig
val of_statement_lst : Statement.t list -> t
+ val directives_triples : t -> t * t
+
val equal : t -> t -> bool
val pp : t Fmt.t
@@ -238,6 +240,8 @@ module Parser : sig
val whitespace : unit Angstrom.t
+ val whitespace_or_comment : unit Angstrom.t
+
val iriref : Ast.Iriref.t Angstrom.t
val prefixed_name : Ast.Prefixed_name.t Angstrom.t
diff --git a/test/turtle/main.ml b/test/turtle/main.ml
index db4193d..5b95da4 100644
--- a/test/turtle/main.ml
+++ b/test/turtle/main.ml
@@ -333,7 +333,7 @@ let directive_test_case =
"@base <http://one.example/> .",
Directive.of_iriref (Iriref.of_string "http://one.example/")
;
- "@prefix foaf: <http://xmlns.com/foaf/0.1/> .",
+ "@prefix foaf: <http://xmlns.com/foaf/0.1/> ",
Directive.of_string_and_iriref
"foaf"
(Iriref.of_string "http://xmlns.com/foaf/0.1/")