aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author arie <arie@alleycat.cc> 2021-06-24 14:46:30 +0200
committer arie <arie@alleycat.cc> 2021-06-28 21:08:44 +0200
commit 536e4421a4465cffe2ddfc9b7e629cdd79d08ea7 (patch)
tree 9edb67b86c475e84838ab30e32806f9f2bbe5059
parent 2e0294b9b72568f7b171fb6a95cc135d4caf6ae7 (diff)
Include multiple quotes for the literal parser.
-rw-r--r-- bin/main.ml 19
-rw-r--r-- lib/turtle/parser.ml 66
-rw-r--r-- lib/turtle/rdf_turtle.mli 2
3 files changed, 42 insertions, 45 deletions
diff --git a/bin/main.ml b/bin/main.ml
index 7e78b78..091a6d6 100644
--- a/bin/main.ml
+++ b/bin/main.ml
@@ -1,6 +1,5 @@
module Parser = Rdf_turtle.Parser
-open Rdf
module Ast = Rdf_turtle.Ast
let parse p =
@@ -19,7 +18,7 @@ let turtle_list =
(* SUBJECT = COLLECTION , this works! *)
"() <http://a.example/p> <http://a.example/o> ."
;
- "('hello') <http://a.example/p> <http://a.example/o> ."
+ "('''hello''') <http://a.example/p> <http://a.example/o> ."
;
"('1' '2') <http://a.example/p> <http://a.example/o> ."
;
@@ -100,14 +99,14 @@ let turtle_list =
;
]
-let test_ctx : Rdf_turtle.Ast.state =
- {
- base_uri = "base" |> Iri.of_string;
- namespaces = Rdf_turtle.Ast.SMap.empty;
- bnode_counter = 0;
- }
-
-let empty_graph = Graph.empty
+(* let test_ctx : Rdf_turtle.Ast.state = *)
+(* { *)
+(* base_uri = "base" |> Iri.of_string; *)
+(* namespaces = Rdf_turtle.Ast.SMap.empty; *)
+(* bnode_counter = 0; *)
+(* } *)
+(* *)
+(* let empty_graph = Graph.empty *)
let () =
List.iter (
diff --git a/lib/turtle/parser.ml b/lib/turtle/parser.ml
index 8e8b154..81822cb 100644
--- a/lib/turtle/parser.ml
+++ b/lib/turtle/parser.ml
@@ -26,15 +26,29 @@ let whitespace_or_comment1 =
many1 @@ choice [comment; whitespace]
>>| ignore
-let delimiters c1 c2 =
+(* TODO replace this by delimiters_str? *)
+let delimiters_chr c1 c2 =
char c1
*> take_while (fun d -> not @@ Char.equal c2 d)
<* char c2
+(* Note: many_till p q runs p repeatedly until q succeeds; the input matched by q is consumed too! *)
+(* [take_until_str s] consumes input up to and including the first
+ * occurrence of [s] and returns the text that preceded it. It fails if
+ * [s] is never found. The characters are joined in one linear pass. *)
+let take_until_str s =
+  lift
+    (fun chars -> String.of_seq (List.to_seq chars))
+    (many_till any_char (string s))
+
+(* Matches [s1], then returns the text before the next [s2] (also consumed). *)
+let delimiters_str s1 s2 =
+  string s1 >>= fun _opening -> take_until_str s2
+
let iriref =
lift
Ast.Iriref.of_string
- (delimiters '<' '>')
+ (delimiters_chr '<' '>')
let prefixed_name =
lift2
@@ -69,55 +83,37 @@ let iri =
(* TODO the iri of the literal defaults to xsd:string. This is the case, *)
(* according to the spec, but it can also happen elsewhere. *)
-(* TODO include literals with more quotation marks? *)
+(* TODO include literals with more quotation marks? Is this OK now? *)
(* TODO include literal-ints? *)
let literal =
let datatype_str =
Ast.Prefixed_name.of_strings "xsd" "string"
|> Ast.Iri.of_prefixed_name
in
+ let literal_value =
+ (choice [
+ delimiters_str "\"\"\"" "\"\"\"" ;
+ delimiters_str "'''" "'''" ;
+ delimiters_str "'" "'" ;
+ delimiters_str "\"" "\"" ;
+ ])
+ in
choice ~failure_msg:"None of the parsers worked for Literal"
[
lift2
(fun value lang ->
Ast.Literal.make value ~language:lang datatype_str)
- (choice [
- (char '"'
- *> take_while (char_is_not_equal_to (['"']))
- <* char '"')
- ;
- (char '\''
- *> take_while (char_is_not_equal_to (['\'']))
- <* char '\'')
- ])
- (language)
+ literal_value
+ language
;
lift2
- (fun value datatype ->
- Ast.Literal.make value datatype)
- (choice [
- (char '"'
- *> take_while (char_is_not_equal_to (['"']))
- <* char '"')
- ;
- (char '\''
- *> take_while (char_is_not_equal_to (['\'']))
- <* char '\'')
- ])
+ (fun value datatype -> Ast.Literal.make value datatype)
+ literal_value
(string "^^" *> iri)
;
lift
- (fun value ->
- Ast.Literal.make value datatype_str)
- (choice [
- (char '"'
- *> take_while (char_is_not_equal_to (['"']))
- <* char '"')
- ;
- (char '\''
- *> take_while (char_is_not_equal_to (['\'']))
- <* char '\'')
- ])
+ (fun value -> Ast.Literal.make value datatype_str)
+ literal_value
;
]
diff --git a/lib/turtle/rdf_turtle.mli b/lib/turtle/rdf_turtle.mli
index 4c1b759..7e3657f 100644
--- a/lib/turtle/rdf_turtle.mli
+++ b/lib/turtle/rdf_turtle.mli
@@ -240,6 +240,8 @@ module Parser : sig
val whitespace_or_comment : unit Angstrom.t
+ val take_until_str : string -> string Angstrom.t
+
val iriref : Ast.Iriref.t Angstrom.t
val prefixed_name : Ast.Prefixed_name.t Angstrom.t