aboutsummaryrefslogtreecommitdiff
path: root/lib/turtle/rdf_turtle.ml
blob: cb3e3e8c1b73f2a066781c80e1c2ec02890d867a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
module Ast = Ast

module Parser = Parser

open Rdf

(* Creates a new blank node and incrementes the bnode_counter in the context *)
let create_blank_node (state:Ast.state) =
  let i = state.bnode_counter in
  {state with bnode_counter = state.bnode_counter + 1},
  Blank_node.of_string @@ ("genid" ^ string_of_int i)

(* TODO do we need to do something with the state here? Can prefixes depend on previous prefixes?
 * In that case it would be necessary *)
let iri_of_iriref : Ast.Iriref.t -> Iri.t =
  fun iriref ->
  iriref
  |> Ast.Iriref.to_string
  |> Iri.of_string

(* This function computes an iri:Rdf.Iri.t, given an iri:Ast.Iri.t and the state.
 * In case the iri is an Iriref, it will just be the corresponding iri.
 * In the case the iri is a Prefixed_name, it looks for the prefix_label in the state.
 * If it finds this label, the resulting iri is the concatenation of this label and the
 * local part. Otherwise it raises Not_found. *)
(* TODO It's kind of weird to store the Iri's in the state, then
 * to take out the iri, turn it into a string, take iriref, and the string
 * from that, glue them together and make an iri out of it. We can also store the
 * string's themselves. *)
let iri_of_resource : Ast.state -> Ast.Iri.t -> Iri.t = fun
  state ast_iri ->
  match ast_iri with
  | Iriref str ->
    str |>
    Iri.of_string
  | Prefixed_name (a, b) ->
    let iri = Ast.SMap.find a state.namespaces in
    let lst = b |> Ast.Iriref.to_string in
    let fst = iri |> Iri.to_string in
    Iri.of_string (fst ^ lst)

(* TODO is this a natural choice? Do we expect a base in all the turtle strings we'll receive? *)
let base_iri =
  "<http://example.org/>"
  |> Iri.of_string

(* TODO should it be this? *)
let base_predicate =
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
  |> Iri.of_string
  |> Triple.Predicate.of_iri

(* TODO should it be this? *)
let nil_obj =
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"
  |> Iri.of_string
  |> Triple.Object.of_iri

let nil_sub =
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"
  |> Iri.of_string
  |> Triple.Subject.of_iri

let first_pred =
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#first"
  |> Iri.of_string
  |> Triple.Predicate.of_iri

let rest_pred =
  "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"
  |> Iri.of_string
  |> Triple.Predicate.of_iri

(* We separate the cases where the collection is an object, and where it is a subject. *)
(* They have in common that, for the list of objects, a list of bnodes is generated. *)
(* And a list of triples is emitted. This happens in mk_collection. *)
let rec mk_collection : Blank_node.t -> (Ast.state * Graph.t) -> Ast.object' list -> (Ast.state * Graph.t) * Blank_node.t =
  fun bnode (state, g) objs ->
  match objs with
  | [obj] ->
(*     Fmt.pr "Im in mk_collection [x] : hi!@."; *)
    let sub_bnode = Triple.Subject.of_blank_node @@ bnode in
    let (state, g) = insert_pred sub_bnode first_pred (state, g) obj in
    let g = Graph.add (Triple.make sub_bnode rest_pred nil_obj) g in
    let (state, new_bnode) = create_blank_node state in
    (state, g), new_bnode
  | head :: tail ->
(*     Fmt.pr "Im in mk_collection head :: tail: hi!@."; *)
    let sub_bnode = Triple.Subject.of_blank_node @@ bnode in
    let (state, g) = insert_pred sub_bnode first_pred (state, g) head in
    let (state, new_bnode) = create_blank_node state in
    let obj_bnode = Triple.Object.of_blank_node @@ new_bnode in
    let g = Graph.add (Triple.make sub_bnode rest_pred obj_bnode) g in
    mk_collection new_bnode (state, g) tail
  | _ -> raise @@ Invalid_argument "This list cannot be empty."

and mk_collection_sub : Ast.state * Graph.t -> Ast.object' list -> Triple.Subject.t * (Ast.state * Graph.t) =
  fun (state, g) objs ->
(*   Fmt.pr "Im in mk_collection_sub; hi!@."; *)
  match objs with
  | [] -> nil_sub, (state, g)
  | objs ->
    let (state, bnode) = create_blank_node state in
    let (state, g), _bnode = mk_collection bnode (state, g) objs in
    let sub_bnode = Triple.Subject.of_blank_node @@ bnode in
    sub_bnode, (state, g)

and mk_collection_obj : Triple.Subject.t -> Triple.Predicate.t -> Ast.state * Graph.t -> Ast.object' list -> Ast.state * Graph.t =
  fun sub pred (state, g) objs ->
  match objs with
  | [] ->
    let g = Graph.add (Triple.make sub pred nil_obj) g in
    (state, g)
  | objs ->
    let (state, bnode) = create_blank_node state in
    let (state, g), _bnode = mk_collection bnode (state, g) objs in
    let obj_bnode = Triple.Object.of_blank_node @@ bnode in
    let g = Graph.add (Triple.make sub pred obj_bnode) g in
    (state, g)

(*   We match on objects *)
(*   If the object is an Iri, check whether it is a prefixed name and apply the namespaces *)
(*   Add a triple to the graph. *)
(*   If the object is a blank_node, transform to an rdf_blank_node and add to graph. *)
(*   If the object is a literal, transform to an rdf_literal and add to graph *)
(*   If the object is a collection, perform mk_collection with sub, pred, Ast.state * Graph.t. *)
and insert_pred : Triple.Subject.t -> Triple.Predicate.t -> Ast.state * Graph.t -> Ast.object' -> Ast.state * Graph.t =
  fun sub pred (state, g) obj ->
  match obj with
  | Obj_iri i ->
(*     Fmt.pr "Im in insert_pred; hi!@."; *)
    let rdf_obj = Triple.Object.of_iri (iri_of_resource state i) in
    let g = Graph.add (Triple.make sub pred rdf_obj) g in
    (state, g)
  | Obj_blank_node bnode ->
    let rdf_obj =
      bnode
      |> Blank_node.of_string
      |> Triple.Object.of_blank_node
    in
    let g = Graph.add (Triple.make sub pred rdf_obj) g in
    (state, g)
  | Obj_literal { value; datatype; language } ->
    let rdf_iri_datatype = iri_of_resource state datatype in
    let rdf_obj =
      Triple.Object.of_literal
        (Literal.make
           value
           rdf_iri_datatype
           ?language)
    in
    let g = Graph.add (Triple.make sub pred rdf_obj) g in
    (state, g)
  | Obj_coll (Collection objs) -> mk_collection_obj sub pred (state, g) objs
  | Obj_bnodps (BnodPs obj_predobjs) ->
    let (state, bnode) = create_blank_node state in
    let obj_bnode = Triple.Object.of_blank_node @@ bnode in
    let sub_bnode = Triple.Subject.of_blank_node @@ bnode in
    let g = Graph.add (Triple.make sub pred obj_bnode) g in
    let (state, g) = List.fold_left (insert_sub_predobj sub_bnode) (state, g) obj_predobjs in
    (state, g)

and insert_sub_predobj : Triple.Subject.t -> Ast.state * Graph.t -> Ast.Predicate.t * (Ast.object' list) -> Ast.state * Graph.t =
  fun sub (state, g) (pred, objs) ->
  let pred =
    match pred with
    | Pred_iri i -> Triple.Predicate.of_iri @@ (iri_of_resource state i)
    | Pred_a -> base_predicate
  in
  List.fold_left (insert_pred sub pred) (state, g) objs

(* We match on subject.
 * If the subject is an Iri, we apply the function iri_of_resource, which computes a new Iri, using the namespaces.
 * If the subject is a blank_node, we create a subject with that blank node.
 * If the subject is a collection, we apply the mk_collection_sub function.
 * If the subject is a BnodPs of sub_predobjs, we create a fresh blank_node,
 * an corresponding subject sub_bnode, and fold with insert_sub_predobjs sub_bnode sub_predobjs.
 * After this match, we fold over preobjs *)
let insert_sub_predobjs : Ast.state * Graph.t -> Ast.subject -> Ast.predobjs -> Ast.state * Graph.t =
  fun (state, g) sub predobjs ->
  let sub, (state, g) =
    match sub with
    | Sub_iri i -> Triple.Subject.of_iri (iri_of_resource state i), (state, g)
    | Sub_blank_node b -> (Triple.Subject.of_blank_node @@ Blank_node.of_string b), (state, g)
    | Sub_coll (Collection objs) -> mk_collection_sub (state, g) objs
    | Sub_bnodps (BnodPs sub_predobjs) ->
      let (state, bnode) = create_blank_node state in
      let sub_bnode = Triple.Subject.of_blank_node @@ bnode in
      let (state, g) = List.fold_left (insert_sub_predobj sub_bnode) (state, g) sub_predobjs in
      sub_bnode, (state, g)
  in
  List.fold_left (insert_sub_predobj sub) (state, g) predobjs

(* Statements can be either a Directive or a Triple.
 * If it is a Directive, the prefix or base is included in the state.
 * If its a Triple of SubjPredObjs, the function insert_sub_predobjs is applied
 * If it is a Triple of BnodPs of predobjs, a fresh blank_node is created, and from that a subject sub_bnode,
 * and then we fold over the list predobjs, with the function insert_sub_predobj and accumulator sub_bnode.*)
let apply_statement : Ast.state * Graph.t -> Ast.Statement.t -> Ast.state * Graph.t =
  fun (state, g) statement -> match statement with
    | Directive (PrefixID (s, iriref)) ->
      let iri = iri_of_iriref iriref in
      let state = { state with namespaces = Ast.SMap.add s iri state.namespaces } in
      (state, g)
    | Directive (Base iriref) ->
      let iri =
        iriref
        |> Ast.Iriref.to_string
        |> Iri.of_string
      in
      let state = { state with base_uri = iri } in
      (state, g)
    | Triple (SubjPredObjs (sub, predobjs)) ->
      insert_sub_predobjs (state, g) sub predobjs
    (*  TODO, double BnodPs is weird *)
    | Triple (BnodPs (BnodPs predobjs)) ->
      let (state, bnode) = create_blank_node state in
      let sub_bnode = Triple.Subject.of_blank_node @@ bnode in
      List.fold_left (insert_sub_predobj sub_bnode) (state, g) predobjs

(* A turtle element is in fact a list of statements. Here, we fold over this list, with the function apply_statements *)
let apply_statements : Ast.state * Graph.t -> Ast.Turtle.t -> Ast.state * Graph.t =
  fun (state, g) statements ->
  List.fold_left apply_statement (state, g) statements

(* This function is in quite some places. *)
let parse p =
  Angstrom.parse_string
    ~consume:Angstrom.Consume.All
    p

(* This function accepts a string, tries to parse it as a turtle string, and, if this succeeds, it transforms
 * the resulting instance of a turtle type into an Rdf graph. *)
let decode str =
(*  This 'string -> iri' map is needed because of the default datatype 'xsd:string' in literals *)
  let xsd_iri =
    "http://www.w3.org/2001/XMLSchema#"
    |> Rdf.Iri.of_string in
  let namespaces = Ast.SMap.add "xsd" xsd_iri Ast.SMap.empty in
  let state : Ast.state = {
    base_uri = base_iri;
    namespaces = namespaces;
    bnode_counter = 1;
  } in
  let g = Graph.empty in
  str
  |> parse Parser.turtle
  |> (function
      | Ok x ->
        apply_statements (state, g) x
        |> (fun (_state, g) -> g)
      | Error e -> raise (Failure (Fmt.str "eror: %s@." e)))


let encode graph =
  let encode_iri iri =
    Ast.Iri.of_iriref @@ Iri.to_string iri
  in
  let encode_subject subject =
    Triple.Subject.map subject
      (fun iri -> Ast.Sub_iri (encode_iri iri))
      (fun bnode -> Ast.Sub_blank_node (Blank_node.identifier bnode))
  in
  let encode_predicate predicate =
    Triple.Predicate.map predicate
      (fun iri -> Ast.Predicate.of_iri (encode_iri iri))
  in
  let encode_object object' =
    Triple.Object.map object'
      (fun iri -> Ast.Obj_iri (encode_iri iri))
      (fun bnode -> Ast.Obj_blank_node (Blank_node.identifier bnode))
      (fun literal -> Ast.Obj_literal (Ast.Literal.make
                                         (Literal.canonical literal)
                                         (encode_iri (Literal.datatype literal))))
  in
  let encode_objects os_seq =
    os_seq
    |> Seq.map encode_object
    |> List.of_seq
  in
  let encode_pos pos_seq =
    pos_seq
    |> Seq.map (fun (p, os_seq) ->
        encode_predicate p,
        encode_objects os_seq)
    |> List.of_seq
  in
  Graph.to_nested_seq graph
  |> Seq.map (fun (subject, pos_seq) ->
      Ast.Triple .of_subject_and_predobjs
        (encode_subject subject)
        (encode_pos pos_seq)
      |> Ast.Statement.of_triple)
  |> List.of_seq
  |> Fmt.str "%a" Ast.Turtle.pp