summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpukkamustard <pukkamustard@posteo.net>2020-09-07 17:52:39 +0200
committerpukkamustard <pukkamustard@posteo.net>2020-09-07 17:52:39 +0200
commit0d365c6e13b21ac74293968a017e9cba71d8304d (patch)
tree5b742423027bf8fdd566cd9cc8e9c752e6aa914b
parent9d8b083dd8897c55c08f9faa3b2dad0e71109fae (diff)
(schemantic fragment-graph): add CSexp encoding
-rw-r--r--Makefile.am2
-rw-r--r--hall.scm2
-rw-r--r--schemantic/fragment-graph.scm44
-rw-r--r--schemantic/fragment-graph/csexp.scm193
-rw-r--r--schemantic/fragment-graph/radix-sort.scm50
-rw-r--r--schemantic/rdf/lang-string.scm4
6 files changed, 292 insertions, 3 deletions
diff --git a/Makefile.am b/Makefile.am
index f19277c..147e411 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -45,6 +45,8 @@ SOURCES = schemantic.scm \
schemantic/rdf.scm \
schemantic/literal.scm \
schemantic/fragment-graph.scm \
+ schemantic/fragment-graph/csexp.scm \
+ schemantic/fragment-graph/radix-sort.scm \
schemantic/ns.scm
TESTS = tests/schemantic/graph/vhash.scm \
diff --git a/hall.scm b/hall.scm
index 86b6f4c..c8fdce3 100644
--- a/hall.scm
+++ b/hall.scm
@@ -29,6 +29,8 @@
(scheme-file "rdf")
(scheme-file "literal")
(scheme-file "fragment-graph")
+ (directory "fragment-graph" ((scheme-file "csexp")
+ (scheme-file "radix-sort")))
(scheme-file "ns")))))
(tests ((directory
"tests"
diff --git a/schemantic/fragment-graph.scm b/schemantic/fragment-graph.scm
index d040a04..3a1a528 100644
--- a/schemantic/fragment-graph.scm
+++ b/schemantic/fragment-graph.scm
@@ -4,6 +4,8 @@
(define-module (schemantic fragment-graph)
#:use-module (schemantic rdf)
+ #:use-module (schemantic fragment-graph csexp)
+ #:use-module (schemantic fragment-graph radix-sort)
#:use-module (schemantic graph vhash index)
#:use-module (oop goops)
@@ -18,6 +20,7 @@
fragment-graph?
make-fragment-graph
fragment-graph-base-subject
+ fragment-graph->csexp
<fragment-reference>
fragment-reference?
@@ -113,7 +116,8 @@
(index-add (fragment-statements fg)
(list fr
(term->fragment-reference (fragment-graph-base-subject fg) p)
- (term->fragment-reference (fragment-graph-base-subject fg) o)))))
+ (term->fragment-reference (fragment-graph-base-subject fg) o))))
+ fg)
;; catch all method that decides if triple should be added as statement or fragment-statement
(define-method (graph-add (fg <fragment-graph>) (s <term>) (p <term>) (o <term>))
@@ -182,3 +186,41 @@
(define-method (graph-query (fg <fragment-graph>) (s <string>))
(make <graph-query> #:graph fg #:s (make-fragment-reference s)))
+
+
+;; CSexp encoding
+
+(define (term->csexp term)
+ (cond
+
+ ((iri? term) (iri-value term))
+
+ ((fragment-reference? term) `(f ,(fragment-reference-id term)))
+
+ ((is-a? term <rdf:langString>)
+ `(l ,(literal-canonical term)
+ ,(term->csexp (literal-datatype term))
+ ,(literal-language term)))
+
+ ((literal? term) `(l ,(literal-canonical term) ,(term->csexp (literal-datatype term))))))
+
+(define (fragment-graph->csexp fg)
+ (let* ((statements
+ (map (match-lambda ((p o)
+ `(s ,(term->csexp p) ,(term->csexp o))))
+ (index-match (statements fg) (list (lvar) (lvar)))))
+ (fragment-statements
+ (map (match-lambda ((f p o)
+ `(fs ,(term->csexp f)
+ ,(term->csexp p)
+ ,(term->csexp o))))
+ (index-match (fragment-statements fg)
+ (list (lvar) (lvar) (lvar)))))
+ (sorted-statements
+ (map bytevector->csexp
+ (radix-sort
+ (map csexp->bytevector
+ (append statements fragment-statements))))))
+
+ (csexp->bytevector
+ (cons 'rdf sorted-statements))))
diff --git a/schemantic/fragment-graph/csexp.scm b/schemantic/fragment-graph/csexp.scm
new file mode 100644
index 0000000..ff21194
--- /dev/null
+++ b/schemantic/fragment-graph/csexp.scm
@@ -0,0 +1,193 @@
+; SPDX-FileCopyrightText: 2020 pukkamustard <pukkamustard@posteo.net>
+;
+; SPDX-License-Identifier: GPL-3.0-or-later
+
+(define-module (schemantic fragment-graph csexp)
+ #:use-module (rnrs bytevectors)
+ #:use-module (ice-9 binary-ports)
+ #:use-module (ice-9 exceptions)
+ #:export (put-csexp
+ get-csexp
+ csexp->bytevector
+ csexp->string
+ bytevector->csexp))
+
+;; Cannonical S-Expressions
+;; See https://people.csail.mit.edu/rivest/Sexp.txt
+
+;; Error handling
+
+(define &csexp-parse-error
+ (make-exception-type "csexp-parse-error"
+ &external-error
+ '()))
+
+(define (make-csexp-parse-error msg)
+ (make-exception
+ ((record-constructor &csexp-parse-error))
+ (make-exception-with-message msg)))
+
+(define &csexp-encode-error
+ (make-exception-type "csexp-encode-error"
+ &programming-error
+ '(expression)))
+
+(define (make-csexp-encode-error expression)
+ ((record-constructor &csexp-encode-error) expression))
+
+;; Helpers for constants
+
+(define open-parens-code
+ (char->integer #\())
+
+(define (open-parens-code? byte)
+ (= byte open-parens-code))
+
+(define close-parens-code
+ (char->integer #\)))
+
+(define (close-parens-code? byte)
+ (= byte close-parens-code))
+
+(define colon-code
+ (char->integer #\:))
+
+(define (colon-code? byte)
+ (= byte colon-code))
+
+(define (is-digit? byte)
+ (if (number? byte)
+ (char-numeric? (integer->char byte))
+ #f))
+
+
+;; Parse CSexp
+
+(define (get-number port)
+ "Read a decimal number from port"
+ (let ((digits '()))
+
+ (while (is-digit? (lookahead-u8 port))
+ (set! digits (cons
+ (integer->char (get-u8 port))
+ digits)))
+
+ (string->number
+ (reverse-list->string digits))))
+
+(define (expect-colon port)
+ "Read a colon from port or raise an exception"
+ (let ((byte (get-u8 port)))
+ (if (colon-code? byte)
+ #t
+ (raise-continuable (make-csexp-parse-error "expecting colon")))))
+
+(define (get-netstring port)
+ "Get a netstring from the port"
+ (let ((n (get-number port)))
+ (if n
+ (begin
+ (expect-colon port)
+ (get-bytevector-n port n))
+ #f)))
+
+(define (expect-open-parens port)
+ "Read an opening parens from port or raise an exception"
+ (let ((byte (get-u8 port)))
+ (if (open-parens-code? byte)
+ #t
+ (raise-continuable (make-csexp-parse-error "expecting open parens")))))
+
+(define (expect-close-parens port)
+ "Read a closing parens from port or raise an exception"
+ (let ((byte (get-u8 port)))
+ (if (close-parens-code? byte)
+ #t
+ (raise-continuable (make-csexp-parse-error "expecting close parens")))))
+
+(define (get-expression port)
+ "Get an expression which is either a list of expressions or a bytevector"
+ (let ((lookahead (lookahead-u8 port)))
+ (cond
+
+ ;; an opening parens -> get a list
+ ((open-parens-code? lookahead) (get-list port))
+
+ ;; a numeric value -> a bytevector
+ ((is-digit? lookahead) (get-netstring port))
+
+ ;; else something went wrong
+ (raise-continuable
+ (make-csexp-parse-error "expecting numeric value or open parens")))))
+
+(define (get-list port)
+ "Get a list of expressions"
+
+ ;; initialize emtpy list
+ (let ((lst '()))
+
+ ;; get openeing parens
+ (expect-open-parens port)
+
+ ;; loop over port while lookahead is not closing parens
+ (while (not (close-parens-code? (lookahead-u8 port)))
+
+ ;; get the expression and add to list
+ (set! lst (cons (get-expression port) lst)))
+
+ ;; get closing parens
+ (expect-close-parens port)
+
+ ;; return list in proper order
+ (reverse lst)))
+
+;; Encoder
+
+(define (put-netstring port bv)
+ "Write bytevector to port as netstring"
+ (put-bytevector port
+ (string->utf8
+ (string-append (number->string (bytevector-length bv)) ":")))
+ (put-bytevector port bv))
+
+(define (put-expression port exp)
+ "Write encoded expression on port"
+ (cond
+ ((bytevector? exp) (put-netstring port exp))
+ ((list? exp) (put-list port exp))
+ ((string? exp) (put-expression port (string->utf8 exp)))
+ ((symbol? exp) (put-expression port (symbol->string exp)))
+ ((number? exp) (put-expression port (number->string exp)))
+ (else (raise-continuable (make-csexp-encode-error exp)))))
+
+(define (put-list port lst)
+ "Write encoded list to port"
+ (put-u8 port open-parens-code)
+ (map (lambda (el) (put-expression port el)) lst)
+ (put-u8 port close-parens-code))
+
+;; Interface
+
+(define (put-csexp port exp)
+ "Write expression to port as cannonical s-expression"
+ (put-expression port exp))
+
+(define (get-csexp port)
+ "Read cannonical s-expression from port"
+ (get-expression port))
+
+(define (csexp->bytevector exp)
+ "Returns bytevector with expression encoded as csexp"
+ (call-with-values
+ (lambda () (open-bytevector-output-port))
+ (lambda (port get-bytevector)
+ (put-csexp port exp)
+ (get-bytevector))))
+
+(define (csexp->string exp)
+ "Returns string with encoded csexp"
+ (utf8->string (csexp->bytevector exp)))
+
+(define (bytevector->csexp bv)
+ "Read csexp from bytevector"
+ (get-csexp (open-bytevector-input-port bv)))
diff --git a/schemantic/fragment-graph/radix-sort.scm b/schemantic/fragment-graph/radix-sort.scm
new file mode 100644
index 0000000..39683d1
--- /dev/null
+++ b/schemantic/fragment-graph/radix-sort.scm
@@ -0,0 +1,50 @@
+; SPDX-FileCopyrightText: 2020 pukkamustard <pukkamustard@posteo.net>
+;
+; SPDX-License-Identifier: GPL-3.0-or-later
+
+(define-module (schemantic fragment-graph radix-sort)
+ #:use-module (srfi srfi-1)
+ #:use-module (rnrs bytevectors)
+ #:export (radix-sort))
+
+
+(define (max-length bvs)
+ (fold
+ (lambda (bv cmax) (max (bytevector-length bv) cmax))
+ 0 bvs))
+
+(define (make-buckets)
+ (make-array '() 256))
+
+(define (collect-buckets buckets)
+ (apply append (map reverse (array->list buckets))))
+
+(define (clear-buckets buckets)
+ (array-fill! buckets '()))
+
+(define (to-bucket buckets v bv)
+ (array-set! buckets
+ (cons bv (array-ref buckets v))
+ v))
+
+(define (byte-ref bv i)
+ (if (< i (u8vector-length bv))
+ (u8vector-ref bv i) 0))
+
+(define (radix-sort bvs)
+ (let ((buckets (make-buckets)))
+ (let loop ((i (max-length bvs))
+ (bvs bvs))
+ (if (<= 0 i)
+ (begin
+ ;; clear buckets from last iteration
+ (clear-buckets buckets)
+ ;; place bytevectors in buckets according to value at position i
+ (for-each
+ (lambda (bv) (to-bucket buckets (byte-ref bv i) bv)) bvs)
+ ;; loop on next position
+ (loop (1- i)
+ (collect-buckets buckets)))
+ ;; done return the sorted bytevectors
+ bvs))))
+
diff --git a/schemantic/rdf/lang-string.scm b/schemantic/rdf/lang-string.scm
index 7468a40..a3d1da7 100644
--- a/schemantic/rdf/lang-string.scm
+++ b/schemantic/rdf/lang-string.scm
@@ -15,11 +15,11 @@
(language #:init-keyword #:language #:getter literal-language))
(define-method (literal-datatype (l <rdf:langString>)) (rdf "langString"))
+(define-method (literal-lexical (l <rdf:langString>)) (literal-value l))
+(define-method (literal-canonical (l <rdf:langString>)) (literal-value l))
(define* (make-lang-string value #:key language)
(make <rdf:langString> #:value value #:language language))
(define-method (write (self <literal>) port)
(format port "<rdf:langString \"~a\"@~a>" (literal-value self) (literal-language self)))
-
-(make-lang-string "hello" #:language "en")