aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpukkamustard <pukkamustard@posteo.net>2020-10-23 16:15:48 +0200
committerpukkamustard <pukkamustard@posteo.net>2020-10-23 16:16:22 +0200
commit2b03565f2f4e0f5820af518edabb07d8b19c44d6 (patch)
tree895dfa1cb322d296e1a15217edcad2c85990eced
parent25986f98d606f3b0980bfc5f7da54cd3877ce373 (diff)
eris.adoc: pseudocode
-rw-r--r--doc/diagrams.org30
-rw-r--r--doc/eris-merkle-tree.svg30
-rw-r--r--doc/eris.adoc185
-rw-r--r--public/index.html375
4 files changed, 407 insertions, 213 deletions
diff --git a/doc/diagrams.org b/doc/diagrams.org
index 4d372ac..f814c20 100644
--- a/doc/diagrams.org
+++ b/doc/diagrams.org
@@ -32,14 +32,14 @@ digraph {
rk_0_0 [shape=record, color=yellow, style=filled, label="r | k"];
};
- rk_0_0 -> block_0;
- rk_0_1 -> block_1;
- rk_0_2 -> block_2;
- rk_0_3 -> block_3;
- rk_0_4 -> block_4;
- rk_0_5 -> block_5;
- rk_0_6 -> block_6;
- rk_0_7 -> block_7;
+ rk_0_0 -> block_0 [style=dotted];
+ rk_0_1 -> block_1 [style=dotted];
+ rk_0_2 -> block_2 [style=dotted];
+ rk_0_3 -> block_3 [style=dotted];
+ rk_0_4 -> block_4 [style=dotted];
+ rk_0_5 -> block_5 [style=dotted];
+ rk_0_6 -> block_6 [style=dotted];
+ rk_0_7 -> block_7 [style=dotted];
subgraph cluster__level_1 {
style=dotted;
@@ -70,10 +70,10 @@ digraph {
node_1_3 -> rk_0_6;
node_1_3 -> rk_0_7;
- rk_1_0 -> node_1_0;
- rk_1_1 -> node_1_1;
- rk_1_2 -> node_1_2;
- rk_1_3 -> node_1_3;
+ rk_1_0 -> node_1_0 [style=dotted];
+ rk_1_1 -> node_1_1 [style=dotted];
+ rk_1_2 -> node_1_2 [style=dotted];
+ rk_1_3 -> node_1_3 [style=dotted];
subgraph cluster__level_2 {
style=dotted;
@@ -94,8 +94,8 @@ digraph {
node_2_1 -> rk_1_2;
node_2_1 -> rk_1_3;
- rk_2_0 -> node_2_0;
- rk_2_1 -> node_2_1;
+ rk_2_0 -> node_2_0 [style=dotted];
+ rk_2_1 -> node_2_1 [style=dotted];
subgraph cluster__level_3 {
style=dotted;
@@ -111,7 +111,7 @@ digraph {
node_3_0 -> rk_2_1;
node_3_0 -> rk_2_0;
- rk_3_0 -> node_3_0;
+ rk_3_0 -> node_3_0 [style=dotted];
}
#+END_SRC
diff --git a/doc/eris-merkle-tree.svg b/doc/eris-merkle-tree.svg
index 3133632..8008cf7 100644
--- a/doc/eris-merkle-tree.svg
+++ b/doc/eris-merkle-tree.svg
@@ -88,7 +88,7 @@
<!-- rk_0_7&#45;&gt;block_7 -->
<g id="edge8" class="edge">
<title>rk_0_7&#45;&gt;block_7</title>
-<path fill="none" stroke="black" d="M643.66,-88.43C644.87,-80.67 646.34,-71.28 647.7,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M643.66,-88.43C644.87,-80.67 646.34,-71.28 647.7,-62.56"/>
<polygon fill="black" stroke="black" points="651.2,-62.82 649.29,-52.4 644.29,-61.74 651.2,-62.82"/>
</g>
<!-- rk_0_6 -->
@@ -102,7 +102,7 @@
<!-- rk_0_6&#45;&gt;block_6 -->
<g id="edge7" class="edge">
<title>rk_0_6&#45;&gt;block_6</title>
-<path fill="none" stroke="black" d="M558.42,-88.43C559.52,-80.67 560.85,-71.28 562.09,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M558.42,-88.43C559.52,-80.67 560.85,-71.28 562.09,-62.56"/>
<polygon fill="black" stroke="black" points="565.59,-62.79 563.53,-52.4 558.66,-61.81 565.59,-62.79"/>
</g>
<!-- rk_0_5 -->
@@ -116,7 +116,7 @@
<!-- rk_0_5&#45;&gt;block_5 -->
<g id="edge6" class="edge">
<title>rk_0_5&#45;&gt;block_5</title>
-<path fill="none" stroke="black" d="M473.94,-88.43C474.82,-80.67 475.88,-71.28 476.87,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M473.94,-88.43C474.82,-80.67 475.88,-71.28 476.87,-62.56"/>
<polygon fill="black" stroke="black" points="480.38,-62.73 478.03,-52.4 473.42,-61.94 480.38,-62.73"/>
</g>
<!-- rk_0_4 -->
@@ -130,7 +130,7 @@
<!-- rk_0_4&#45;&gt;block_4 -->
<g id="edge5" class="edge">
<title>rk_0_4&#45;&gt;block_4</title>
-<path fill="none" stroke="black" d="M390.97,-88.43C391.41,-80.67 391.94,-71.28 392.44,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M390.97,-88.43C391.41,-80.67 391.94,-71.28 392.44,-62.56"/>
<polygon fill="black" stroke="black" points="395.94,-62.58 393.01,-52.4 388.95,-62.18 395.94,-62.58"/>
</g>
<!-- rk_0_3 -->
@@ -144,7 +144,7 @@
<!-- rk_0_3&#45;&gt;block_3 -->
<g id="edge4" class="edge">
<title>rk_0_3&#45;&gt;block_3</title>
-<path fill="none" stroke="black" d="M310.27,-88.43C309.94,-80.67 309.54,-71.28 309.17,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M310.27,-88.43C309.94,-80.67 309.54,-71.28 309.17,-62.56"/>
<polygon fill="black" stroke="black" points="312.66,-62.24 308.74,-52.4 305.67,-62.54 312.66,-62.24"/>
</g>
<!-- rk_0_2 -->
@@ -158,7 +158,7 @@
<!-- rk_0_2&#45;&gt;block_2 -->
<g id="edge3" class="edge">
<title>rk_0_2&#45;&gt;block_2</title>
-<path fill="none" stroke="black" d="M228.06,-88.43C227.18,-80.67 226.12,-71.28 225.13,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M228.06,-88.43C227.18,-80.67 226.12,-71.28 225.13,-62.56"/>
<polygon fill="black" stroke="black" points="228.58,-61.94 223.97,-52.4 221.62,-62.73 228.58,-61.94"/>
</g>
<!-- rk_0_1 -->
@@ -172,7 +172,7 @@
<!-- rk_0_1&#45;&gt;block_1 -->
<g id="edge2" class="edge">
<title>rk_0_1&#45;&gt;block_1</title>
-<path fill="none" stroke="black" d="M144.34,-88.43C143.13,-80.67 141.66,-71.28 140.3,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M144.34,-88.43C143.13,-80.67 141.66,-71.28 140.3,-62.56"/>
<polygon fill="black" stroke="black" points="143.71,-61.74 138.71,-52.4 136.8,-62.82 143.71,-61.74"/>
</g>
<!-- rk_0_0 -->
@@ -186,7 +186,7 @@
<!-- rk_0_0&#45;&gt;block_0 -->
<g id="edge1" class="edge">
<title>rk_0_0&#45;&gt;block_0</title>
-<path fill="none" stroke="black" d="M55.31,-88.43C54.53,-80.67 53.6,-71.28 52.74,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M55.31,-88.43C54.53,-80.67 53.6,-71.28 52.74,-62.56"/>
<polygon fill="black" stroke="black" points="56.2,-62 51.73,-52.4 49.23,-62.7 56.2,-62"/>
</g>
<!-- node_1_0 -->
@@ -272,7 +272,7 @@
<!-- rk_1_3&#45;&gt;node_1_3 -->
<g id="edge20" class="edge">
<title>rk_1_3&#45;&gt;node_1_3</title>
-<path fill="none" stroke="black" d="M458,-245.31C458,-237.29 458,-227.55 458,-218.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M458,-245.31C458,-237.29 458,-227.55 458,-218.57"/>
<polygon fill="black" stroke="black" points="461.5,-218.53 458,-208.53 454.5,-218.53 461.5,-218.53"/>
</g>
<!-- rk_1_2 -->
@@ -286,7 +286,7 @@
<!-- rk_1_2&#45;&gt;node_1_2 -->
<g id="edge19" class="edge">
<title>rk_1_2&#45;&gt;node_1_2</title>
-<path fill="none" stroke="black" d="M386,-245.31C386,-237.29 386,-227.55 386,-218.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M386,-245.31C386,-237.29 386,-227.55 386,-218.57"/>
<polygon fill="black" stroke="black" points="389.5,-218.53 386,-208.53 382.5,-218.53 389.5,-218.53"/>
</g>
<!-- rk_1_1 -->
@@ -300,7 +300,7 @@
<!-- rk_1_1&#45;&gt;node_1_1 -->
<g id="edge18" class="edge">
<title>rk_1_1&#45;&gt;node_1_1</title>
-<path fill="none" stroke="black" d="M314,-245.31C314,-237.29 314,-227.55 314,-218.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M314,-245.31C314,-237.29 314,-227.55 314,-218.57"/>
<polygon fill="black" stroke="black" points="317.5,-218.53 314,-208.53 310.5,-218.53 317.5,-218.53"/>
</g>
<!-- rk_1_0 -->
@@ -314,7 +314,7 @@
<!-- rk_1_0&#45;&gt;node_1_0 -->
<g id="edge17" class="edge">
<title>rk_1_0&#45;&gt;node_1_0</title>
-<path fill="none" stroke="black" d="M242,-245.31C242,-237.29 242,-227.55 242,-218.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M242,-245.31C242,-237.29 242,-227.55 242,-218.57"/>
<polygon fill="black" stroke="black" points="245.5,-218.53 242,-208.53 238.5,-218.53 245.5,-218.53"/>
</g>
<!-- node_2_1 -->
@@ -364,7 +364,7 @@
<!-- rk_2_1&#45;&gt;node_2_1 -->
<g id="edge26" class="edge">
<title>rk_2_1&#45;&gt;node_2_1</title>
-<path fill="none" stroke="black" d="M386,-402.31C386,-394.29 386,-384.55 386,-375.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M386,-402.31C386,-394.29 386,-384.55 386,-375.57"/>
<polygon fill="black" stroke="black" points="389.5,-375.53 386,-365.53 382.5,-375.53 389.5,-375.53"/>
</g>
<!-- rk_2_0 -->
@@ -378,7 +378,7 @@
<!-- rk_2_0&#45;&gt;node_2_0 -->
<g id="edge25" class="edge">
<title>rk_2_0&#45;&gt;node_2_0</title>
-<path fill="none" stroke="black" d="M314,-402.31C314,-394.29 314,-384.55 314,-375.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M314,-402.31C314,-394.29 314,-384.55 314,-375.57"/>
<polygon fill="black" stroke="black" points="317.5,-375.53 314,-365.53 310.5,-375.53 317.5,-375.53"/>
</g>
<!-- node_3_0 -->
@@ -410,7 +410,7 @@
<!-- rk_3_0&#45;&gt;node_3_0 -->
<g id="edge29" class="edge">
<title>rk_3_0&#45;&gt;node_3_0</title>
-<path fill="none" stroke="black" d="M350,-559.31C350,-551.29 350,-541.55 350,-532.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M350,-559.31C350,-551.29 350,-541.55 350,-532.57"/>
<polygon fill="black" stroke="black" points="353.5,-532.53 350,-522.53 346.5,-532.53 353.5,-532.53"/>
</g>
</g>
diff --git a/doc/eris.adoc b/doc/eris.adoc
index b208423..89b7c2f 100644
--- a/doc/eris.adoc
+++ b/doc/eris.adoc
@@ -94,7 +94,7 @@ We use a byte padding scheme to ensure that input content size is a multiple of
`PAD(INPUT,BLOCK-SIZE)` :: For `INPUT` of size `n` adds a mandatory byte valued `0x80` (hexadecimal) to `INPUT` followed by `m < BLOCK-SIZE - 1` bytes valued `0x00` such that `n + m + 1` is a multiple of `BLOCK-SIZE`.
`UNPAD(INPUT,BLOCK-SIZE)` :: Starts reading bytes from the end of `INPUT` until a `0x80` is read and then returns bytes of `INPUT` before the `0x80`. Throws an error if a value other than `0x00` is read before reading `0x80` or if no `0x80` is read after reading `BLOCK-SIZE - 1` bytes from the end.
-This is the padding algorithm implemented in https://libsodium.gitbook.io/doc/padding[libsodium]footnote:[Also as apparently specified in ISO/IEC 7816-4, which however is not openly available. Fuck you ISO.].
+This is the padding algorithm implemented in https://libsodium.gitbook.io/doc/padding[libsodium]footnote:[This padding algorithm is apparently also specified in ISO/IEC 7816-4. However, the speicifcation is not openly available. Fuck you ISO.].
=== Block Size
@@ -110,23 +110,79 @@ The block size is encoded in the read capability and the decoding process is cap
====
When using block size 32Kb to encode content smaller than 1Kb, the content will be encoded in a 32Kb block. This is a storage overhead of over 3100%. When encoding very many pieces of small content (e.g. short messages or cartographic nodes) this overhead is not acceptable.
-On the other hand, using small block sizes increases the number of internal nodes that must be used to encode the content (see <<_merkle_tree>>). When encoding larger content it is more efficient to use a block size of 32Kb.
+On the other hand, using small block sizes increases the number of internal nodes that must be used to encode the content (see <<_collect_reference_key_pairs_in_nodes>>). When encoding larger content it is more efficient to use a block size of 32Kb.
====
+=== Convergence Secret
+
+Using the hash of the content as key is called _convergent encryption_.
+
+Because the hash of the content is deterministically computed from the content, the key will be the same when the same content is encoded twice. This results in de-duplication of content. Convergent encryption suffers from two known attacks: The Confirmation Of A File Attack and The Learn-The-Remaining-Information Attack <<Zooko2008>>. A defense against both attacks is to use a _convergence secret_. This results in different encoding of the same content with different convergence secret.
+
+If no convergence secret is specified a null convergence secret is used (32 bytes of zeroes).
+
+The convergence secret is implemented as the keying feature of the Blake2 cryptographic hash <<RFC7693>>.
+
=== Encoding
-An overview of how content (sequence of bytes) is encoded using ERIS:
+Inputs to the encoding process are:
+
+`CONTENT` :: An arbitary length byte sequence of content to be encoded.
+`CONVERGENCE-SECRET` :: A 256 bit (32 byte) byte sequence (see <<_convergence_secret>>).
+`BLOCK-SIZE` :: The block size used for encoding in bytes can be either 1024 (1Kb) or 32768 (32Kb) (see <<_block_size>>).
+
+Content is encoded by first splitting into uniformly sized blocks, encrypting the blocks and computing references to the blocks. If there are multiple references to blocks they are collected in nodes that have the same size as content blocks. The nodes are encrypted and references to the nodes are computed. This process is repeated until there is a single root reference.
+
+References to nodes and blocks of content consist of a reference to an encrypted block and a key to decrypt the block - a _reference-key pair_. The process of encrypting a block and computing a reference-key pair is explained in <<_encrypt_block_and_compute_reference_key_pair>>.
+
+The encoding process constructs a tree of reference-key pairs that reference nodes that hold references to nodes of a lower level or to content.
+
+The number of reference-key pairs collected into a node is called the _arity_ of the tree and depends on the block size. For block size 1Kb the arity of the tree is 16, for block size 32Kb the arity is 512.
+
+An encoding of a content that is split into eight blocks is depicted in <<figure_merkle_tree>>. For illustration purposes the tree is of arity 2 (instead of 16 or 512).
+
+[[figure_merkle_tree]]
+.Encoding of content as tree. Solid edges are concatenations of reference-key pairs as described in <<_collect_reference_key_pairs_in_nodes>>. Dotted edges are encryption and computation of reference-key pairs as described in <<_encrypt_block_and_compute_reference_key_pair>>.
+image::eris-merkle-tree.svg[Merkle Tree,opts=inline]
+
+The block-size, the level of the root reference and the root reference-key pair itself are the necessary pieces of information required to decode content. The tuple consisting of block size, level, root reference and key is called the _read capability_.
+
+The encrypted blocks and the read capability are the outputs of the encoding process.
-1. Split content into unencrypted blocks of size block size (see <<_splitting_input_content_into_blocks>>).
-2. Encrypt unencrypted block and compute reference to encrypted block. This is a fundamental operation during encoding and described in section <<_reference_key_pair>>.
-3. Build a tree of nodes containing references to blocks by (see <<_merkle_tree>>):
- - Collect references to encrypted blocks in a node of size block size
- - Encrypt node and compute reference in the same way as for data blocks.
- - Recursively collect references in nodes of higher level until there is only a single root node.
+A pseudo-code implementation of the encoding process:
-The input to the encoding process is the content as sequence of bytes and an optional convergence secret (see <<_convergence_secret>>).
+[source,pseudocode]
+----
+ERIS-Encode(CONTENT, CONVERGENCE-SECRET, BLOCK-SIZE):
+ // initialize empty list of blocks to be output
+ BLOCKS := []
-The output of the encoding process is a sequence of encrypted blocks (of size block size) and a root reference-key pair.
+ // initialize level to 0
+ LEVEL := 0
+
+ // split the input content into uniformly sized blocks and encode
+ LEVEL-0-BLOCKS, RK-PAIRS := Split-Content(CONTENT, BLOCK-SIZE)
+
+ // add blocks from level 0 to blocks to be output
+ BLOCKS := BLOCKS ++ LEVEL-0-BLOCKS
+
+ // loop until there is a single root reference
+ WHILE Length(RK-PAIRS) > 1:
+ LEVEL-BLOCKS, RK-Pairs := Collect-RK-Pairs(RK-PAIRS, CONVERGENCE-SECRET, BLOCK-SIZE)
+
+ // add blocks to blocks to be output and increase the level counter
+ BLOCKS := BLOCKS ++ LEVEL-BLOCKS
+ LEVEL := LEVEL + 1
+
+ // extract the root reference-key pair
+ ROOT-RK-PAIR := RK-PAIRS[0]
+ ROOT-REFERENCE, ROOT-KEY := ROOT-RK-PAIR
+
+ // return blocks and read-capability
+ RETURN BLOCKS, BLOCK-SIZE, LEVEL, ROOT-REFERENCE, ROOT-KEY
+----
+
+The sub-process `Split-Content` and `Collect-RK-Pairs` are explained in the following sections.
==== Splitting Input Content into Blocks
@@ -134,53 +190,101 @@ Input content is split into blocks of size at most block size such that only the
The last content block is always padded according to the padding algorithm to block size. If the size of the padded last block is larger than block size it is split into content blocks of block size.
-NOTE: If the length of the last content block is exactly block size, then padding will result in a padded block that is double the block size and must be split.
+A pseudo code implementation:
+
+[source,pseudocode]
+----
+Split-Content(CONTENT,CONVERGENCE-SECRET,BLOCK-SIZE):
+ // initialize list of blocks and reference-key pairs to output
+ BLOCKS := []
+ RK-PAIRS := []
+
+ // read blocks of size BLOCK-SIZE from CONTENT
+ WHILE CONTENT-BLOCK, LAST? := READ(CONTENT, BLOCK-SIZE):
+
+ IF LAST?:
+ // pad block if it is the last
+ PADDED := PAD(CONTENT-BLOCK, BLOCK-SIZE)
+
+ IF Length(PADDED) > BLOCK-SIZE:
+ PADDED-0, PADDED-1 := SPLIT(PADDED, BLOCK-SIZE)
+ ENCRYPTED-BLOCK-0, RK-PAIR-0 := Encrypt-Block(PADDED-0, CONVERGENCE-SECRET)
+ ENCRYPTED-BLOCK-1, RK-PAIR-1 := Encrypt-Block(PADDED-1, CONVERGENCE-SECRET)
+ BLOCKS := BLOCKS ++ [ENCRYPTED-BLOCK-0, ENCRYPTED-BLOCK-1]
+ RK-PAIRS := RK-PAIRS ++ [RK-PAIR-0, RK-PAIR-1]
+ ELSE:
+ ENCRYPTED-BLOCK, RK-PAIR := Encrypt-Block(PADDED, CONVERGENCE-SECRET)
+ BLOCKS := BLOCKS ++ [ENCRYPTED-BLOCK]
+ RK-PAIRS := RK-PAIRS ++ [RK-PAIR]
+
+ ELSE:
+ ENCRYPTED-BLOCK, RK-PAIR := Encrypt-Block(CONTENT-BLOCK, CONVERGENCE-SECRET)
+ BLOCKS := BLOCKS ++ [ENCRYPTED-BLOCK]
+ RK-PAIRS := RK-PAIRS ++ [RK-PAIR]
+
+ RETURN BLOCKS, RK-PAIRS
+----
-==== Reference-Key Pair
+NOTE: If the length of the last content block is exactly block size, then padding will result in a padded block that is double the block size and must be split.
-A _reference-key pair_ is a pair consisting of a reference to an encrypted block and the key to decrypt the block. Reference and key are both 32 bytes long. The concatenation of a reference-key pair is 64 bytes long (512 bits). In the following we also use the abbreviation _r-k pair_ for reference-key pair.
+==== Encrypt Block and Compute Reference-Key Pair
-Encrypting a block and computing the reference-key pair is a fundamental operation of the encoding process and may be implemented as following sub-process or function:
+A _reference-key pair_ is a pair consisting of a reference to an encrypted block and the key to decrypt the block. Reference and key are both 32 bytes long. The concatenation of a reference-key pair is 64 bytes long (512 bits).
-1. Compute the hash of the unencrypted block. If a convergence secret is used the convergence secret MUST be used as key of the hash function. The output of the hash is the _key_.
-2. Encrypt the block using the symmetric key cipher with the key.
-3. The hash of the encrypted block is used as _reference_ to the encrypted block.
+The `Encrypt-Block` function encrypts a block and returns the encrypted block along with the reference-key pair:
-The encrypted block MUST be added to the output of the encoding process. The reference-key pair is returned and used for further processing.
+[source,pseudocode]
+----
+Encrypt-Block(INPUT, CONVERGENCE-SECRET):
+ KEY := Blake2b-256(INPUT,CONVERGENCE-SECRET)
+ ENCRYPTED-BLOCK := ChaCha20(INPUT,KEY)
+ REFERENCE := Blake2b-256(ENCRYPTED-BLOCK)
+ RETURN ENCRYPTED-BLOCK, REFERENCE, KEY
+----
The convergence-secret MUST NOT be used to compute the reference to the encrypted block.
-===== Convergence Secret
+==== Collect Reference-Key Pairs in Nodes
-Using the hash of the content as key is called _convergent encryption_.
+Reference-key pairs are collected into nodes of size block size by concatenating reference-key pair. The node is encrypted, and a reference-key pair to the node is computed. This results in a sequence of reference-key pairs that refer to nodes containing reference-key pairs at a lower level - a tree.
-Because the hash of the content is deterministically computed from the content, the key will be the same when the same content is encoded twice. This results in de-duplication of content.
+If there are less than arity number of references-key pairs to collect in a node, then the node is filled with missing number of _null reference-key pairs_ - 64 bytes of zeros. The size of a node is always equal the block size (implemented with the `FILL-WITH-NULL-RK-PAIRS` function).
-However convergent encryption suffers from two known attacks: The Confirmation Of A File Attack and The Learn-The-Remaining-Information Attack <<Zooko2008>>.
+A pseudo-code implementation of `Collect-RK-Pairs`:
-A defense against both attacks is to use a _convergence secret_. This results in different encoding of the same content with different convergence secret.
+[source,pseudocode]
+----
+Collect-RK-Pairs(INPUT-RK-PAIRS, CONVERGENCE-SECRET, BLOCK-SIZE):
+ // number of reference-key pairs in a node
+ ARITY := BLOCK-SIZE / 64
-The convergence secret is implemented as the keying feature of the Blake2 cryptographic hash <<RFC7693>>.
-
-==== Merkle Tree
+ // initialize blocks and reference-key pairs to output
+ BLOCKS := []
+ OUTPUT-RK-PAIRS := []
-Reference-key pairs are collected into nodes of size block size by concatenating the concatenated reference-key pair. The node is encrypted, and a reference-key pair to the node is computed. This results in a sequence of reference-key pairs that refer nodes containing reference-key pairs at a lower level - a tree. This process is recursively applied until there is a single reference-key pair - the _root reference-key pair_. We keep track of the level of recursion.
+ // take ARITY reference-key pairs from INPUT-RK-PAIRS at a time
+ WHILE RK-PAIRS-FOR-NODE := TAKE(INPUT-RK-PAIRS, ARITY):
+ // make sure there are exactly ARITY reference-key pairs in node
+ RK-PAIRS-FOR-NODE := FILL-WITH-NULL-RK-PAIRS(RK-PAIRS-FOR-NODE, ARITY)
-The number of reference-key pairs collected into a node is called the _arity_ of the tree and depends on the block size. For block size 1Kb the arity of the tree is 16, for block size 32Kb the arity is 512. If there are less than arity number of references-key pairs to collect in a node, then the node is filled with missing number of _null reference-key pairs_ - 64 bytes of zeros. The size of a node is always equal the block size.
+ // concat reference-key pairs to node
+ NODE := CONCAT(RK-PAIRS-FOR-NODE)
-The initial input (level 0) is the sequence of reference-key pairs to the input content.
+ // encrypt node and compute reference-key pair
+ BLOCK, RK-TO-NODE := Encrypt-Block(NODE, CONVERGENCE-SECRET)
-The root reference-key pair, the level of the root reference-key pair and the block-size are the necessary pieces of information required to decode content. The tuple consisting of block size, level, root reference and key is called the _read capability_.
+ // add node to output
+ BLOCKS := BLOCKS ++ [BLOCK]
+ OUTPUT-RK-PAIRS := OUTPUT-RK-PAIRS ++ [RK-TO-NODE]
-The read capability as well as the encrypted blocks (as output by the <<_reference_key_pair>> sub-process) is the output of the entire encoding process.
+ RETURN BLOCKS, OUTPUT-RK-PAIRS
+----
-NOTE: The constructed tree of nodes containing reference-key pairs is called a Merkle Tree.
+==== Streaming
-An encoding of a content that is split into eight blocks is depicted in <<figure_merkle_tree>>. For illustration purposes the tree is of arity 2 (instead of 16 or 512).
+The encoding process can be implemented to encode a stream of content while immediately outputting encrypted blocks when ready and eagerly collecting reference-key pairs to nodes. This allows the encoding of larger-than-memory content.
-[[figure_merkle_tree]]
-.Merkle Tree
-image::eris-merkle-tree.svg[Merkle Tree,opts=inline]
+For an example, see https://gitlab.com/openengiadina/eris/-/raw/main/eris/encode.scm[the reference Guile implementation].
=== Decoding
@@ -188,15 +292,16 @@ image::eris-merkle-tree.svg[Merkle Tree,opts=inline]
=== URN
-
-=== ERIS in Streaming Applications
-
== Applications
=== Storage and Transport Layers
=== Namespaces
+== Implementations
+
+A reference implementation is available in Guile: https://gitlab.com/openengiadina/eris/
+
== Acknowledgments
[appendix]
diff --git a/public/index.html b/public/index.html
index e01c65a..692349e 100644
--- a/public/index.html
+++ b/public/index.html
@@ -459,11 +459,11 @@ body.book #toc,body.book #preamble,body.book h1.sect0,body.book .sect1>h2{page-b
<ul class="sectlevel2">
<li><a href="#_cryptographic_primitives">2.1. Cryptographic Primitives</a></li>
<li><a href="#_block_size">2.2. Block Size</a></li>
-<li><a href="#_encoding">2.3. Encoding</a></li>
-<li><a href="#_decoding">2.4. Decoding</a></li>
-<li><a href="#_binary_encoding_of_read_capability">2.5. Binary Encoding of Read Capability</a></li>
-<li><a href="#_urn">2.6. URN</a></li>
-<li><a href="#_eris_in_streaming_applications">2.7. ERIS in Streaming Applications</a></li>
+<li><a href="#_convergence_secret">2.3. Convergence Secret</a></li>
+<li><a href="#_encoding">2.4. Encoding</a></li>
+<li><a href="#_decoding">2.5. Decoding</a></li>
+<li><a href="#_binary_encoding_of_read_capability">2.6. Binary Encoding of Read Capability</a></li>
+<li><a href="#_urn">2.7. URN</a></li>
</ul>
</li>
<li><a href="#_applications">3. Applications</a>
@@ -472,7 +472,8 @@ body.book #toc,body.book #preamble,body.book h1.sect0,body.book .sect1>h2{page-b
<li><a href="#_namespaces">3.2. Namespaces</a></li>
</ul>
</li>
-<li><a href="#_acknowledgments">4. Acknowledgments</a></li>
+<li><a href="#_implementations">4. Implementations</a></li>
+<li><a href="#_acknowledgments">5. Acknowledgments</a></li>
<li><a href="#_test_vectors">Appendix A: Test Vectors</a></li>
<li><a href="#_changelog">Appendix B: Changelog</a></li>
<li><a href="#_copyright">Appendix C: Copyright</a></li>
@@ -581,7 +582,7 @@ The Encoding for Robust Immutable Storage (ERIS) is an encoding of arbitrary con
<dl>
<dt class="hdlist1">Cryptographic primitives </dt>
<dd>
-<p>ECRS itself does not specify any cryptographic primitives but the GNUNet implementation uses the SHA-512 hash and AES cipher. ERIS uses the Blake2b-256 cryptographic hash <a href="#RFC7693">[RFC7693]</a> and the ChaCha20 stream cipher <a href="#RFC8439">[RFC8439]</a>. This improves performance, storage efficiency (as hash references are smaller) and allows a convergence secret to be used (via Blake2b keyed hashing; see <a href="#_convergence_secret">Section 2.3.2.1</a>).</p>
+<p>ECRS itself does not specify any cryptographic primitives but the GNUNet implementation uses the SHA-512 hash and AES cipher. ERIS uses the Blake2b-256 cryptographic hash <a href="#RFC7693">[RFC7693]</a> and the ChaCha20 stream cipher <a href="#RFC8439">[RFC8439]</a>. This improves performance, storage efficiency (as hash references are smaller) and allows a convergence secret to be used (via Blake2b keyed hashing; see <a href="#_convergence_secret">Section 2.3</a>).</p>
</dd>
<dt class="hdlist1">Block size </dt>
<dd>
@@ -688,7 +689,7 @@ The Encoding for Robust Immutable Storage (ERIS) is an encoding of arbitrary con
<p>When using block size 32Kb to encode content smaller than 1Kb, the content will be encoded in a 32Kb block. This is a storage overhead of over 3100%. When encoding very many pieces of small content (e.g. short messages or cartographic nodes) this overhead is not acceptable.</p>
</div>
<div class="paragraph">
-<p>On the other hand, using small block sizes increases the number of internal nodes that must be used to encode the content (see <a href="#_merkle_tree">Section 2.3.3</a>). When encoding larger content it is more efficient to use a block size of 32Kb.</p>
+<p>On the other hand, using small block sizes increases the number of internal nodes that must be used to encode the content (see <a href="#_collect_reference_key_pairs_in_nodes">Section 2.4.3</a>). When encoding larger content it is more efficient to use a block size of 32Kb.</p>
</div>
</td>
</tr>
@@ -696,137 +697,52 @@ The Encoding for Robust Immutable Storage (ERIS) is an encoding of arbitrary con
</div>
</div>
<div class="sect2">
-<h3 id="_encoding"><a class="anchor" href="#_encoding"></a>2.3. Encoding</h3>
-<div class="paragraph">
-<p>An overview of how content (sequence of bytes) is encoded using ERIS:</p>
-</div>
-<div class="olist arabic">
-<ol class="arabic">
-<li>
-<p>Split content into unencrypted blocks of size block size (see <a href="#_splitting_input_content_into_blocks">Section 2.3.1</a>).</p>
-</li>
-<li>
-<p>Encrypt unencrypted block and compute reference to encrypted block. This is a fundamental operation during encoding and described in section <a href="#_reference_key_pair">Section 2.3.2</a>.</p>
-</li>
-<li>
-<p>Build a tree of nodes containing references to blocks by (see <a href="#_merkle_tree">Section 2.3.3</a>):</p>
-<div class="ulist">
-<ul>
-<li>
-<p>Collect references to encrypted blocks in a node of size block size</p>
-</li>
-<li>
-<p>Encrypt node and compute reference in the same way as for data blocks.</p>
-</li>
-<li>
-<p>Recursively collect references in nodes of higher level until there is only a single root node.</p>
-</li>
-</ul>
-</div>
-</li>
-</ol>
-</div>
-<div class="paragraph">
-<p>The input to the encoding process is the content as sequence of bytes and an optional convergence secret (see <a href="#_convergence_secret">Section 2.3.2.1</a>).</p>
-</div>
-<div class="paragraph">
-<p>The output of the encoding process is a sequence of encrypted blocks (of size block size) and a root reference-key pair.</p>
-</div>
-<div class="sect3">
-<h4 id="_splitting_input_content_into_blocks"><a class="anchor" href="#_splitting_input_content_into_blocks"></a>2.3.1. Splitting Input Content into Blocks</h4>
-<div class="paragraph">
-<p>Input content is split into blocks of size at most block size such that only the last content block may be smaller than block size.</p>
-</div>
-<div class="paragraph">
-<p>The last content block is always padded according to the padding algorithm to block size. If the size of the padded last block is larger than block size it is split into content blocks of block size.</p>
-</div>
-<div class="admonitionblock note">
-<table>
-<tr>
-<td class="icon">
-<div class="title">Note</div>
-</td>
-<td class="content">
-If the length of the last content block is exactly block size, then padding will result in a padded block that is double the block size and must be split.
-</td>
-</tr>
-</table>
-</div>
-</div>
-<div class="sect3">
-<h4 id="_reference_key_pair"><a class="anchor" href="#_reference_key_pair"></a>2.3.2. Reference-Key Pair</h4>
-<div class="paragraph">
-<p>A <em>reference-key pair</em> is a pair consisting of a reference to an encrypted block and the key to decrypt the block. Reference and key are both 32 bytes long. The concatenation of a reference-key pair is 64 bytes long (512 bits). In the following we also use the abbreviation <em>r-k pair</em> for reference-key pair.</p>
-</div>
-<div class="paragraph">
-<p>Encrypting a block and computing the reference-key pair is a fundamental operation of the encoding process and may be implemented as following sub-process or function:</p>
-</div>
-<div class="olist arabic">
-<ol class="arabic">
-<li>
-<p>Compute the hash of the unencrypted block. If a convergence secret is used the convergence secret MUST be used as key of the hash function. The output of the hash is the <em>key</em>.</p>
-</li>
-<li>
-<p>Encrypt the block using the symmetric key cipher with the key.</p>
-</li>
-<li>
-<p>The hash of the encrypted block is used as <em>reference</em> to the encrypted block.</p>
-</li>
-</ol>
-</div>
-<div class="paragraph">
-<p>The encrypted block MUST be added to the output of the encoding process. The reference-key pair is returned and used for further processing.</p>
-</div>
-<div class="paragraph">
-<p>The convergence-secret MUST NOT be used to compute the reference to the encrypted block.</p>
-</div>
-<div class="sect4">
-<h5 id="_convergence_secret"><a class="anchor" href="#_convergence_secret"></a>Convergence Secret</h5>
+<h3 id="_convergence_secret"><a class="anchor" href="#_convergence_secret"></a>2.3. Convergence Secret</h3>
<div class="paragraph">
<p>Using the hash of the content as key is called <em>convergent encryption</em>.</p>
</div>
<div class="paragraph">
-<p>Because the hash of the content is deterministically computed from the content, the key will be the same when the same content is encoded twice. This results in de-duplication of content.</p>
+<p>Because the hash of the content is deterministically computed from the content, the key will be the same when the same content is encoded twice. This results in de-duplication of content. Convergent encryption suffers from two known attacks: The Confirmation Of A File Attack and The Learn-The-Remaining-Information Attack <a href="#Zooko2008">[Zooko2008]</a>. A defense against both attacks is to use a <em>convergence secret</em>. This results in different encoding of the same content with different convergence secret.</p>
</div>
<div class="paragraph">
-<p>However convergent encryption suffers from two known attacks: The Confirmation Of A File Attack and The Learn-The-Remaining-Information Attack <a href="#Zooko2008">[Zooko2008]</a>.</p>
-</div>
-<div class="paragraph">
-<p>A defense against both attacks is to use a <em>convergence secret</em>. This results in different encoding of the same content with different convergence secret.</p>
+<p>If no convergence secret is specified a null convergence secret is used (32 bytes of zeroes).</p>
</div>
<div class="paragraph">
<p>The convergence secret is implemented as the keying feature of the Blake2 cryptographic hash <a href="#RFC7693">[RFC7693]</a>.</p>
</div>
</div>
-</div>
-<div class="sect3">
-<h4 id="_merkle_tree"><a class="anchor" href="#_merkle_tree"></a>2.3.3. Merkle Tree</h4>
+<div class="sect2">
+<h3 id="_encoding"><a class="anchor" href="#_encoding"></a>2.4. Encoding</h3>
<div class="paragraph">
-<p>Reference-key pairs are collected into nodes of size block size by concatenating the concatenated reference-key pair. The node is encrypted, and a reference-key pair to the node is computed. This results in a sequence of reference-key pairs that refer nodes containing reference-key pairs at a lower level - a tree. This process is recursively applied until there is a single reference-key pair - the <em>root reference-key pair</em>. We keep track of the level of recursion.</p>
+<p>Inputs to the encoding process are:</p>
</div>
-<div class="paragraph">
-<p>The number of reference-key pairs collected into a node is called the <em>arity</em> of the tree and depends on the block size. For block size 1Kb the arity of the tree is 16, for block size 32Kb the arity is 512. If there are less than arity number of references-key pairs to collect in a node, then the node is filled with missing number of <em>null reference-key pairs</em> - 64 bytes of zeros. The size of a node is always equal the block size.</p>
+<div class="dlist">
+<dl>
+<dt class="hdlist1"><code>CONTENT</code> </dt>
+<dd>
+<p>An arbitary length byte sequence of content to be encoded.</p>
+</dd>
+<dt class="hdlist1"><code>CONVERGENCE-SECRET</code> </dt>
+<dd>
+<p>A 256 bit (32 byte) byte sequence (see <a href="#_convergence_secret">Section 2.3</a>).</p>
+</dd>
+<dt class="hdlist1"><code>BLOCK-SIZE</code> </dt>
+<dd>
+<p>The block size used for encoding in bytes can be either 1024 (1Kb) or 32768 (32Kb) (see <a href="#_block_size">Section 2.2</a>).</p>
+</dd>
+</dl>
</div>
<div class="paragraph">
-<p>The initial input (level 0) is the sequence of reference-key pairs to the input content.</p>
+<p>Content is encoded by first splitting into uniformly sized blocks, encrypting the blocks and computing references to the blocks. If there are multiple references to blocks they are collected in nodes that have the same size as content blocks. The nodes are encrypted and references to the nodes are computed. This process is repeated until there is a single root reference.</p>
</div>
<div class="paragraph">
-<p>The root reference-key pair, the level of the root reference-key pair and the block-size are the necessary pieces of information required to decode content. The tuple consisting of block size, level, root reference and key is called the <em>read capability</em>.</p>
+<p>References to nodes and blocks of content consist of a reference to an encrypted block and a key to decrypt the block - a <em>reference-key pair</em>. The process of encrypting a block and computing a reference-key pair is explained in <a href="#_encrypt_block_and_compute_reference_key_pair">Section 2.4.2</a>.</p>
</div>
<div class="paragraph">
-<p>The read capability as well as the encrypted blocks (as output by the <a href="#_reference_key_pair">Section 2.3.2</a> sub-process) is the output of the entire encoding process.</p>
+<p>The encoding process constructs a tree of reference-key pairs that reference nodes that hold references to nodes of a lower level or to content.</p>
</div>
-<div class="admonitionblock note">
-<table>
-<tr>
-<td class="icon">
-<div class="title">Note</div>
-</td>
-<td class="content">
-The constructed tree of nodes containing reference-key pairs is called a Merkle Tree.
-</td>
-</tr>
-</table>
+<div class="paragraph">
+<p>The number of reference-key pairs collected into a node is called the <em>arity</em> of the tree and depends on the block size. For block size 1Kb the arity of the tree is 16, for block size 32Kb the arity is 512.</p>
</div>
<div class="paragraph">
<p>An encoding of a content that is split into eight blocks is depicted in <a href="#figure_merkle_tree">Figure 1</a>. For illustration purposes the tree is of arity 2 (instead of 16 or 512).</p>
@@ -917,7 +833,7 @@ The constructed tree of nodes containing reference-key pairs is called a Merkle
<!-- rk_0_7&#45;&gt;block_7 -->
<g id="edge8" class="edge">
<title>rk_0_7&#45;&gt;block_7</title>
-<path fill="none" stroke="black" d="M643.66,-88.43C644.87,-80.67 646.34,-71.28 647.7,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M643.66,-88.43C644.87,-80.67 646.34,-71.28 647.7,-62.56"/>
<polygon fill="black" stroke="black" points="651.2,-62.82 649.29,-52.4 644.29,-61.74 651.2,-62.82"/>
</g>
<!-- rk_0_6 -->
@@ -931,7 +847,7 @@ The constructed tree of nodes containing reference-key pairs is called a Merkle
<!-- rk_0_6&#45;&gt;block_6 -->
<g id="edge7" class="edge">
<title>rk_0_6&#45;&gt;block_6</title>
-<path fill="none" stroke="black" d="M558.42,-88.43C559.52,-80.67 560.85,-71.28 562.09,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M558.42,-88.43C559.52,-80.67 560.85,-71.28 562.09,-62.56"/>
<polygon fill="black" stroke="black" points="565.59,-62.79 563.53,-52.4 558.66,-61.81 565.59,-62.79"/>
</g>
<!-- rk_0_5 -->
@@ -945,7 +861,7 @@ The constructed tree of nodes containing reference-key pairs is called a Merkle
<!-- rk_0_5&#45;&gt;block_5 -->
<g id="edge6" class="edge">
<title>rk_0_5&#45;&gt;block_5</title>
-<path fill="none" stroke="black" d="M473.94,-88.43C474.82,-80.67 475.88,-71.28 476.87,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M473.94,-88.43C474.82,-80.67 475.88,-71.28 476.87,-62.56"/>
<polygon fill="black" stroke="black" points="480.38,-62.73 478.03,-52.4 473.42,-61.94 480.38,-62.73"/>
</g>
<!-- rk_0_4 -->
@@ -959,7 +875,7 @@ The constructed tree of nodes containing reference-key pairs is called a Merkle
<!-- rk_0_4&#45;&gt;block_4 -->
<g id="edge5" class="edge">
<title>rk_0_4&#45;&gt;block_4</title>
-<path fill="none" stroke="black" d="M390.97,-88.43C391.41,-80.67 391.94,-71.28 392.44,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M390.97,-88.43C391.41,-80.67 391.94,-71.28 392.44,-62.56"/>
<polygon fill="black" stroke="black" points="395.94,-62.58 393.01,-52.4 388.95,-62.18 395.94,-62.58"/>
</g>
<!-- rk_0_3 -->
@@ -973,7 +889,7 @@ The constructed tree of nodes containing reference-key pairs is called a Merkle
<!-- rk_0_3&#45;&gt;block_3 -->
<g id="edge4" class="edge">
<title>rk_0_3&#45;&gt;block_3</title>
-<path fill="none" stroke="black" d="M310.27,-88.43C309.94,-80.67 309.54,-71.28 309.17,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M310.27,-88.43C309.94,-80.67 309.54,-71.28 309.17,-62.56"/>
<polygon fill="black" stroke="black" points="312.66,-62.24 308.74,-52.4 305.67,-62.54 312.66,-62.24"/>
</g>
<!-- rk_0_2 -->
@@ -987,7 +903,7 @@ The constructed tree of nodes containing reference-key pairs is called a Merkle
<!-- rk_0_2&#45;&gt;block_2 -->
<g id="edge3" class="edge">
<title>rk_0_2&#45;&gt;block_2</title>
-<path fill="none" stroke="black" d="M228.06,-88.43C227.18,-80.67 226.12,-71.28 225.13,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M228.06,-88.43C227.18,-80.67 226.12,-71.28 225.13,-62.56"/>
<polygon fill="black" stroke="black" points="228.58,-61.94 223.97,-52.4 221.62,-62.73 228.58,-61.94"/>
</g>
<!-- rk_0_1 -->
@@ -1001,7 +917,7 @@ The constructed tree of nodes containing reference-key pairs is called a Merkle
<!-- rk_0_1&#45;&gt;block_1 -->
<g id="edge2" class="edge">
<title>rk_0_1&#45;&gt;block_1</title>
-<path fill="none" stroke="black" d="M144.34,-88.43C143.13,-80.67 141.66,-71.28 140.3,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M144.34,-88.43C143.13,-80.67 141.66,-71.28 140.3,-62.56"/>
<polygon fill="black" stroke="black" points="143.71,-61.74 138.71,-52.4 136.8,-62.82 143.71,-61.74"/>
</g>
<!-- rk_0_0 -->
@@ -1015,7 +931,7 @@ The constructed tree of nodes containing reference-key pairs is called a Merkle
<!-- rk_0_0&#45;&gt;block_0 -->
<g id="edge1" class="edge">
<title>rk_0_0&#45;&gt;block_0</title>
-<path fill="none" stroke="black" d="M55.31,-88.43C54.53,-80.67 53.6,-71.28 52.74,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M55.31,-88.43C54.53,-80.67 53.6,-71.28 52.74,-62.56"/>
<polygon fill="black" stroke="black" points="56.2,-62 51.73,-52.4 49.23,-62.7 56.2,-62"/>
</g>
<!-- node_1_0 -->
@@ -1101,7 +1017,7 @@ The constructed tree of nodes containing reference-key pairs is called a Merkle
<!-- rk_1_3&#45;&gt;node_1_3 -->
<g id="edge20" class="edge">
<title>rk_1_3&#45;&gt;node_1_3</title>
-<path fill="none" stroke="black" d="M458,-245.31C458,-237.29 458,-227.55 458,-218.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M458,-245.31C458,-237.29 458,-227.55 458,-218.57"/>
<polygon fill="black" stroke="black" points="461.5,-218.53 458,-208.53 454.5,-218.53 461.5,-218.53"/>
</g>
<!-- rk_1_2 -->
@@ -1115,7 +1031,7 @@ The constructed tree of nodes containing reference-key pairs is called a Merkle
<!-- rk_1_2&#45;&gt;node_1_2 -->
<g id="edge19" class="edge">
<title>rk_1_2&#45;&gt;node_1_2</title>
-<path fill="none" stroke="black" d="M386,-245.31C386,-237.29 386,-227.55 386,-218.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M386,-245.31C386,-237.29 386,-227.55 386,-218.57"/>
<polygon fill="black" stroke="black" points="389.5,-218.53 386,-208.53 382.5,-218.53 389.5,-218.53"/>
</g>
<!-- rk_1_1 -->
@@ -1129,7 +1045,7 @@ The constructed tree of nodes containing reference-key pairs is called a Merkle
<!-- rk_1_1&#45;&gt;node_1_1 -->
<g id="edge18" class="edge">
<title>rk_1_1&#45;&gt;node_1_1</title>
-<path fill="none" stroke="black" d="M314,-245.31C314,-237.29 314,-227.55 314,-218.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M314,-245.31C314,-237.29 314,-227.55 314,-218.57"/>
<polygon fill="black" stroke="black" points="317.5,-218.53 314,-208.53 310.5,-218.53 317.5,-218.53"/>
</g>
<!-- rk_1_0 -->
@@ -1143,7 +1059,7 @@ The constructed tree of nodes containing reference-key pairs is called a Merkle
<!-- rk_1_0&#45;&gt;node_1_0 -->
<g id="edge17" class="edge">
<title>rk_1_0&#45;&gt;node_1_0</title>
-<path fill="none" stroke="black" d="M242,-245.31C242,-237.29 242,-227.55 242,-218.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M242,-245.31C242,-237.29 242,-227.55 242,-218.57"/>
<polygon fill="black" stroke="black" points="245.5,-218.53 242,-208.53 238.5,-218.53 245.5,-218.53"/>
</g>
<!-- node_2_1 -->
@@ -1193,7 +1109,7 @@ The constructed tree of nodes containing reference-key pairs is called a Merkle
<!-- rk_2_1&#45;&gt;node_2_1 -->
<g id="edge26" class="edge">
<title>rk_2_1&#45;&gt;node_2_1</title>
-<path fill="none" stroke="black" d="M386,-402.31C386,-394.29 386,-384.55 386,-375.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M386,-402.31C386,-394.29 386,-384.55 386,-375.57"/>
<polygon fill="black" stroke="black" points="389.5,-375.53 386,-365.53 382.5,-375.53 389.5,-375.53"/>
</g>
<!-- rk_2_0 -->
@@ -1207,7 +1123,7 @@ The constructed tree of nodes containing reference-key pairs is called a Merkle
<!-- rk_2_0&#45;&gt;node_2_0 -->
<g id="edge25" class="edge">
<title>rk_2_0&#45;&gt;node_2_0</title>
-<path fill="none" stroke="black" d="M314,-402.31C314,-394.29 314,-384.55 314,-375.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M314,-402.31C314,-394.29 314,-384.55 314,-375.57"/>
<polygon fill="black" stroke="black" points="317.5,-375.53 314,-365.53 310.5,-375.53 317.5,-375.53"/>
</g>
<!-- node_3_0 -->
@@ -1239,30 +1155,195 @@ The constructed tree of nodes containing reference-key pairs is called a Merkle
<!-- rk_3_0&#45;&gt;node_3_0 -->
<g id="edge29" class="edge">
<title>rk_3_0&#45;&gt;node_3_0</title>
-<path fill="none" stroke="black" d="M350,-559.31C350,-551.29 350,-541.55 350,-532.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M350,-559.31C350,-551.29 350,-541.55 350,-532.57"/>
<polygon fill="black" stroke="black" points="353.5,-532.53 350,-522.53 346.5,-532.53 353.5,-532.53"/>
</g>
</g>
</svg>
</div>
-<div class="title">Figure 1. Merkle Tree</div>
+<div class="title">Figure 1. Encoding of content as tree. Solid edges are concatenations of reference-key pairs as described in <a href="#_collect_reference_key_pairs_in_nodes">Section 2.4.3</a>. Dotted edges are encryption and computation of reference-key pairs as described in <a href="#_encrypt_block_and_compute_reference_key_pair">Section 2.4.2</a>.</div>
</div>
+<div class="paragraph">
+<p>The block-size, the level of the root reference and the root reference-key pair itself are the necessary pieces of information required to decode content. The tuple consisting of block size, level, root reference and key is called the <em>read capability</em>.</p>
</div>
+<div class="paragraph">
+<p>The encrypted blocks and the read capability are the outputs of the encoding process.</p>
</div>
-<div class="sect2">
-<h3 id="_decoding"><a class="anchor" href="#_decoding"></a>2.4. Decoding</h3>
+<div class="paragraph">
+<p>A pseudo-code implementation of the encoding process:</p>
+</div>
+<div class="listingblock">
+<div class="content">
+<pre class="highlight"><code class="language-pseudocode" data-lang="pseudocode">ERIS-Encode(CONTENT, CONVERGENCE-SECRET, BLOCK-SIZE):
+ // initialize empty list of blocks to be output
+ BLOCKS := []
+
+ // initialize level to 0
+ LEVEL := 0
+
+ // split the input content into uniformly sized blocks and encode
+ LEVEL-0-BLOCKS, RK-PAIRS := Split-Content(CONTENT, BLOCK-SIZE)
+
+ // add blocks from level 0 to blocks to be output
+ BLOCKS := BLOCKS ++ LEVEL-0-BLOCKS
+
+ // loop until there is a single root reference
+ WHILE Length(RK-PAIRS) &gt; 1:
+ LEVEL-BLOCKS, RK-Pairs := Collect-RK-Pairs(RK-PAIRS, CONVERGENCE-SECRET, BLOCK-SIZE)
+
+ // add blocks to blocks to be output and increase the level counter
+ BLOCKS := BLOCKS ++ LEVEL-BLOCKS
+ LEVEL := LEVEL + 1
+
+ // extract the root reference-key pair
+ ROOT-RK-PAIR := RK-PAIRS[0]
+ ROOT-REFERENCE, ROOT-KEY := ROOT-RK-PAIR
+
+ // return blocks and read-capability
+ RETURN BLOCKS, BLOCK-SIZE, LEVEL, ROOT-REFERENCE, ROOT-KEY</code></pre>
+</div>
+</div>
+<div class="paragraph">
+<p>The sub-process <code>Split-Content</code> and <code>Collect-RK-Pairs</code> are explained in the following sections.</p>
+</div>
+<div class="sect3">
+<h4 id="_splitting_input_content_into_blocks"><a class="anchor" href="#_splitting_input_content_into_blocks"></a>2.4.1. Splitting Input Content into Blocks</h4>
+<div class="paragraph">
+<p>Input content is split into blocks of size at most block size such that only the last content block may be smaller than block size.</p>
+</div>
+<div class="paragraph">
+<p>The last content block is always padded according to the padding algorithm to block size. If the size of the padded last block is larger than block size it is split into content blocks of block size.</p>
+</div>
+<div class="paragraph">
+<p>A pseudo code implementation:</p>
+</div>
+<div class="listingblock">
+<div class="content">
+<pre class="highlight"><code class="language-pseudocode" data-lang="pseudocode">Split-Content(CONTENT,CONVERGENCE-SECRET,BLOCK-SIZE):
+ // initialize list of blocks and reference-key pairs to output
+ BLOCKS := []
+ RK-PAIRS := []
+
+ // read blocks of size BLOCK-SIZE from CONTENT
+ WHILE CONTENT-BLOCK, LAST? := READ(CONTENT, BLOCK-SIZE):
+
+ IF LAST?:
+ // pad block if it is the last
+ PADDED := PAD(CONTENT-BLOCK, BLOCK-SIZE)
+
+ IF Length(PADDED) &gt; BLOCK-SIZE:
+ PADDED-0, PADDED-1 := SPLIT(PADDED, BLOCK-SIZE)
+ ENCRYPTED-BLOCK-0, RK-PAIR-0 := Encrypt-Block(PADDED-0, CONVERGENCE-SECRET)
+ ENCRYPTED-BLOCK-1, RK-PAIR-1 := Encrypt-Block(PADDED-1, CONVERGENCE-SECRET)
+ BLOCKS := BLOCKS ++ [ENCRYPTED-BLOCK-0, ENCRYPTED-BLOCK-1]
+ RK-PAIRS := RK-PAIRS ++ [RK-PAIR-0, RK-PAIR-1]
+ ELSE:
+ ENCRYPTED-BLOCK, RK-PAIR := Encrypt-Block(PADDED, CONVERGENCE-SECRET)
+ BLOCKS := BLOCKS ++ [ENCRYPTED-BLOCK]
+ RK-PAIRS := RK-PAIRS ++ [RK-PAIR]
+
+ ELSE:
+ ENCRYPTED-BLOCK, RK-PAIR := Encrypt-Block(CONTENT-BLOCK, CONVERGENCE-SECRET)
+ BLOCKS := BLOCKS ++ [ENCRYPTED-BLOCK]
+ RK-PAIRS := RK-PAIRS ++ [RK-PAIR]
+
+ RETURN BLOCKS, RK-PAIRS</code></pre>
+</div>
+</div>
+<div class="admonitionblock note">
+<table>
+<tr>
+<td class="icon">
+<div class="title">Note</div>
+</td>
+<td class="content">
+If the length of the last content block is exactly block size, then padding will result in a padded block that is double the block size and must be split.
+</td>
+</tr>
+</table>
+</div>
+</div>
+<div class="sect3">
+<h4 id="_encrypt_block_and_compute_reference_key_pair"><a class="anchor" href="#_encrypt_block_and_compute_reference_key_pair"></a>2.4.2. Encrypt Block and Compute Reference-Key Pair</h4>
+<div class="paragraph">
+<p>A <em>reference-key pair</em> is a pair consisting of a reference to an encrypted block and the key to decrypt the block. Reference and key are both 32 bytes long. The concatenation of a reference-key pair is 64 bytes long (512 bits).</p>
+</div>
+<div class="paragraph">
+<p>The <code>Encrypt-Block</code> function encrypts a block and returns the encrypted block along with the reference-key pair:</p>
+</div>
+<div class="listingblock">
+<div class="content">
+<pre class="highlight"><code class="language-pseudocode" data-lang="pseudocode">Encrypt-Block(INPUT, CONVERGENCE-SECRET):
+ KEY := Blake2b-256(INPUT,CONVERGENCE-SECRET)
+ ENCRYPTED-BLOCK := ChaCha20(INPUT,KEY)
+ REFERENCE := Blake2b-256(ENCRYPTED-BLOCK)
+ RETURN ENCRYPTED-BLOCK, REFERENCE, KEY</code></pre>
+</div>
+</div>
+<div class="paragraph">
+<p>The convergence-secret MUST NOT be used to compute the reference to the encrypted block.</p>
+</div>
+</div>
+<div class="sect3">
+<h4 id="_collect_reference_key_pairs_in_nodes"><a class="anchor" href="#_collect_reference_key_pairs_in_nodes"></a>2.4.3. Collect Reference-Key Pairs in Nodes</h4>
+<div class="paragraph">
+<p>Reference-key pairs are collected into nodes of size block size by concatenating reference-key pair. The node is encrypted, and a reference-key pair to the node is computed. This results in a sequence of reference-key pairs that refer to nodes containing reference-key pairs at a lower level - a tree.</p>
+</div>
+<div class="paragraph">
+<p>If there are less than arity number of references-key pairs to collect in a node, then the node is filled with missing number of <em>null reference-key pairs</em> - 64 bytes of zeros. The size of a node is always equal the block size (implemented with the <code>FILL-WITH-NULL-RK-PAIRS</code> function).</p>
+</div>
+<div class="paragraph">
+<p>A pseudo-code implementation of <code>Collect-RK-Pairs</code>:</p>
+</div>
+<div class="listingblock">
+<div class="content">
+<pre class="highlight"><code class="language-pseudocode" data-lang="pseudocode">Collect-RK-Pairs(INPUT-RK-PAIRS, CONVERGENCE-SECRET, BLOCK-SIZE):
+ // number of reference-key pairs in a node
+ ARITY := BLOCK-SIZE / 64
+
+ // initialize blocks and reference-key pairs to output
+ BLOCKS := []
+ OUTPUT-RK-PAIRS := []
+
+ // take ARITY reference-key pairs from INPUT-RK-PAIRS at a time
+ WHILE RK-PAIRS-FOR-NODE := TAKE(INPUT-RK-PAIRS, ARITY):
+ // make sure there are exactly ARITY reference-key pairs in node
+ RK-PAIRS-FOR-NODE := FILL-WITH-NULL-RK-PAIRS(RK-PAIRS-FOR-NODE, ARITY)
+
+ // concat reference-key pairs to node
+ NODE := CONCAT(RK-PAIRS-FOR-NODE)
+
+ // encrypt node and compute reference-key pair
+ BLOCK, RK-TO-NODE := Encrypt-Block(NODE, CONVERGENCE-SECRET)
+
+ // add node to output
+ BLOCKS := BLOCKS ++ [BLOCK]
+ OUTPUT-RK-PAIRS := OUTPUT-RK-PAIRS ++ [RK-TO-NODE]
+ RETURN BLOCKS, OUTPUT-RK-PAIRS</code></pre>
+</div>
+</div>
+</div>
+<div class="sect3">
+<h4 id="_streaming"><a class="anchor" href="#_streaming"></a>2.4.4. Streaming</h4>
+<div class="paragraph">
+<p>The encoding process can be implemented to encode a stream of content while immediately outputting encrypted blocks when ready and eagerly collecting reference-key pairs to nodes. This allows the encoding of larger-than-memory content.</p>
+</div>
+<div class="paragraph">
+<p>For an example, see <a href="https://gitlab.com/openengiadina/eris/-/raw/main/eris/encode.scm">the reference Guile implementation</a>.</p>
+</div>
+</div>
</div>
<div class="sect2">
-<h3 id="_binary_encoding_of_read_capability"><a class="anchor" href="#_binary_encoding_of_read_capability"></a>2.5. Binary Encoding of Read Capability</h3>
+<h3 id="_decoding"><a class="anchor" href="#_decoding"></a>2.5. Decoding</h3>
</div>
<div class="sect2">
-<h3 id="_urn"><a class="anchor" href="#_urn"></a>2.6. URN</h3>
+<h3 id="_binary_encoding_of_read_capability"><a class="anchor" href="#_binary_encoding_of_read_capability"></a>2.6. Binary Encoding of Read Capability</h3>
</div>
<div class="sect2">
-<h3 id="_eris_in_streaming_applications"><a class="anchor" href="#_eris_in_streaming_applications"></a>2.7. ERIS in Streaming Applications</h3>
+<h3 id="_urn"><a class="anchor" href="#_urn"></a>2.7. URN</h3>
</div>
</div>
@@ -1281,7 +1362,15 @@ The constructed tree of nodes containing reference-key pairs is called a Merkle
</div>
</div>
<div class="sect1">
-<h2 id="_acknowledgments"><a class="anchor" href="#_acknowledgments"></a>4. Acknowledgments</h2>
+<h2 id="_implementations"><a class="anchor" href="#_implementations"></a>4. Implementations</h2>
+<div class="sectionbody">
+<div class="paragraph">
+<p>A reference implementation is available in Guile: <a href="https://gitlab.com/openengiadina/eris/" class="bare">https://gitlab.com/openengiadina/eris/</a></p>
+</div>
+</div>
+</div>
+<div class="sect1">
+<h2 id="_acknowledgments"><a class="anchor" href="#_acknowledgments"></a>5. Acknowledgments</h2>
<div class="sectionbody">
</div>
@@ -1353,12 +1442,12 @@ The constructed tree of nodes containing reference-key pairs is called a Merkle
<div id="footnotes">
<hr>
<div class="footnote" id="_footnotedef_1">
-<a href="#_footnoteref_1">1</a>. Also as apparently specified in ISO/IEC 7816-4, which however is not openly available. Fuck you ISO.
+<a href="#_footnoteref_1">1</a>. This padding algorithm is apparently also specified in ISO/IEC 7816-4. However, the speicifcation is not openly available. Fuck you ISO.
</div>
</div>
<div id="footer">
<div id="footer-text">
-Last updated 2020-10-23 10:29:58 +0200
+Last updated 2020-10-23 16:16:14 +0200
</div>
</div>
</body>