aboutsummaryrefslogtreecommitdiff
path: root/doc
diff options
context:
space:
mode:
authorpukkamustard <pukkamustard@posteo.net>2020-10-23 16:15:48 +0200
committerpukkamustard <pukkamustard@posteo.net>2020-10-23 16:16:22 +0200
commit2b03565f2f4e0f5820af518edabb07d8b19c44d6 (patch)
tree895dfa1cb322d296e1a15217edcad2c85990eced /doc
parent25986f98d606f3b0980bfc5f7da54cd3877ce373 (diff)
eris.adoc: pseudocode
Diffstat (limited to 'doc')
-rw-r--r--doc/diagrams.org30
-rw-r--r--doc/eris-merkle-tree.svg30
-rw-r--r--doc/eris.adoc185
3 files changed, 175 insertions, 70 deletions
diff --git a/doc/diagrams.org b/doc/diagrams.org
index 4d372ac..f814c20 100644
--- a/doc/diagrams.org
+++ b/doc/diagrams.org
@@ -32,14 +32,14 @@ digraph {
rk_0_0 [shape=record, color=yellow, style=filled, label="r | k"];
};
- rk_0_0 -> block_0;
- rk_0_1 -> block_1;
- rk_0_2 -> block_2;
- rk_0_3 -> block_3;
- rk_0_4 -> block_4;
- rk_0_5 -> block_5;
- rk_0_6 -> block_6;
- rk_0_7 -> block_7;
+ rk_0_0 -> block_0 [style=dotted];
+ rk_0_1 -> block_1 [style=dotted];
+ rk_0_2 -> block_2 [style=dotted];
+ rk_0_3 -> block_3 [style=dotted];
+ rk_0_4 -> block_4 [style=dotted];
+ rk_0_5 -> block_5 [style=dotted];
+ rk_0_6 -> block_6 [style=dotted];
+ rk_0_7 -> block_7 [style=dotted];
subgraph cluster__level_1 {
style=dotted;
@@ -70,10 +70,10 @@ digraph {
node_1_3 -> rk_0_6;
node_1_3 -> rk_0_7;
- rk_1_0 -> node_1_0;
- rk_1_1 -> node_1_1;
- rk_1_2 -> node_1_2;
- rk_1_3 -> node_1_3;
+ rk_1_0 -> node_1_0 [style=dotted];
+ rk_1_1 -> node_1_1 [style=dotted];
+ rk_1_2 -> node_1_2 [style=dotted];
+ rk_1_3 -> node_1_3 [style=dotted];
subgraph cluster__level_2 {
style=dotted;
@@ -94,8 +94,8 @@ digraph {
node_2_1 -> rk_1_2;
node_2_1 -> rk_1_3;
- rk_2_0 -> node_2_0;
- rk_2_1 -> node_2_1;
+ rk_2_0 -> node_2_0 [style=dotted];
+ rk_2_1 -> node_2_1 [style=dotted];
subgraph cluster__level_3 {
style=dotted;
@@ -111,7 +111,7 @@ digraph {
node_3_0 -> rk_2_1;
node_3_0 -> rk_2_0;
- rk_3_0 -> node_3_0;
+ rk_3_0 -> node_3_0 [style=dotted];
}
#+END_SRC
diff --git a/doc/eris-merkle-tree.svg b/doc/eris-merkle-tree.svg
index 3133632..8008cf7 100644
--- a/doc/eris-merkle-tree.svg
+++ b/doc/eris-merkle-tree.svg
@@ -88,7 +88,7 @@
<!-- rk_0_7&#45;&gt;block_7 -->
<g id="edge8" class="edge">
<title>rk_0_7&#45;&gt;block_7</title>
-<path fill="none" stroke="black" d="M643.66,-88.43C644.87,-80.67 646.34,-71.28 647.7,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M643.66,-88.43C644.87,-80.67 646.34,-71.28 647.7,-62.56"/>
<polygon fill="black" stroke="black" points="651.2,-62.82 649.29,-52.4 644.29,-61.74 651.2,-62.82"/>
</g>
<!-- rk_0_6 -->
@@ -102,7 +102,7 @@
<!-- rk_0_6&#45;&gt;block_6 -->
<g id="edge7" class="edge">
<title>rk_0_6&#45;&gt;block_6</title>
-<path fill="none" stroke="black" d="M558.42,-88.43C559.52,-80.67 560.85,-71.28 562.09,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M558.42,-88.43C559.52,-80.67 560.85,-71.28 562.09,-62.56"/>
<polygon fill="black" stroke="black" points="565.59,-62.79 563.53,-52.4 558.66,-61.81 565.59,-62.79"/>
</g>
<!-- rk_0_5 -->
@@ -116,7 +116,7 @@
<!-- rk_0_5&#45;&gt;block_5 -->
<g id="edge6" class="edge">
<title>rk_0_5&#45;&gt;block_5</title>
-<path fill="none" stroke="black" d="M473.94,-88.43C474.82,-80.67 475.88,-71.28 476.87,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M473.94,-88.43C474.82,-80.67 475.88,-71.28 476.87,-62.56"/>
<polygon fill="black" stroke="black" points="480.38,-62.73 478.03,-52.4 473.42,-61.94 480.38,-62.73"/>
</g>
<!-- rk_0_4 -->
@@ -130,7 +130,7 @@
<!-- rk_0_4&#45;&gt;block_4 -->
<g id="edge5" class="edge">
<title>rk_0_4&#45;&gt;block_4</title>
-<path fill="none" stroke="black" d="M390.97,-88.43C391.41,-80.67 391.94,-71.28 392.44,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M390.97,-88.43C391.41,-80.67 391.94,-71.28 392.44,-62.56"/>
<polygon fill="black" stroke="black" points="395.94,-62.58 393.01,-52.4 388.95,-62.18 395.94,-62.58"/>
</g>
<!-- rk_0_3 -->
@@ -144,7 +144,7 @@
<!-- rk_0_3&#45;&gt;block_3 -->
<g id="edge4" class="edge">
<title>rk_0_3&#45;&gt;block_3</title>
-<path fill="none" stroke="black" d="M310.27,-88.43C309.94,-80.67 309.54,-71.28 309.17,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M310.27,-88.43C309.94,-80.67 309.54,-71.28 309.17,-62.56"/>
<polygon fill="black" stroke="black" points="312.66,-62.24 308.74,-52.4 305.67,-62.54 312.66,-62.24"/>
</g>
<!-- rk_0_2 -->
@@ -158,7 +158,7 @@
<!-- rk_0_2&#45;&gt;block_2 -->
<g id="edge3" class="edge">
<title>rk_0_2&#45;&gt;block_2</title>
-<path fill="none" stroke="black" d="M228.06,-88.43C227.18,-80.67 226.12,-71.28 225.13,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M228.06,-88.43C227.18,-80.67 226.12,-71.28 225.13,-62.56"/>
<polygon fill="black" stroke="black" points="228.58,-61.94 223.97,-52.4 221.62,-62.73 228.58,-61.94"/>
</g>
<!-- rk_0_1 -->
@@ -172,7 +172,7 @@
<!-- rk_0_1&#45;&gt;block_1 -->
<g id="edge2" class="edge">
<title>rk_0_1&#45;&gt;block_1</title>
-<path fill="none" stroke="black" d="M144.34,-88.43C143.13,-80.67 141.66,-71.28 140.3,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M144.34,-88.43C143.13,-80.67 141.66,-71.28 140.3,-62.56"/>
<polygon fill="black" stroke="black" points="143.71,-61.74 138.71,-52.4 136.8,-62.82 143.71,-61.74"/>
</g>
<!-- rk_0_0 -->
@@ -186,7 +186,7 @@
<!-- rk_0_0&#45;&gt;block_0 -->
<g id="edge1" class="edge">
<title>rk_0_0&#45;&gt;block_0</title>
-<path fill="none" stroke="black" d="M55.31,-88.43C54.53,-80.67 53.6,-71.28 52.74,-62.56"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M55.31,-88.43C54.53,-80.67 53.6,-71.28 52.74,-62.56"/>
<polygon fill="black" stroke="black" points="56.2,-62 51.73,-52.4 49.23,-62.7 56.2,-62"/>
</g>
<!-- node_1_0 -->
@@ -272,7 +272,7 @@
<!-- rk_1_3&#45;&gt;node_1_3 -->
<g id="edge20" class="edge">
<title>rk_1_3&#45;&gt;node_1_3</title>
-<path fill="none" stroke="black" d="M458,-245.31C458,-237.29 458,-227.55 458,-218.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M458,-245.31C458,-237.29 458,-227.55 458,-218.57"/>
<polygon fill="black" stroke="black" points="461.5,-218.53 458,-208.53 454.5,-218.53 461.5,-218.53"/>
</g>
<!-- rk_1_2 -->
@@ -286,7 +286,7 @@
<!-- rk_1_2&#45;&gt;node_1_2 -->
<g id="edge19" class="edge">
<title>rk_1_2&#45;&gt;node_1_2</title>
-<path fill="none" stroke="black" d="M386,-245.31C386,-237.29 386,-227.55 386,-218.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M386,-245.31C386,-237.29 386,-227.55 386,-218.57"/>
<polygon fill="black" stroke="black" points="389.5,-218.53 386,-208.53 382.5,-218.53 389.5,-218.53"/>
</g>
<!-- rk_1_1 -->
@@ -300,7 +300,7 @@
<!-- rk_1_1&#45;&gt;node_1_1 -->
<g id="edge18" class="edge">
<title>rk_1_1&#45;&gt;node_1_1</title>
-<path fill="none" stroke="black" d="M314,-245.31C314,-237.29 314,-227.55 314,-218.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M314,-245.31C314,-237.29 314,-227.55 314,-218.57"/>
<polygon fill="black" stroke="black" points="317.5,-218.53 314,-208.53 310.5,-218.53 317.5,-218.53"/>
</g>
<!-- rk_1_0 -->
@@ -314,7 +314,7 @@
<!-- rk_1_0&#45;&gt;node_1_0 -->
<g id="edge17" class="edge">
<title>rk_1_0&#45;&gt;node_1_0</title>
-<path fill="none" stroke="black" d="M242,-245.31C242,-237.29 242,-227.55 242,-218.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M242,-245.31C242,-237.29 242,-227.55 242,-218.57"/>
<polygon fill="black" stroke="black" points="245.5,-218.53 242,-208.53 238.5,-218.53 245.5,-218.53"/>
</g>
<!-- node_2_1 -->
@@ -364,7 +364,7 @@
<!-- rk_2_1&#45;&gt;node_2_1 -->
<g id="edge26" class="edge">
<title>rk_2_1&#45;&gt;node_2_1</title>
-<path fill="none" stroke="black" d="M386,-402.31C386,-394.29 386,-384.55 386,-375.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M386,-402.31C386,-394.29 386,-384.55 386,-375.57"/>
<polygon fill="black" stroke="black" points="389.5,-375.53 386,-365.53 382.5,-375.53 389.5,-375.53"/>
</g>
<!-- rk_2_0 -->
@@ -378,7 +378,7 @@
<!-- rk_2_0&#45;&gt;node_2_0 -->
<g id="edge25" class="edge">
<title>rk_2_0&#45;&gt;node_2_0</title>
-<path fill="none" stroke="black" d="M314,-402.31C314,-394.29 314,-384.55 314,-375.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M314,-402.31C314,-394.29 314,-384.55 314,-375.57"/>
<polygon fill="black" stroke="black" points="317.5,-375.53 314,-365.53 310.5,-375.53 317.5,-375.53"/>
</g>
<!-- node_3_0 -->
@@ -410,7 +410,7 @@
<!-- rk_3_0&#45;&gt;node_3_0 -->
<g id="edge29" class="edge">
<title>rk_3_0&#45;&gt;node_3_0</title>
-<path fill="none" stroke="black" d="M350,-559.31C350,-551.29 350,-541.55 350,-532.57"/>
+<path fill="none" stroke="black" stroke-dasharray="1,5" d="M350,-559.31C350,-551.29 350,-541.55 350,-532.57"/>
<polygon fill="black" stroke="black" points="353.5,-532.53 350,-522.53 346.5,-532.53 353.5,-532.53"/>
</g>
</g>
diff --git a/doc/eris.adoc b/doc/eris.adoc
index b208423..89b7c2f 100644
--- a/doc/eris.adoc
+++ b/doc/eris.adoc
@@ -94,7 +94,7 @@ We use a byte padding scheme to ensure that input content size is a multiple of
`PAD(INPUT,BLOCK-SIZE)` :: For `INPUT` of size `n` adds a mandatory byte valued `0x80` (hexadecimal) to `INPUT` followed by `m < BLOCK-SIZE - 1` bytes valued `0x00` such that `n + m + 1` is a multiple of `BLOCK-SIZE`.
`UNPAD(INPUT,BLOCK-SIZE)` :: Starts reading bytes from the end of `INPUT` until a `0x80` is read and then returns bytes of `INPUT` before the `0x80`. Throws an error if a value other than `0x00` is read before reading `0x80` or if no `0x80` is read after reading `BLOCK-SIZE - 1` bytes from the end.
-This is the padding algorithm implemented in https://libsodium.gitbook.io/doc/padding[libsodium]footnote:[Also as apparently specified in ISO/IEC 7816-4, which however is not openly available. Fuck you ISO.].
+This is the padding algorithm implemented in https://libsodium.gitbook.io/doc/padding[libsodium]footnote:[This padding algorithm is apparently also specified in ISO/IEC 7816-4. However, the speicifcation is not openly available. Fuck you ISO.].
=== Block Size
@@ -110,23 +110,79 @@ The block size is encoded in the read capability and the decoding process is cap
====
When using block size 32Kb to encode content smaller than 1Kb, the content will be encoded in a 32Kb block. This is a storage overhead of over 3100%. When encoding very many pieces of small content (e.g. short messages or cartographic nodes) this overhead is not acceptable.
-On the other hand, using small block sizes increases the number of internal nodes that must be used to encode the content (see <<_merkle_tree>>). When encoding larger content it is more efficient to use a block size of 32Kb.
+On the other hand, using small block sizes increases the number of internal nodes that must be used to encode the content (see <<_collect_reference_key_pairs_in_nodes>>). When encoding larger content it is more efficient to use a block size of 32Kb.
====
+=== Convergence Secret
+
+Using the hash of the content as key is called _convergent encryption_.
+
+Because the hash of the content is deterministically computed from the content, the key will be the same when the same content is encoded twice. This results in de-duplication of content. Convergent encryption suffers from two known attacks: The Confirmation Of A File Attack and The Learn-The-Remaining-Information Attack <<Zooko2008>>. A defense against both attacks is to use a _convergence secret_. This results in different encoding of the same content with different convergence secret.
+
+If no convergence secret is specified a null convergence secret is used (32 bytes of zeroes).
+
+The convergence secret is implemented as the keying feature of the Blake2 cryptographic hash <<RFC7693>>.
+
=== Encoding
-An overview of how content (sequence of bytes) is encoded using ERIS:
+Inputs to the encoding process are:
+
+`CONTENT` :: An arbitary length byte sequence of content to be encoded.
+`CONVERGENCE-SECRET` :: A 256 bit (32 byte) byte sequence (see <<_convergence_secret>>).
+`BLOCK-SIZE` :: The block size used for encoding in bytes can be either 1024 (1Kb) or 32768 (32Kb) (see <<_block_size>>).
+
+Content is encoded by first splitting into uniformly sized blocks, encrypting the blocks and computing references to the blocks. If there are multiple references to blocks they are collected in nodes that have the same size as content blocks. The nodes are encrypted and references to the nodes are computed. This process is repeated until there is a single root reference.
+
+References to nodes and blocks of content consist of a reference to an encrypted block and a key to decrypt the block - a _reference-key pair_. The process of encrypting a block and computing a reference-key pair is explained in <<_encrypt_block_and_compute_reference_key_pair>>.
+
+The encoding process constructs a tree of reference-key pairs that reference nodes that hold references to nodes of a lower level or to content.
+
+The number of reference-key pairs collected into a node is called the _arity_ of the tree and depends on the block size. For block size 1Kb the arity of the tree is 16, for block size 32Kb the arity is 512.
+
+An encoding of a content that is split into eight blocks is depicted in <<figure_merkle_tree>>. For illustration purposes the tree is of arity 2 (instead of 16 or 512).
+
+[[figure_merkle_tree]]
+.Encoding of content as tree. Solid edges are concatenations of reference-key pairs as described in <<_collect_reference_key_pairs_in_nodes>>. Dotted edges are encryption and computation of reference-key pairs as described in <<_encrypt_block_and_compute_reference_key_pair>>.
+image::eris-merkle-tree.svg[Merkle Tree,opts=inline]
+
+The block-size, the level of the root reference and the root reference-key pair itself are the necessary pieces of information required to decode content. The tuple consisting of block size, level, root reference and key is called the _read capability_.
+
+The encrypted blocks and the read capability are the outputs of the encoding process.
-1. Split content into unencrypted blocks of size block size (see <<_splitting_input_content_into_blocks>>).
-2. Encrypt unencrypted block and compute reference to encrypted block. This is a fundamental operation during encoding and described in section <<_reference_key_pair>>.
-3. Build a tree of nodes containing references to blocks by (see <<_merkle_tree>>):
- - Collect references to encrypted blocks in a node of size block size
- - Encrypt node and compute reference in the same way as for data blocks.
- - Recursively collect references in nodes of higher level until there is only a single root node.
+A pseudo-code implementation of the encoding process:
-The input to the encoding process is the content as sequence of bytes and an optional convergence secret (see <<_convergence_secret>>).
+[source,pseudocode]
+----
+ERIS-Encode(CONTENT, CONVERGENCE-SECRET, BLOCK-SIZE):
+ // initialize empty list of blocks to be output
+ BLOCKS := []
-The output of the encoding process is a sequence of encrypted blocks (of size block size) and a root reference-key pair.
+ // initialize level to 0
+ LEVEL := 0
+
+ // split the input content into uniformly sized blocks and encode
+ LEVEL-0-BLOCKS, RK-PAIRS := Split-Content(CONTENT, BLOCK-SIZE)
+
+ // add blocks from level 0 to blocks to be output
+ BLOCKS := BLOCKS ++ LEVEL-0-BLOCKS
+
+ // loop until there is a single root reference
+ WHILE Length(RK-PAIRS) > 1:
+ LEVEL-BLOCKS, RK-Pairs := Collect-RK-Pairs(RK-PAIRS, CONVERGENCE-SECRET, BLOCK-SIZE)
+
+ // add blocks to blocks to be output and increase the level counter
+ BLOCKS := BLOCKS ++ LEVEL-BLOCKS
+ LEVEL := LEVEL + 1
+
+ // extract the root reference-key pair
+ ROOT-RK-PAIR := RK-PAIRS[0]
+ ROOT-REFERENCE, ROOT-KEY := ROOT-RK-PAIR
+
+ // return blocks and read-capability
+ RETURN BLOCKS, BLOCK-SIZE, LEVEL, ROOT-REFERENCE, ROOT-KEY
+----
+
+The sub-process `Split-Content` and `Collect-RK-Pairs` are explained in the following sections.
==== Splitting Input Content into Blocks
@@ -134,53 +190,101 @@ Input content is split into blocks of size at most block size such that only the
The last content block is always padded according to the padding algorithm to block size. If the size of the padded last block is larger than block size it is split into content blocks of block size.
-NOTE: If the length of the last content block is exactly block size, then padding will result in a padded block that is double the block size and must be split.
+A pseudo code implementation:
+
+[source,pseudocode]
+----
+Split-Content(CONTENT,CONVERGENCE-SECRET,BLOCK-SIZE):
+ // initialize list of blocks and reference-key pairs to output
+ BLOCKS := []
+ RK-PAIRS := []
+
+ // read blocks of size BLOCK-SIZE from CONTENT
+ WHILE CONTENT-BLOCK, LAST? := READ(CONTENT, BLOCK-SIZE):
+
+ IF LAST?:
+ // pad block if it is the last
+ PADDED := PAD(CONTENT-BLOCK, BLOCK-SIZE)
+
+ IF Length(PADDED) > BLOCK-SIZE:
+ PADDED-0, PADDED-1 := SPLIT(PADDED, BLOCK-SIZE)
+ ENCRYPTED-BLOCK-0, RK-PAIR-0 := Encrypt-Block(PADDED-0, CONVERGENCE-SECRET)
+ ENCRYPTED-BLOCK-1, RK-PAIR-1 := Encrypt-Block(PADDED-1, CONVERGENCE-SECRET)
+ BLOCKS := BLOCKS ++ [ENCRYPTED-BLOCK-0, ENCRYPTED-BLOCK-1]
+ RK-PAIRS := RK-PAIRS ++ [RK-PAIR-0, RK-PAIR-1]
+ ELSE:
+ ENCRYPTED-BLOCK, RK-PAIR := Encrypt-Block(PADDED, CONVERGENCE-SECRET)
+ BLOCKS := BLOCKS ++ [ENCRYPTED-BLOCK]
+ RK-PAIRS := RK-PAIRS ++ [RK-PAIR]
+
+ ELSE:
+ ENCRYPTED-BLOCK, RK-PAIR := Encrypt-Block(CONTENT-BLOCK, CONVERGENCE-SECRET)
+ BLOCKS := BLOCKS ++ [ENCRYPTED-BLOCK]
+ RK-PAIRS := RK-PAIRS ++ [RK-PAIR]
+
+ RETURN BLOCKS, RK-PAIRS
+----
-==== Reference-Key Pair
+NOTE: If the length of the last content block is exactly block size, then padding will result in a padded block that is double the block size and must be split.
-A _reference-key pair_ is a pair consisting of a reference to an encrypted block and the key to decrypt the block. Reference and key are both 32 bytes long. The concatenation of a reference-key pair is 64 bytes long (512 bits). In the following we also use the abbreviation _r-k pair_ for reference-key pair.
+==== Encrypt Block and Compute Reference-Key Pair
-Encrypting a block and computing the reference-key pair is a fundamental operation of the encoding process and may be implemented as following sub-process or function:
+A _reference-key pair_ is a pair consisting of a reference to an encrypted block and the key to decrypt the block. Reference and key are both 32 bytes long. The concatenation of a reference-key pair is 64 bytes long (512 bits).
-1. Compute the hash of the unencrypted block. If a convergence secret is used the convergence secret MUST be used as key of the hash function. The output of the hash is the _key_.
-2. Encrypt the block using the symmetric key cipher with the key.
-3. The hash of the encrypted block is used as _reference_ to the encrypted block.
+The `Encrypt-Block` function encrypts a block and returns the encrypted block along with the reference-key pair:
-The encrypted block MUST be added to the output of the encoding process. The reference-key pair is returned and used for further processing.
+[source,pseudocode]
+----
+Encrypt-Block(INPUT, CONVERGENCE-SECRET):
+ KEY := Blake2b-256(INPUT,CONVERGENCE-SECRET)
+ ENCRYPTED-BLOCK := ChaCha20(INPUT,KEY)
+ REFERENCE := Blake2b-256(ENCRYPTED-BLOCK)
+ RETURN ENCRYPTED-BLOCK, REFERENCE, KEY
+----
The convergence-secret MUST NOT be used to compute the reference to the encrypted block.
-===== Convergence Secret
+==== Collect Reference-Key Pairs in Nodes
-Using the hash of the content as key is called _convergent encryption_.
+Reference-key pairs are collected into nodes of size block size by concatenating reference-key pair. The node is encrypted, and a reference-key pair to the node is computed. This results in a sequence of reference-key pairs that refer to nodes containing reference-key pairs at a lower level - a tree.
-Because the hash of the content is deterministically computed from the content, the key will be the same when the same content is encoded twice. This results in de-duplication of content.
+If there are less than arity number of references-key pairs to collect in a node, then the node is filled with missing number of _null reference-key pairs_ - 64 bytes of zeros. The size of a node is always equal the block size (implemented with the `FILL-WITH-NULL-RK-PAIRS` function).
-However convergent encryption suffers from two known attacks: The Confirmation Of A File Attack and The Learn-The-Remaining-Information Attack <<Zooko2008>>.
+A pseudo-code implementation of `Collect-RK-Pairs`:
-A defense against both attacks is to use a _convergence secret_. This results in different encoding of the same content with different convergence secret.
+[source,pseudocode]
+----
+Collect-RK-Pairs(INPUT-RK-PAIRS, CONVERGENCE-SECRET, BLOCK-SIZE):
+ // number of reference-key pairs in a node
+ ARITY := BLOCK-SIZE / 64
-The convergence secret is implemented as the keying feature of the Blake2 cryptographic hash <<RFC7693>>.
-
-==== Merkle Tree
+ // initialize blocks and reference-key pairs to output
+ BLOCKS := []
+ OUTPUT-RK-PAIRS := []
-Reference-key pairs are collected into nodes of size block size by concatenating the concatenated reference-key pair. The node is encrypted, and a reference-key pair to the node is computed. This results in a sequence of reference-key pairs that refer nodes containing reference-key pairs at a lower level - a tree. This process is recursively applied until there is a single reference-key pair - the _root reference-key pair_. We keep track of the level of recursion.
+ // take ARITY reference-key pairs from INPUT-RK-PAIRS at a time
+ WHILE RK-PAIRS-FOR-NODE := TAKE(INPUT-RK-PAIRS, ARITY):
+ // make sure there are exactly ARITY reference-key pairs in node
+ RK-PAIRS-FOR-NODE := FILL-WITH-NULL-RK-PAIRS(RK-PAIRS-FOR-NODE, ARITY)
-The number of reference-key pairs collected into a node is called the _arity_ of the tree and depends on the block size. For block size 1Kb the arity of the tree is 16, for block size 32Kb the arity is 512. If there are less than arity number of references-key pairs to collect in a node, then the node is filled with missing number of _null reference-key pairs_ - 64 bytes of zeros. The size of a node is always equal the block size.
+ // concat reference-key pairs to node
+ NODE := CONCAT(RK-PAIRS-FOR-NODE)
-The initial input (level 0) is the sequence of reference-key pairs to the input content.
+ // encrypt node and compute reference-key pair
+ BLOCK, RK-TO-NODE := Encrypt-Block(NODE, CONVERGENCE-SECRET)
-The root reference-key pair, the level of the root reference-key pair and the block-size are the necessary pieces of information required to decode content. The tuple consisting of block size, level, root reference and key is called the _read capability_.
+ // add node to output
+ BLOCKS := BLOCKS ++ [BLOCK]
+ OUTPUT-RK-PAIRS := OUTPUT-RK-PAIRS ++ [RK-TO-NODE]
-The read capability as well as the encrypted blocks (as output by the <<_reference_key_pair>> sub-process) is the output of the entire encoding process.
+ RETURN BLOCKS, OUTPUT-RK-PAIRS
+----
-NOTE: The constructed tree of nodes containing reference-key pairs is called a Merkle Tree.
+==== Streaming
-An encoding of a content that is split into eight blocks is depicted in <<figure_merkle_tree>>. For illustration purposes the tree is of arity 2 (instead of 16 or 512).
+The encoding process can be implemented to encode a stream of content while immediately outputting encrypted blocks when ready and eagerly collecting reference-key pairs to nodes. This allows the encoding of larger-than-memory content.
-[[figure_merkle_tree]]
-.Merkle Tree
-image::eris-merkle-tree.svg[Merkle Tree,opts=inline]
+For an example, see https://gitlab.com/openengiadina/eris/-/raw/main/eris/encode.scm[the reference Guile implementation].
=== Decoding
@@ -188,15 +292,16 @@ image::eris-merkle-tree.svg[Merkle Tree,opts=inline]
=== URN
-
-=== ERIS in Streaming Applications
-
== Applications
=== Storage and Transport Layers
=== Namespaces
+== Implementations
+
+A reference implementation is available in Guile: https://gitlab.com/openengiadina/eris/
+
== Acknowledgments
[appendix]