Buckets:

hf-doc-build/doc-dev / xet /pr_2272 /en /hashing.html
rtrm's picture
download
raw
54.7 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Hashing&quot;,&quot;local&quot;:&quot;hashing&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Chunk Hashes&quot;,&quot;local&quot;:&quot;chunk-hashes&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;DATA_KEY&quot;,&quot;local&quot;:&quot;datakey&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Xorb Hashes&quot;,&quot;local&quot;:&quot;xorb-hashes&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;INTERNAL_NODE_KEY&quot;,&quot;local&quot;:&quot;internalnodekey&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Example of data for internal node&quot;,&quot;local&quot;:&quot;example-of-data-for-internal-node&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Example Python code for the internal hash function&quot;,&quot;local&quot;:&quot;example-python-code-for-the-internal-hash-function&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;File Hashes&quot;,&quot;local&quot;:&quot;file-hashes&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Term Verification Hashes&quot;,&quot;local&quot;:&quot;term-verification-hashes&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;VERIFICATION_KEY&quot;,&quot;local&quot;:&quot;verificationkey&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Example Python code for the verification hash&quot;,&quot;local&quot;:&quot;example-python-code-for-the-verification-hash&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Reference Files&quot;,&quot;local&quot;:&quot;reference-files&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Chunk Hashes Sample&quot;,&quot;local&quot;:&quot;chunk-hashes-sample&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;File Hash 
Sample&quot;,&quot;local&quot;:&quot;file-hash-sample&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Xorb Hash Sample&quot;,&quot;local&quot;:&quot;xorb-hash-sample&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Range Hash Sample&quot;,&quot;local&quot;:&quot;range-hash-sample&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link rel="stylesheet" href="/docs/xet/pr_2272/en/_app/immutable/assets/0.e3b0c442.css">
<link rel="modulepreload" href="/docs/xet/pr_2272/en/_app/immutable/entry/start.7209fe0c.js">
<link rel="modulepreload" href="/docs/xet/pr_2272/en/_app/immutable/chunks/scheduler.de5597d1.js">
<link rel="modulepreload" href="/docs/xet/pr_2272/en/_app/immutable/chunks/singletons.800e7c38.js">
<link rel="modulepreload" href="/docs/xet/pr_2272/en/_app/immutable/chunks/paths.d1eab31c.js">
<link rel="modulepreload" href="/docs/xet/pr_2272/en/_app/immutable/entry/app.54268a84.js">
<link rel="modulepreload" href="/docs/xet/pr_2272/en/_app/immutable/chunks/preload-helper.1b7d12f9.js">
<link rel="modulepreload" href="/docs/xet/pr_2272/en/_app/immutable/chunks/index.f8bac2c1.js">
<link rel="modulepreload" href="/docs/xet/pr_2272/en/_app/immutable/nodes/0.097c0756.js">
<link rel="modulepreload" href="/docs/xet/pr_2272/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/xet/pr_2272/en/_app/immutable/nodes/9.e762f09e.js">
<link rel="modulepreload" href="/docs/xet/pr_2272/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.4e1a23d9.js">
<link rel="modulepreload" href="/docs/xet/pr_2272/en/_app/immutable/chunks/CodeBlock.d8c0c481.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Hashing&quot;,&quot;local&quot;:&quot;hashing&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Chunk Hashes&quot;,&quot;local&quot;:&quot;chunk-hashes&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;DATA_KEY&quot;,&quot;local&quot;:&quot;datakey&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Xorb Hashes&quot;,&quot;local&quot;:&quot;xorb-hashes&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;INTERNAL_NODE_KEY&quot;,&quot;local&quot;:&quot;internalnodekey&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Example of data for internal node&quot;,&quot;local&quot;:&quot;example-of-data-for-internal-node&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Example Python code for the internal hash function&quot;,&quot;local&quot;:&quot;example-python-code-for-the-internal-hash-function&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;File Hashes&quot;,&quot;local&quot;:&quot;file-hashes&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Term Verification Hashes&quot;,&quot;local&quot;:&quot;term-verification-hashes&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;VERIFICATION_KEY&quot;,&quot;local&quot;:&quot;verificationkey&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Example Python code for the verification hash&quot;,&quot;local&quot;:&quot;example-python-code-for-the-verification-hash&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Reference Files&quot;,&quot;local&quot;:&quot;reference-files&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Chunk Hashes 
Sample&quot;,&quot;local&quot;:&quot;chunk-hashes-sample&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;File Hash Sample&quot;,&quot;local&quot;:&quot;file-hash-sample&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Xorb Hash Sample&quot;,&quot;local&quot;:&quot;xorb-hash-sample&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Range Hash Sample&quot;,&quot;local&quot;:&quot;range-hash-sample&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white 
rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="hashing" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#hashing"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Hashing</span></h1> <ul data-svelte-h="svelte-1p4elgl"><li><a href="#chunk-hashes">Chunk hashes</a> - compute for each chunk from chunk data.</li> <li><a href="#xorb-hashes">Xorb Hashes</a> - compute for each xorb from its chunk hashes.</li> <li><a href="#file-hashes">File Hashes</a> - compute for each file from its chunk hashes.</li> <li><a href="#term-verification-hashes">Term Verification Hashes</a> - compute for each term in a reconstruction when serializing a 
shard from the chunk hashes in the xorb that is used in that term.</li></ul> <p data-svelte-h="svelte-qn784m">The Xet protocol utilizes a few different hashing types.</p> <p data-svelte-h="svelte-1dw5srm">All hashes referenced are 32 bytes (256 bits) long.</p> <h2 class="relative group"><a id="chunk-hashes" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#chunk-hashes"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Chunk Hashes</span></h2> <p data-svelte-h="svelte-1n4jnlt">After cutting a chunk of data, the chunk hash is computed via a blake3 keyed hash with the following key (DATA_KEY):</p> <h3 class="relative group"><a id="datakey" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#datakey"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 
1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>DATA_KEY</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">[</span>
<span class="hljs-number">102</span><span class="hljs-punctuation">,</span> <span class="hljs-number">151</span><span class="hljs-punctuation">,</span> <span class="hljs-number">245</span><span class="hljs-punctuation">,</span> <span class="hljs-number">119</span><span class="hljs-punctuation">,</span> <span class="hljs-number">91</span><span class="hljs-punctuation">,</span> <span class="hljs-number">149</span><span class="hljs-punctuation">,</span> <span class="hljs-number">80</span><span class="hljs-punctuation">,</span> <span class="hljs-number">222</span><span class="hljs-punctuation">,</span> <span class="hljs-number">49</span><span class="hljs-punctuation">,</span> <span class="hljs-number">53</span><span class="hljs-punctuation">,</span> <span class="hljs-number">203</span><span class="hljs-punctuation">,</span> <span class="hljs-number">172</span><span class="hljs-punctuation">,</span> <span class="hljs-number">165</span><span class="hljs-punctuation">,</span> <span class="hljs-number">151</span><span class="hljs-punctuation">,</span> <span class="hljs-number">24</span><span class="hljs-punctuation">,</span> <span class="hljs-number">28</span><span class="hljs-punctuation">,</span> <span class="hljs-number">157</span><span class="hljs-punctuation">,</span> <span class="hljs-number">228</span><span class="hljs-punctuation">,</span> <span class="hljs-number">33</span><span class="hljs-punctuation">,</span> <span class="hljs-number">16</span><span class="hljs-punctuation">,</span> <span class="hljs-number">155</span><span class="hljs-punctuation">,</span> <span class="hljs-number">235</span><span class="hljs-punctuation">,</span> <span class="hljs-number">43</span><span class="hljs-punctuation">,</span> <span class="hljs-number">88</span><span class="hljs-punctuation">,</span> <span class="hljs-number">180</span><span class="hljs-punctuation">,</span> <span class="hljs-number">208</span><span class="hljs-punctuation">,</span> <span 
class="hljs-number">176</span><span class="hljs-punctuation">,</span> <span class="hljs-number">75</span><span class="hljs-punctuation">,</span> <span class="hljs-number">147</span><span class="hljs-punctuation">,</span> <span class="hljs-number">173</span><span class="hljs-punctuation">,</span> <span class="hljs-number">242</span><span class="hljs-punctuation">,</span> <span class="hljs-number">41</span>
<span class="hljs-punctuation">]</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-vblynd"><a href="https://github.com/huggingface/xet-core/blob/main/merklehash/src/data_hash.rs#L308-L311" rel="nofollow">reference implementation</a></p> <h2 class="relative group"><a id="xorb-hashes" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#xorb-hashes"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Xorb Hashes</span></h2> <p data-svelte-h="svelte-9sa3h0">Xorbs are composed of a series of chunks. Given the series of chunks that make up a xorb, the xorb hash is computed as a MerkleHash using a <a href="https://en.wikipedia.org/wiki/Merkle_tree" rel="nofollow">Merkle Tree</a> data structure with custom hashing functions.
<strong>The xorb hash will be the root node hash of the MerkleTree.</strong></p> <p data-svelte-h="svelte-1j8skhm">The leaf node hashes are the chunk hashes as described in the previous section.</p> <p data-svelte-h="svelte-170oeev">The hash function used to compute internal node hashes is as follows:</p> <ul data-svelte-h="svelte-120fvhk"><li>concatenate the hashes together such that for each chunk there is a line in order formatted like <code>{chunk_hash:x} : {size}\n</code> <ul><li>the hash first in lowercase hex format (64 hex characters e.g. <code>a3f91d6e8b47c20ff9d84a1c77dcb8e5a91e6fbf2b2d483af6d3c1e90ac57843</code>)</li> <li>a space, a colon, a space (<code>:</code>)</li> <li>the chunk length number e.g. 64000</li> <li>finally a newline <code>\n</code> character</li></ul></li> <li>Then take the bytes from this string and compute a blake3 keyed hash with the following key (INTERNAL_NODE_KEY)</li></ul> <p data-svelte-h="svelte-1itha2s"><a href="https://github.com/huggingface/xet-core/blob/main/merklehash/src/aggregated_hashes.rs#L103-L109" rel="nofollow">reference implementation</a></p> <h3 class="relative group"><a id="internalnodekey" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#internalnodekey"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 
56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>INTERNAL_NODE_KEY</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">[</span>
<span class="hljs-number">1</span><span class="hljs-punctuation">,</span> <span class="hljs-number">126</span><span class="hljs-punctuation">,</span> <span class="hljs-number">197</span><span class="hljs-punctuation">,</span> <span class="hljs-number">199</span><span class="hljs-punctuation">,</span> <span class="hljs-number">165</span><span class="hljs-punctuation">,</span> <span class="hljs-number">71</span><span class="hljs-punctuation">,</span> <span class="hljs-number">41</span><span class="hljs-punctuation">,</span> <span class="hljs-number">150</span><span class="hljs-punctuation">,</span> <span class="hljs-number">253</span><span class="hljs-punctuation">,</span> <span class="hljs-number">148</span><span class="hljs-punctuation">,</span> <span class="hljs-number">102</span><span class="hljs-punctuation">,</span> <span class="hljs-number">102</span><span class="hljs-punctuation">,</span> <span class="hljs-number">180</span><span class="hljs-punctuation">,</span> <span class="hljs-number">138</span><span class="hljs-punctuation">,</span> <span class="hljs-number">2</span><span class="hljs-punctuation">,</span> <span class="hljs-number">230</span><span class="hljs-punctuation">,</span> <span class="hljs-number">93</span><span class="hljs-punctuation">,</span> <span class="hljs-number">221</span><span class="hljs-punctuation">,</span> <span class="hljs-number">83</span><span class="hljs-punctuation">,</span> <span class="hljs-number">111</span><span class="hljs-punctuation">,</span> <span class="hljs-number">55</span><span class="hljs-punctuation">,</span> <span class="hljs-number">199</span><span class="hljs-punctuation">,</span> <span class="hljs-number">109</span><span class="hljs-punctuation">,</span> <span class="hljs-number">210</span><span class="hljs-punctuation">,</span> <span class="hljs-number">248</span><span class="hljs-punctuation">,</span> <span class="hljs-number">99</span><span class="hljs-punctuation">,</span> <span 
class="hljs-number">82</span><span class="hljs-punctuation">,</span> <span class="hljs-number">230</span><span class="hljs-punctuation">,</span> <span class="hljs-number">74</span><span class="hljs-punctuation">,</span> <span class="hljs-number">83</span><span class="hljs-punctuation">,</span> <span class="hljs-number">113</span><span class="hljs-punctuation">,</span> <span class="hljs-number">63</span>
<span class="hljs-punctuation">]</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="example-of-data-for-internal-node" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#example-of-data-for-internal-node"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Example of data for internal node</span></h3> <p data-svelte-h="svelte-1onbsm0">Consider a node made up of 4 chunks with the following pairs of hashes and lengths:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect 
fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->hash,length (bytes)
1f6a2b8e9d3c4075a2e8c5fd4f0b763e6f3c1d7a9b2e6487de3f91ab7c6d5401,10000
7c94fe2a38bdcf9b4d2a6f7e1e08ac35bc24a7903d6f5a0e7d1c2b93e5f748de,20000
cfd18a92e0743bb09e56dbf76ea2c34d99b5a0cf271f8d429b6cd148203df061,25000
e38d7c09a21b4cf8d0f92b3a85e6df19f7c20435e0b1c78a9d635f7b8c2e4da1,64000<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1cux42q">Then to form the buffer to compute the internal node hash we will create this string (note the <code>\n</code> newline at the end):</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->&quot;1f6a2b8e9d3c4075a2e8c5fd4f0b763e6f3c1d7a9b2e6487de3f91ab7c6d5401 : 10000
7c94fe2a38bdcf9b4d2a6f7e1e08ac35bc24a7903d6f5a0e7d1c2b93e5f748de : 20000
cfd18a92e0743bb09e56dbf76ea2c34d99b5a0cf271f8d429b6cd148203df061 : 25000
e38d7c09a21b4cf8d0f92b3a85e6df19f7c20435e0b1c78a9d635f7b8c2e4da1 : 64000
&quot;<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ycy69l">Then compute the blake3 keyed hash with INTERNAL_NODE_KEY to get the final hash.</p> <h3 class="relative group"><a id="example-python-code-for-the-internal-hash-function" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#example-python-code-for-the-internal-hash-function"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Example Python code for the internal hash function</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" 
transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> blake3 <span class="hljs-keyword">import</span> blake3
<span class="hljs-keyword">def</span> <span class="hljs-title function_">internal_hash_function</span>(<span class="hljs-params">node</span>):
    buffer = <span class="hljs-string">&quot;&quot;</span>
    <span class="hljs-keyword">for</span> chunk <span class="hljs-keyword">in</span> node:
        size = <span class="hljs-built_in">len</span>(chunk)
        chunk_hash = compute_chunk_hash(chunk)
        buffer += <span class="hljs-string">f&quot;<span class="hljs-subst">{chunk_hash:x}</span> : <span class="hljs-subst">{size}</span>\n&quot;</span>
    blake3(<span class="hljs-built_in">bytes</span>(buffer), key=INTERNAL_NODE_KEY)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="file-hashes" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#file-hashes"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>File Hashes</span></h2> <p data-svelte-h="svelte-1jtatff">After chunking a whole file, to compute the file hash, follow the same procedure used to compute the xorb hash and then take that final hash as data to compute a blake3 keyed hash with a key that is all 0’s.</p> <p data-svelte-h="svelte-hh3b8l">This means create a MerkleTree using the same hashing functions described in the previous section.
Then take the root node’s hash and compute a blake3 keyed hash with the key being 32 0-value bytes.</p> <p data-svelte-h="svelte-1n540og"><a href="https://github.com/huggingface/xet-core/blob/main/merklehash/src/aggregated_hashes.rs#L123-L125" rel="nofollow">reference implementation</a></p> <h2 class="relative group"><a id="term-verification-hashes" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#term-verification-hashes"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Term Verification Hashes</span></h2> <p data-svelte-h="svelte-18d9rp">When uploading a shard, each term in each file info in the shard MUST have a matching FileVerificationEntry section that contains a hash.</p> <p data-svelte-h="svelte-18be7u7">To generate this hash, take the chunk hashes for the specific range of chunks that make up the term and:</p> <ol data-svelte-h="svelte-1nbfhlz"><li><p><strong>Concatenate the raw hash bytes</strong>: Take all the chunk hashes in the range (from <code>chunk_index_start</code> to <code>chunk_index_end</code> in the xorb specified in the term) and concatenate their raw 32-byte representations together in order.</p></li> 
<li><p><strong>Apply keyed hash</strong>: Compute a blake3 keyed hash of the concatenated bytes using the following verification key (VERIFICATION_KEY):</p></li></ol> <h3 class="relative group"><a id="verificationkey" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#verificationkey"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>VERIFICATION_KEY</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute 
pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">[</span>
<span class="hljs-number">127</span><span class="hljs-punctuation">,</span> <span class="hljs-number">24</span><span class="hljs-punctuation">,</span> <span class="hljs-number">87</span><span class="hljs-punctuation">,</span> <span class="hljs-number">214</span><span class="hljs-punctuation">,</span> <span class="hljs-number">206</span><span class="hljs-punctuation">,</span> <span class="hljs-number">86</span><span class="hljs-punctuation">,</span> <span class="hljs-number">237</span><span class="hljs-punctuation">,</span> <span class="hljs-number">102</span><span class="hljs-punctuation">,</span> <span class="hljs-number">18</span><span class="hljs-punctuation">,</span> <span class="hljs-number">127</span><span class="hljs-punctuation">,</span> <span class="hljs-number">249</span><span class="hljs-punctuation">,</span> <span class="hljs-number">19</span><span class="hljs-punctuation">,</span> <span class="hljs-number">231</span><span class="hljs-punctuation">,</span> <span class="hljs-number">165</span><span class="hljs-punctuation">,</span> <span class="hljs-number">195</span><span class="hljs-punctuation">,</span> <span class="hljs-number">243</span><span class="hljs-punctuation">,</span> <span class="hljs-number">164</span><span class="hljs-punctuation">,</span> <span class="hljs-number">205</span><span class="hljs-punctuation">,</span> <span class="hljs-number">38</span><span class="hljs-punctuation">,</span> <span class="hljs-number">213</span><span class="hljs-punctuation">,</span> <span class="hljs-number">181</span><span class="hljs-punctuation">,</span> <span class="hljs-number">219</span><span class="hljs-punctuation">,</span> <span class="hljs-number">73</span><span class="hljs-punctuation">,</span> <span class="hljs-number">230</span><span class="hljs-punctuation">,</span> <span class="hljs-number">65</span><span class="hljs-punctuation">,</span> <span class="hljs-number">36</span><span class="hljs-punctuation">,</span> <span 
class="hljs-number">152</span><span class="hljs-punctuation">,</span> <span class="hljs-number">127</span><span class="hljs-punctuation">,</span> <span class="hljs-number">40</span><span class="hljs-punctuation">,</span> <span class="hljs-number">251</span><span class="hljs-punctuation">,</span> <span class="hljs-number">148</span><span class="hljs-punctuation">,</span> <span class="hljs-number">195</span>
<span class="hljs-punctuation">]</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1habaob">The result of the blake3 keyed hash is the verification hash that MUST be used in the FileVerificationEntry for the term.</p> <p data-svelte-h="svelte-1a87ss8"><a href="https://github.com/huggingface/xet-core/blob/main/mdb_shard/src/chunk_verification.rs#L4-L16" rel="nofollow">reference implementation</a></p> <h3 class="relative group"><a id="example-python-code-for-the-verification-hash" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#example-python-code-for-the-verification-hash"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Example Python code for the verification hash</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">verification_hash_function</span>(<span class="hljs-params">term</span>):
buffer = <span class="hljs-built_in">bytearray</span>()
<span class="hljs-comment"># note chunk ranges are end exclusive</span>
<span class="hljs-keyword">for</span> chunk_hash <span class="hljs-keyword">in</span> term.xorb.chunk_hashes[term.chunk_index_start : term.chunk_index_end]:
buffer.extend(<span class="hljs-built_in">bytes</span>(chunk_hash))
<span class="hljs-keyword">return</span> blake3(buffer, key=VERIFICATION_KEY)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="reference-files" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#reference-files"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Reference Files</span></h2> <p data-svelte-h="svelte-106smfh">Reference files are provided in Hugging Face Dataset repository <a href="https://huggingface.co/datasets/xet-team/xet-spec-reference-files" rel="nofollow">xet-team/xet-spec-reference-files</a>.</p> <p data-svelte-h="svelte-zalecs">In this repository there are a number of different samples implementors can use to verify hash computations.</p> <blockquote data-svelte-h="svelte-ccbbrv"><p>Note that all hashes are represented as strings.
To get the raw value of these hashes you must invert the byte order within each 8-byte group of the hash string, reversing the procedure described in <a href="./api#converting-hashes-to-strings">api</a>.</p></blockquote> <h3 class="relative group"><a id="chunk-hashes-sample" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#chunk-hashes-sample"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Chunk Hashes Sample</span></h3> <p data-svelte-h="svelte-16y0ek8">There are 3 chunk files; for each file name, the first 64 characters are the string format of the chunk hash of the data in the file:</p> <ul data-svelte-h="svelte-1fxoa4s"><li><a href="https://huggingface.co/datasets/xet-team/xet-spec-reference-files/blob/main/b10aa1dc71c61661de92280c41a188aabc47981739b785724a099945d8dc5ce4.chunk" rel="nofollow">b10aa1dc71c61661de92280c41a188aabc47981739b785724a099945d8dc5ce4.chunk</a></li> <li><a href="https://huggingface.co/datasets/xet-team/xet-spec-reference-files/blob/main/26255591fa803b6baf25d88c315b8a6f5153d5bcfdf18ec5ef526264e0ccc907.chunk" rel="nofollow">26255591fa803b6baf25d88c315b8a6f5153d5bcfdf18ec5ef526264e0ccc907.chunk</a></li> 
<li><a href="https://huggingface.co/datasets/xet-team/xet-spec-reference-files/blob/main/099cb228194fe640e36a6c7d274ee5ed3a714ccd557a0951d9b6b43a7292b5d1.chunk" rel="nofollow">099cb228194fe640e36a6c7d274ee5ed3a714ccd557a0951d9b6b43a7292b5d1.chunk</a></li></ul> <h3 class="relative group"><a id="file-hash-sample" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#file-hash-sample"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>File Hash Sample</span></h3> <p data-svelte-h="svelte-g3zk4y">The <a href="https://huggingface.co/datasets/xet-team/xet-spec-reference-files" rel="nofollow">xet-team/xet-spec-reference-files</a> repository contains the original file
<a href="https://huggingface.co/datasets/xet-team/xet-spec-reference-files/blob/main/Electric_Vehicle_Population_Data_20250917.csv" rel="nofollow">Electric_Vehicle_Population_Data_20250917.csv</a>.</p> <p data-svelte-h="svelte-fwmkut">When processed through the Xet upload protocol the chunks that are produced for this file are listed (formatted <code>&lt;hash&gt; &lt;length&gt;</code>) in the file
<a href="https://huggingface.co/datasets/xet-team/xet-spec-reference-files/blob/main/Electric_Vehicle_Population_Data_20250917.csv.chunks" rel="nofollow">Electric_Vehicle_Population_Data_20250917.csv.chunks</a>.</p> <p data-svelte-h="svelte-rac1s">Using these chunks to compute a file hash of the entire file the result is the hash stored in the file
<a href="https://huggingface.co/datasets/xet-team/xet-spec-reference-files/blob/main/Electric_Vehicle_Population_Data_20250917.csv.xet-file-hash" rel="nofollow">Electric_Vehicle_Population_Data_20250917.csv.xet-file-hash</a> or the raw value <code>118a53328412787fee04011dcf82fdc4acf3a4a1eddec341c910d30a306aaf97</code>.</p> <h3 class="relative group"><a id="xorb-hash-sample" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#xorb-hash-sample"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Xorb Hash Sample</span></h3> <p data-svelte-h="svelte-rn2oha">All of the chunks of <a href="https://huggingface.co/datasets/xet-team/xet-spec-reference-files/blob/main/Electric_Vehicle_Population_Data_20250917.csv" rel="nofollow">Electric_Vehicle_Population_Data_20250917.csv</a> can fit into 1 single xorb.</p> <p data-svelte-h="svelte-1hsvxc0">The xorb produced with all of the chunks in order for this file can be found serialized in file <a href="https://huggingface.co/datasets/xet-team/xet-spec-reference-files/blob/main/eea25d6ee393ccae385820daed127b96ef0ea034dfb7cf6da3a950ce334b7632.xorb" 
rel="nofollow">eea25d6ee393ccae385820daed127b96ef0ea034dfb7cf6da3a950ce334b7632.xorb</a>.</p> <p data-svelte-h="svelte-iyfjm7">The hash of this xorb is <code>eea25d6ee393ccae385820daed127b96ef0ea034dfb7cf6da3a950ce334b7632</code>, the value in <a href="https://huggingface.co/datasets/xet-team/xet-spec-reference-files/blob/main/Electric_Vehicle_Population_Data_20250917.csv.xet-xorb-hash" rel="nofollow">Electric_Vehicle_Population_Data_20250917.csv.xet-xorb-hash</a>.</p> <p data-svelte-h="svelte-1r234cl">The chunks that make up this xorb are listed in a file <a href="https://huggingface.co/datasets/xet-team/xet-spec-reference-files/blob/main/eea25d6ee393ccae385820daed127b96ef0ea034dfb7cf6da3a950ce334b7632.xorb.chunks" rel="nofollow">eea25d6ee393ccae385820daed127b96ef0ea034dfb7cf6da3a950ce334b7632.xorb.chunks</a>;
note this file is equivalent to <a href="https://huggingface.co/datasets/xet-team/xet-spec-reference-files/blob/main/Electric_Vehicle_Population_Data_20250917.csv.chunks" rel="nofollow">Electric_Vehicle_Population_Data_20250917.csv.chunks</a>.</p> <h3 class="relative group"><a id="range-hash-sample" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#range-hash-sample"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Range Hash Sample</span></h3> <p data-svelte-h="svelte-1eepk7q">In the reconstruction of <a href="https://huggingface.co/datasets/xet-team/xet-spec-reference-files/blob/main/Electric_Vehicle_Population_Data_20250917.csv" rel="nofollow">Electric_Vehicle_Population_Data_20250917.csv</a>
with xorb <a href="https://huggingface.co/datasets/xet-team/xet-spec-reference-files/blob/main/eea25d6ee393ccae385820daed127b96ef0ea034dfb7cf6da3a950ce334b7632.xorb" rel="nofollow">eea25d6ee393ccae385820daed127b96ef0ea034dfb7cf6da3a950ce334b7632</a> there is 1 range that contains all 796 chunks.</p> <p data-svelte-h="svelte-e3x7l">The verification range hash for this range is the value in <a href="https://huggingface.co/datasets/xet-team/xet-spec-reference-files/blob/main/eea25d6ee393ccae385820daed127b96ef0ea034dfb7cf6da3a950ce334b7632.xorb.range-hash" rel="nofollow">eea25d6ee393ccae385820daed127b96ef0ea034dfb7cf6da3a950ce334b7632.xorb.range-hash</a>
which is <code>d81c11b1fc9bc2a25587108c675bbfe65ca2e5d350b0cd92c58329fcc8444178</code>.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/hub-docs/blob/main/docs/xet/hashing.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
// Client bootstrap for this page (presumably emitted by SvelteKit, judging by the
// `__sveltekit_` prefix). The surrounding braces keep the `const` bindings block-scoped.
{
// Assigned without a declaration, so in this classic script it lands on the global
// object; it carries the asset path, base path and an (empty) env map.
__sveltekit_1t7h0ns = {
assets: "/docs/xet/pr_2272/en",
base: "/docs/xet/pr_2272/en",
env: {}
};
// The element that contains this <script> tag; handed to kit.start() below.
const element = document.currentScript.parentElement;
// Two-entry data array, both null; passed through to kit.start() unchanged.
const data = [null,null];
// Load both entry modules in parallel and start the app once both have resolved.
Promise.all([
import("/docs/xet/pr_2272/en/_app/immutable/entry/start.7209fe0c.js"),
import("/docs/xet/pr_2272/en/_app/immutable/entry/app.54268a84.js")
]).then(([kit, app]) => {
// `kit` is the start module, `app` the app module (same order as the imports above).
kit.start(app, element, {
node_ids: [0, 9],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
54.7 kB
·
Xet hash:
d1709a33d7150f542235c4d4ddd29da5f9a721db9e7bfe574eaf3aea66862145

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.