Buckets:

rtrm's picture
download
raw
57.2 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Neuron Model Cache&quot;,&quot;local&quot;:&quot;neuron-model-cache&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;How to use the Neuron model cache&quot;,&quot;local&quot;:&quot;how-to-use-the-neuron-model-cache&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;How caching works&quot;,&quot;local&quot;:&quot;how-caching-works&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Neuron model cache lookup (inferentia only)&quot;,&quot;local&quot;:&quot;neuron-model-cache-lookup-inferentia-only&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Advanced usage (trainium only)&quot;,&quot;local&quot;:&quot;advanced-usage-trainium-only&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;How to use a private Neuron model cache (trainium only)&quot;,&quot;local&quot;:&quot;how-to-use-a-private-neuron-model-cache-trainium-only&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Using the Optimum CLI&quot;,&quot;local&quot;:&quot;using-the-optimum-cli&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Using the environment variable CUSTOM_CACHE_REPO&quot;,&quot;local&quot;:&quot;using-the-environment-variable-customcacherepo&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Cache system flow&quot;,&quot;local&quot;:&quot;cache-system-flow&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Optimum CLI&quot;,&quot;local&quot;:&quot;optimum-cli&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Add a model to the cache (trainium only)&quot;,&quot;local&quot;:&quot;add-a-model-to-the-cache-trainium-only&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;List a cache 
repo&quot;,&quot;local&quot;:&quot;list-a-cache-repo&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/optimum.neuron/v0.2.0.dev2/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.2.0.dev2/en/_app/immutable/entry/start.c3692dcd.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.2.0.dev2/en/_app/immutable/chunks/scheduler.85c25b89.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.2.0.dev2/en/_app/immutable/chunks/singletons.bf318e21.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.2.0.dev2/en/_app/immutable/chunks/paths.4f2bc42b.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.2.0.dev2/en/_app/immutable/entry/app.40ef12d9.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.2.0.dev2/en/_app/immutable/chunks/index.c9bcf812.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.2.0.dev2/en/_app/immutable/nodes/0.8386078c.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.2.0.dev2/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.2.0.dev2/en/_app/immutable/nodes/6.c2da8bd3.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.2.0.dev2/en/_app/immutable/chunks/Tip.d8f753fa.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.2.0.dev2/en/_app/immutable/chunks/CodeBlock.c004bd26.js">
<link rel="modulepreload" href="/docs/optimum.neuron/v0.2.0.dev2/en/_app/immutable/chunks/index.9790a2b6.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Neuron Model Cache&quot;,&quot;local&quot;:&quot;neuron-model-cache&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;How to use the Neuron model cache&quot;,&quot;local&quot;:&quot;how-to-use-the-neuron-model-cache&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;How caching works&quot;,&quot;local&quot;:&quot;how-caching-works&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Neuron model cache lookup (inferentia only)&quot;,&quot;local&quot;:&quot;neuron-model-cache-lookup-inferentia-only&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Advanced usage (trainium only)&quot;,&quot;local&quot;:&quot;advanced-usage-trainium-only&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;How to use a private Neuron model cache (trainium only)&quot;,&quot;local&quot;:&quot;how-to-use-a-private-neuron-model-cache-trainium-only&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Using the Optimum CLI&quot;,&quot;local&quot;:&quot;using-the-optimum-cli&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Using the environment variable CUSTOM_CACHE_REPO&quot;,&quot;local&quot;:&quot;using-the-environment-variable-customcacherepo&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Cache system flow&quot;,&quot;local&quot;:&quot;cache-system-flow&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Optimum CLI&quot;,&quot;local&quot;:&quot;optimum-cli&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Add a model to the cache (trainium 
only)&quot;,&quot;local&quot;:&quot;add-a-model-to-the-cache-trainium-only&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;List a cache repo&quot;,&quot;local&quot;:&quot;list-a-cache-repo&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="neuron-model-cache" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#neuron-model-cache"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Neuron Model Cache</span></h1> <p data-svelte-h="svelte-1gpva7n">The Neuron Model Cache is a remote cache for compiled Neuron models in the <code>neff</code> format.
It is integrated into the <code>NeuronTrainer</code> and <code>NeuronModelForCausalLM</code> classes to enable loading pretrained models from the cache instead of compiling them locally.</p> <p data-svelte-h="svelte-17gthuj"><strong>Note: it is not available for models exported using any other NeuronModelXX classes, which use a different export mechanism.</strong></p> <p data-svelte-h="svelte-1brqt65">The Neuron Model Cache is hosted on the <a href="https://huggingface.co/aws-neuron/optimum-neuron-cache" rel="nofollow">Hugging Face Hub</a> and includes compiled files for all popular and supported <code>optimum-neuron</code> pre-trained models.</p> <p data-svelte-h="svelte-bxzl2y">Before training a Transformers or Diffusion model or loading a NeuronModelForCausalLM on Neuron platforms, it needs to be exported to neuron format
with <a href="https://github.com/aws-neuron/aws-neuron-samples/tree/master/torch-neuronx" rel="nofollow"><code>torch-neuronx</code></a>.</p> <p data-svelte-h="svelte-1ht0rpw">When exporting a model, <a href="https://github.com/aws-neuron/aws-neuron-samples/tree/master/torch-neuronx" rel="nofollow"><code>torch-neuronx</code></a> will:</p> <ul data-svelte-h="svelte-o0k9t9"><li>convert it to a set of <a href="https://github.com/pytorch/xla/" rel="nofollow">XLA</a> subgraphs,</li> <li>compile each subgraph with the neuronx compiler into a Neuron Executable File Format (NEFF) binary file.</li></ul> <p data-svelte-h="svelte-19utg6k">The first step is relatively fast, but the compilation takes a lot of time.
To avoid recompiling all NEFF files every time a model is loaded on a NeuronX host, <a href="https://github.com/aws-neuron/aws-neuron-samples/tree/master/torch-neuronx" rel="nofollow"><code>torch-neuronx</code></a>
stores NEFF files in a local directory, usually <code>/var/tmp/neuron-compile-cache</code>.</p> <p data-svelte-h="svelte-xqjm40">However, this local cache is not shared between platforms, which means that every time you train or export a model on a new host, you need to recompile it.</p> <p data-svelte-h="svelte-pgddmx">We created the Neuron Model Cache to solve this limitation by providing a public repository of precompiled model graphs.</p> <p data-svelte-h="svelte-1ih36js">Note: we also support the creation of private, secured, remote model caches.</p> <h2 class="relative group"><a id="how-to-use-the-neuron-model-cache" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-to-use-the-neuron-model-cache"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How to use the Neuron model cache</span></h2> <p data-svelte-h="svelte-7db3j2">The public model cache will be used when you use the <code>NeuronTrainer</code> or <code>NeuronModelForCausalLM</code> classes. 
There are no additional changes needed.</p> <p data-svelte-h="svelte-1yf7qx3">When exporting a model to neuron format, <code>optimum-neuron</code> will simply look for cached NEFF files in the hub repository during the compilation of the
model subgraphs.</p> <p data-svelte-h="svelte-3uovh2">If the NEFF files are cached, they will be fetched from the hub and directly loaded instead of being recompiled.</p> <h2 class="relative group"><a id="how-caching-works" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-caching-works"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How caching works</span></h2> <p data-svelte-h="svelte-ten2g9">The Optimum Neuron Cache is built on top of the <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/general/arch/neuron-features/neuron-caching.html" rel="nofollow">NeuronX compiler cache</a>.</p> <p data-svelte-h="svelte-4n1dx0">It is important to understand that the cache operates on NEFF binaries, and not on the model itself.</p> <p data-svelte-h="svelte-j69uqo">As explained previously, each model exported to Neuron using the <code>NeuronTrainer</code> or <code>NeuronModelForCausalLM</code> is composed of <a href="https://github.com/pytorch/xla/" rel="nofollow">XLA</a> subgraphs.</p> <p data-svelte-h="svelte-a13ahi">Each subgraph is unique, and results from the combination of:</p> <ul 
data-svelte-h="svelte-n2s1go"><li>the <code>transformers</code> or <code>transformers_neuronx</code> python modeling code,</li> <li>the <code>transformers</code> model config,</li> <li>the <code>input_shapes</code> selected during the export,</li> <li>The precision of the model, full-precision, fp16 or bf16.</li></ul> <p data-svelte-h="svelte-1d5pbx1">When compiling a subgraph to a NEFF file, other parameters influence the result:</p> <ul data-svelte-h="svelte-esajut"><li>The version of the Neuron X compiler,</li> <li>The number of Neuron cores used,</li> <li>The compilation parameters (such as the optimization level).</li></ul> <p data-svelte-h="svelte-19kg2jt">All these parameters are combined together to create a unique hash that identifies a NEFF file.</p> <p data-svelte-h="svelte-10ycggs">This has two very important consequences:</p> <ul data-svelte-h="svelte-13jfx6f"><li>it is only when actually exporting a model that the associated NEFF files can be identified,</li> <li>even a small change in the model configuration will lead to a different set of NEFF files.</li></ul> <p data-svelte-h="svelte-1gfp5wb">It is therefore very difficult to know in advance if the NEFFs associated to a specific model configuration are cached.</p> <h2 class="relative group"><a id="neuron-model-cache-lookup-inferentia-only" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#neuron-model-cache-lookup-inferentia-only"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 
28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Neuron model cache lookup (inferentia only)</span></h2> <p data-svelte-h="svelte-1vbfh4e">The neuron cache lookup is a feature allowing users to look for compatible cached model configurations before exporting
a model for inference.</p> <p data-svelte-h="svelte-3pnikr">It is based on a dedicated registry composed of stored cached configurations.</p> <p data-svelte-h="svelte-peax48">Cached model configurations are stored as entries under a specific subfolder in the Neuron Model Cache:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->neuronxcc<span class="hljs-number">-2</span>.<span class="hljs-number">12</span>.<span class="hljs-number">54</span>.<span class="hljs-number">0</span>+f631c2365
├── <span class="hljs-number">0</span>_REGISTRY
│   └── <span class="hljs-number">0</span>.<span class="hljs-number">0</span>.<span class="hljs-number">18</span>
│       └── <span class="hljs-keyword">llama
</span>│           └── meta-<span class="hljs-keyword">llama
</span>│               └── <span class="hljs-keyword">Llama-2-7b-chat-hf
</span>│                   └── <span class="hljs-number">54</span>c<span class="hljs-symbol">1f</span>6689cd<span class="hljs-symbol">88f</span><span class="hljs-symbol">246f</span>ce.<span class="hljs-keyword">json</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ap9tv8">Each entry corresponds to the combination of a model configuration and its export parameters: this is as close as we can get to
uniquely identifying the exported model.</p> <p data-svelte-h="svelte-i6ijok">You can use the <code>optimum-cli</code> to look up compatible cached entries by passing it a hub model_id or the path to a file
containing a model <code>config.json</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta prompt_">$ </span><span class="language-bash">optimum-cli neuron cache lookup meta-llama/Llama-2-7b-chat-hf</span>
*** 1 entrie(s) found in cache for meta-llama/Llama-2-7b-chat-hf ***
task: text-generation
batch_size: 1
num_cores: 24
auto_cast_type: fp16
sequence_length: 2048
compiler_type: neuronx-cc
compiler_version: 2.12.54.0+f631c2365
checkpoint_id: meta-llama/Llama-2-7b-chat-hf
checkpoint_revision: c1b0db933684edbfe29a06fa47eb19cc48025e93<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-34litn"><strong>Note that even if compatible cached entries exist, this does not always guarantee that the model will not be recompiled during export
if you modified the compilation parameters or updated the neuronx packages.</strong></p> <h2 class="relative group"><a id="advanced-usage-trainium-only" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-usage-trainium-only"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced usage (trainium only)</span></h2> <h3 class="relative group"><a id="how-to-use-a-private-neuron-model-cache-trainium-only" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-to-use-a-private-neuron-model-cache-trainium-only"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 
1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How to use a private Neuron model cache (trainium only)</span></h3> <p data-svelte-h="svelte-1ob0ma0">The repository for the public cache is <code>aws-neuron/optimum-neuron-cache</code>. This repository includes all precompiled files for commonly used models so that it is publicly available and free to use for everyone. But there are two limitations:</p> <ol data-svelte-h="svelte-1u4191l"><li>You will not be able to push your own compiled files on this repo</li> <li>It is public and you might want to use a private repo for private models</li></ol> <p data-svelte-h="svelte-16nb7jv">To alleviate that you can create your own private cache repository using the <code>optimum-cli</code> or set the environment variable <code>CUSTOM_CACHE_REPO</code>.</p> <h4 class="relative group"><a id="using-the-optimum-cli" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-the-optimum-cli"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 
0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using the Optimum CLI</span></h4> <p data-svelte-h="svelte-1a6223y">The Optimum CLI offers 2 subcommands for cache creation and setting:</p> <ul data-svelte-h="svelte-y2ved0"><li><code>create</code>: To create a new cache repository that you can use as a private Neuron Model cache.</li> <li><code>set</code>: To set the name of the Neuron cache repository locally, the repository needs to exist
and will be used by default by <code>optimum-neuron</code>.</li></ul> <p data-svelte-h="svelte-3k36mw">Create a new Neuron cache repository:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->optimum-cli neuron cache create --help
usage: optimum-cli neuron cache create [-h] [-n <span class="hljs-keyword">NAME</span>] [--<span class="hljs-keyword">public</span>]
optional arguments:
  -h, --help            show this help <span class="hljs-keyword">message</span> <span class="hljs-keyword">and</span> <span class="hljs-keyword">exit</span>
  -n <span class="hljs-keyword">NAME</span>, --<span class="hljs-keyword">name</span> <span class="hljs-keyword">NAME</span>  The <span class="hljs-keyword">name</span> <span class="hljs-keyword">of</span> the repo that will be used <span class="hljs-keyword">as</span> a remote cache <span class="hljs-keyword">for</span> the compilation files.
  --<span class="hljs-keyword">public</span>              <span class="hljs-keyword">If</span> <span class="hljs-keyword">set</span>, the created repo will be <span class="hljs-keyword">public</span>. By <span class="hljs-keyword">default</span> the cache repo <span class="hljs-keyword">is</span> <span class="hljs-keyword">private</span>.
<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19qd8g8">The <code>-n</code> / <code>--name</code> option allows you to specify a name for the Neuron cache repo, if not set the default name will be used. The <code>--public</code> flag allows you to make your Neuron cache public as it will be created as a private repository by default.</p> <p data-svelte-h="svelte-11lpom8">Example:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->optimum-cli neuron cache create
Neuron cache created on the Hugging Face Hub: michaelbenayoun/optimum-neuron-cache [<span class="hljs-keyword">private</span>].
Neuron cache name set locally to michaelbenayoun<span class="hljs-regexp">/optimum-neuron-cache in /</span>home<span class="hljs-regexp">/michael/</span>.cache<span class="hljs-regexp">/huggingface/</span>optimum_neuron_custom_cache.<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-zfzeq3">Set a different Trainium cache repository:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->usage: optimum-cli neuron cache set [-h] name
positional arguments:
name The name of the repo to use as remote cache.
optional arguments:
-h, --help show this help message and <span class="hljs-keyword">exit</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-11lpom8">Example:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->optimum-cli neuron cache set michaelbenayoun/optimum-neuron-cache
Neuron cache name set locally to michaelbenayoun<span class="hljs-regexp">/optimum-neuron-cache in /</span>home<span class="hljs-regexp">/michael/</span>.cache<span class="hljs-regexp">/huggingface/</span>optimum_neuron_custom_cache<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-j3xvxf">The <code>optimum-cli neuron cache set</code> command is useful when working on a new instance to use your own cache.</p></div> <h4 class="relative group"><a id="using-the-environment-variable-customcacherepo" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-the-environment-variable-customcacherepo"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using the environment variable CUSTOM_CACHE_REPO</span></h4> <p data-svelte-h="svelte-1k1yegn">Using the CLI is not always feasible, and not very practical for small testing. 
In this case, you can simply set the environment variable <code>CUSTOM_CACHE_REPO</code>.</p> <p data-svelte-h="svelte-1789np1">For example, if your cache repo is called <code>michaelbenayoun/my_custom_cache_repo</code>, you just need to do:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->CUSTOM_CACHE_REPO=<span class="hljs-string">&quot;michaelbenayoun/my_custom_cache_repo&quot;</span> torchrun ...<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ylttvt">or:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" 
fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> CUSTOM_CACHE_REPO=<span class="hljs-string">&quot;michaelbenayoun/my_custom_cache_repo&quot;</span>
torchrun ...<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-hjb8vh">You have to be <a href="https://huggingface.co/docs/huggingface_hub/quick-start#login" rel="nofollow">logged into the Hugging Face Hub</a> to be able to push and pull files from your private cache repository.</p> <h3 class="relative group"><a id="cache-system-flow" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#cache-system-flow"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Cache system flow</span></h3> <p align="center" data-svelte-h="svelte-1v6yz4b"><img alt="Cache system flow" src="https://huggingface.co/datasets/optimum/documentation-images/resolve/main/neuron/cache_system_flow.jpg"> <br> <em style="color: grey">Cache system flow</em></p> <p data-svelte-h="svelte-1vj6c2d">At the beginning of each training step, the <a href="/docs/optimum.neuron/v0.2.0.dev2/en/package_reference/trainer#optimum.neuron.NeuronTrainer">NeuronTrainer</a> computes a <code>NeuronHash</code> and checks the cache repo(s) (official and custom) on the Hugging Face Hub to see if there are compiled files associated with this hash.
If that is the case, the files are downloaded directly to the local cache directory and no compilation is needed. Otherwise, compilation is performed.</p> <p data-svelte-h="svelte-or9lg6">Just as for downloading compiled files, the <a href="/docs/optimum.neuron/v0.2.0.dev2/en/package_reference/trainer#optimum.neuron.NeuronTrainer">NeuronTrainer</a> will keep track of the newly created compilation files at each training step, and upload them to the Hugging Face Hub at save time or when training ends. This assumes that you have write access to the cache repo; otherwise nothing will be pushed.</p> <h2 class="relative group"><a id="optimum-cli" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#optimum-cli"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Optimum CLI</span></h2> <p data-svelte-h="svelte-18b2k5h">The Optimum CLI can be used to perform various cache-related tasks, as described by the <code>optimum-cli neuron cache</code> command usage message:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer 
focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->usage: optimum-cli neuron cache [-h] {create,<span class="hljs-keyword">set</span>,<span class="hljs-built_in">add</span>,<span class="hljs-keyword">list</span>,synchronize,lookup} ...
positional arguments:
{create,<span class="hljs-keyword">set</span>,<span class="hljs-built_in">add</span>,<span class="hljs-keyword">list</span>,synchronize,lookup}
create Create <span class="hljs-keyword">a</span> model repo <span class="hljs-keyword">on</span> the Hugging Face Hub <span class="hljs-keyword">to</span> store Neuron <span class="hljs-keyword">X</span> compilation <span class="hljs-keyword">files</span>.
<span class="hljs-keyword">set</span> Set the name of the Neuron cache repo <span class="hljs-keyword">to</span> use locally (trainium <span class="hljs-keyword">only</span>).
<span class="hljs-built_in">add</span> Add <span class="hljs-keyword">a</span> model <span class="hljs-keyword">to</span> the cache of your choice (trainium <span class="hljs-keyword">only</span>).
<span class="hljs-keyword">list</span> List models in <span class="hljs-keyword">a</span> cache repo (trainium <span class="hljs-keyword">only</span>).
synchronize Synchronize local <span class="hljs-keyword">compiler</span> cache with the hub cache (inferentia <span class="hljs-keyword">only</span>).
lookup Lookup the neuronx <span class="hljs-keyword">compiler</span> hub cache <span class="hljs-keyword">for</span> the specified model id (inferentia <span class="hljs-keyword">only</span>).
optional arguments:
-h, --<span class="hljs-keyword">help</span> show this <span class="hljs-keyword">help</span> message <span class="hljs-built_in">and</span> <span class="hljs-keyword">exit</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="add-a-model-to-the-cache-trainium-only" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#add-a-model-to-the-cache-trainium-only"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Add a model to the cache (trainium only)</span></h3> <p data-svelte-h="svelte-yzylfb">It is possible to add a model's compilation files to a cache repo via the <code>optimum-cli neuron cache add</code> command:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->usage: optimum-cli neuron cache add <span class="hljs-selector-attr">[-h]</span> -m MODEL <span class="hljs-attr">--task</span> TASK <span class="hljs-attr">--train_batch_size</span> TRAIN_BATCH_SIZE <span class="hljs-selector-attr">[--eval_batch_size EVAL_BATCH_SIZE]</span> <span class="hljs-selector-attr">[--sequence_length SEQUENCE_LENGTH]</span>
<span class="hljs-selector-attr">[--encoder_sequence_length ENCODER_SEQUENCE_LENGTH]</span> <span class="hljs-selector-attr">[--decoder_sequence_length DECODER_SEQUENCE_LENGTH]</span>
<span class="hljs-selector-attr">[--gradient_accumulation_steps GRADIENT_ACCUMULATION_STEPS]</span> <span class="hljs-attr">--precision</span> {fp,bf16} <span class="hljs-attr">--num_cores</span>
{<span class="hljs-number">1</span>,<span class="hljs-number">2</span>,<span class="hljs-number">3</span>,<span class="hljs-number">4</span>,<span class="hljs-number">5</span>,<span class="hljs-number">6</span>,<span class="hljs-number">7</span>,<span class="hljs-number">8</span>,<span class="hljs-number">9</span>,<span class="hljs-number">10</span>,<span class="hljs-number">11</span>,<span class="hljs-number">12</span>,<span class="hljs-number">13</span>,<span class="hljs-number">14</span>,<span class="hljs-number">15</span>,<span class="hljs-number">16</span>,<span class="hljs-number">17</span>,<span class="hljs-number">18</span>,<span class="hljs-number">19</span>,<span class="hljs-number">20</span>,<span class="hljs-number">21</span>,<span class="hljs-number">22</span>,<span class="hljs-number">23</span>,<span class="hljs-number">24</span>,<span class="hljs-number">25</span>,<span class="hljs-number">26</span>,<span class="hljs-number">27</span>,<span class="hljs-number">28</span>,<span class="hljs-number">29</span>,<span class="hljs-number">30</span>,<span class="hljs-number">31</span>,<span class="hljs-number">32</span>} <span class="hljs-selector-attr">[--max_steps MAX_STEPS]</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-jrg4yl">When running this command, a small training session will be run and the resulting compilation files will be pushed.</p> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400">Make sure that the Neuron cache repo to use is set up locally; this can be done by running the <code>optimum-cli neuron cache set</code> command.
You also need to make sure that you have write access to the specified cache repo and that you are logged in to the Hugging Face Hub;
logging in can be done via the <code>huggingface-cli login</code> command.
<p data-svelte-h="svelte-1aq9jeo">If at least one of those requirements is not met, the command will fail.</p></div> <p data-svelte-h="svelte-11lpom8">Example:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->optimum-cli neuron cache <span class="hljs-built_in">add</span> \
<span class="hljs-comment">--model prajjwal1/bert-tiny \</span>
<span class="hljs-comment">--task text-classification \</span>
<span class="hljs-comment">--train_batch_size 16 \</span>
<span class="hljs-comment">--eval_batch_size 16 \</span>
<span class="hljs-comment">--sequence_length 128 \</span>
<span class="hljs-comment">--gradient_accumulation_steps 32 \</span>
<span class="hljs-comment">--num_cores 32 \</span>
<span class="hljs-comment">--precision bf16</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1nerjnd">This will push compilation files for the <code>prajjwal1/bert-tiny</code> model, compiled for the specified parameters, to the Neuron cache repo that was set up.</p> <h3 class="relative group"><a id="list-a-cache-repo" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#list-a-cache-repo"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>List a cache repo</span></h3> <p data-svelte-h="svelte-17hzr66">It can also be convenient to query the cache repo to find out which compilation files are available. 
This can be done via the <code>optimum-cli neuron cache list</code> command:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->usage: optimum-cli neuron cache <span class="hljs-keyword">list</span> [-h] [-<span class="hljs-keyword">m</span> MODEL] [-v VERSION] [name]
positional arguments:
name The name of the repo <span class="hljs-keyword">to</span> <span class="hljs-keyword">list</span>. Will use the locally saved cache repo <span class="hljs-keyword">if</span> <span class="hljs-keyword">left</span> unspecified.
optional arguments:
-h, --<span class="hljs-keyword">help</span> show this <span class="hljs-keyword">help</span> message <span class="hljs-built_in">and</span> <span class="hljs-keyword">exit</span>
-<span class="hljs-keyword">m</span> MODEL, --model MODEL
The model name <span class="hljs-built_in">or</span> path of the model <span class="hljs-keyword">to</span> consider. If <span class="hljs-keyword">left</span> unspecified, will <span class="hljs-keyword">list</span> <span class="hljs-keyword">all</span> available models.
-v VERSION, --<span class="hljs-keyword">version</span> VERSION
The <span class="hljs-keyword">version</span> of the Neuron <span class="hljs-keyword">X</span> Compiler <span class="hljs-keyword">to</span> consider. Will <span class="hljs-keyword">list</span> <span class="hljs-keyword">all</span> available versions <span class="hljs-keyword">if</span> <span class="hljs-keyword">left</span> unspecified.<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-eogkf7">As you can see, it is possible to:</p> <ul data-svelte-h="svelte-h35kw2"><li>List all the models available for all compiler versions.</li> <li>List all the models available for a given compiler version by specifying the <code>-v / --version</code> argument.</li> <li>List all compilation files for a given model (there can be many, for different input shapes and so on) by specifying the <code>-m / --model</code> argument.</li></ul> <p data-svelte-h="svelte-11lpom8">Example:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; 
border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->optimum-cli neuron <span class="hljs-keyword">cache</span> list aws-neuron/optimum-neuron-<span class="hljs-keyword">cache</span><!-- HTML_TAG_END --></pre></div> <p></p>
<script>
// Inline bootstrap: records the asset/base paths for this docs build, then
// dynamically imports the two entry modules and starts the app on this
// script's parent element.
{
// Assigned without var/let/const, i.e. to an undeclared name.
// NOTE(review): presumably the imported runtime looks this object up by name — confirm before renaming or declaring it.
__sveltekit_1eimgpm = {
assets: "/docs/optimum.neuron/v0.2.0.dev2/en",
base: "/docs/optimum.neuron/v0.2.0.dev2/en",
env: {}
};
// Read synchronously, while this script is still the one executing:
// document.currentScript is null inside callbacks, so this must not move into the .then() below.
const element = document.currentScript.parentElement;
// Passed through unchanged to kit.start as `data`.
// NOTE(review): two entries, same count as node_ids below — looks like one slot per node; confirm.
const data = [null,null];
// Fetch both entry modules in parallel; start only once both have resolved.
Promise.all([
import("/docs/optimum.neuron/v0.2.0.dev2/en/_app/immutable/entry/start.c3692dcd.js"),
import("/docs/optimum.neuron/v0.2.0.dev2/en/_app/immutable/entry/app.40ef12d9.js")
]).then(([kit, app]) => {
// `kit` is the start module, `app` the app module (same order as the import array above).
kit.start(app, element, {
node_ids: [0, 6],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
57.2 kB
·
Xet hash:
7c4c17d829e1803e779b026c97253b794741c7d7a0082a28c70eee0d42ada4df

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.