Buckets:
hf-doc-build/doc-dev / optimum-neuron /pr_1097 /en /inference_tutorials /CodeLlama-7B-Compilation.html
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content='{"title":"Run codellama/CodeLlama-7b-hf from Hugging Face on Inf2 &amp; Trn1","local":"run-codellamacodellama-7b-hf-from-hugging-face-on-inf2--trn1","sections":[{"title":"Deploy your instance","local":"deploy-your-instance","sections":[],"depth":2},{"title":"Set up the Jupyter Notebook","local":"set-up-the-jupyter-notebook","sections":[],"depth":2},{"title":"Inference from a pre-compiled model","local":"inference-from-a-pre-compiled-model","sections":[],"depth":2},{"title":"Troubleshooting","local":"troubleshooting","sections":[],"depth":2},{"title":"Compiling a model","local":"compiling-a-model","sections":[],"depth":2},{"title":"Save to disk","local":"save-to-disk","sections":[],"depth":2}],"depth":1}'> | |
| <link href="/docs/optimum.neuron/pr_1097/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/entry/start.82e2ff17.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/scheduler.56725da7.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/singletons.2080b4fc.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/paths.90dabf70.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/entry/app.9b0c0103.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/preload-helper.9dba61fb.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/index.18a26576.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/nodes/0.912aab06.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/nodes/17.28892865.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/CopyLLMTxtMenu.fb3856d8.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/globals.7f7f1b26.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.a16844e0.js"> | |
| <link rel="modulepreload" href="/docs/optimum.neuron/pr_1097/en/_app/immutable/chunks/CodeBlock.2d00672f.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Run codellama/CodeLlama-7b-hf from Hugging Face on Inf2 & Trn1","local":"run-codellamacodellama-7b-hf-from-hugging-face-on-inf2--trn1","sections":[{"title":"Deploy your instance","local":"deploy-your-instance","sections":[],"depth":2},{"title":"Set up the Jupyter Notebook","local":"set-up-the-jupyter-notebook","sections":[],"depth":2},{"title":"Inference from a pre-compiled model","local":"inference-from-a-pre-compiled-model","sections":[],"depth":2},{"title":"Troubleshooting","local":"troubleshooting","sections":[],"depth":2},{"title":"Compiling a model","local":"compiling-a-model","sections":[],"depth":2},{"title":"Save to disk","local":"save-to-disk","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path 
d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="run-codellamacodellama-7b-hf-from-hugging-face-on-inf2--trn1" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#run-codellamacodellama-7b-hf-from-hugging-face-on-inf2--trn1"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Run 
codellama/CodeLlama-7b-hf from Hugging Face on Inf2 & Trn1</span></h1> <p data-svelte-h="svelte-yg6d4t">In this example we compile and deploy the <a href="https://huggingface.co/codellama/CodeLlama-7b-hf" rel="nofollow">codellama/CodeLlama-7b-hf</a> model from Hugging Face on Neuron core devices using the <code>transformers-neuronx</code> package.</p> <p data-svelte-h="svelte-1lgguto">The example has the following main sections:</p> <ol data-svelte-h="svelte-1v147rl"><li>Deploy your instance</li> <li>Set up the Jupyter Notebook</li> <li>Inference from a pre-compiled model</li> <li>Compiling a model</li> <li>Save to disk</li></ol> <p data-svelte-h="svelte-11ibnqg">This Jupyter Notebook can be run on a Inf2 instance (<code>inf2.xlarge</code>) or larger or also a Trn1 instance (<code>trn1.32xlarge</code>).</p> <h2 class="relative group"><a id="deploy-your-instance" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#deploy-your-instance"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Deploy your instance</span></h2> <ol data-svelte-h="svelte-ymbvah"><li><p>Raise your limits | |
| The default limit for Inferentia and Trainium instances is 0, so request an increase before you start! EC2 instances are limited by vCPUs, but SageMaker limits are based on the number of instances and instance size. Start with a vCPU limit request of 32 so you can deploy an inf2.xlarge (4 vCPUs) or an inf2.8xlarge (32 vCPUs). Go to <a href="https://us-west-2.console.aws.amazon.com/servicequotas/home/services/ec2/quotas" rel="nofollow">Service Quotas</a> and search for <code>inf</code>.</p> <p>Request increases for both On-Demand and Spot (yes, there are usually Spot instances available!). IAD (us-east-1) and PDX (us-west-2) are the best regions to start in for the US. Limits are region-specific!</p></li> <li><p>Go to EC2 to launch an instance. Make sure you choose the region to match your limit increases.</p></li> <li><p>Launch an inf2.xlarge instance with the <a href="https://aws.amazon.com/marketplace/pp/prodview-gr3e6yiscria2" rel="nofollow">Hugging Face DLAMI (Deep Learning AMI)</a>. This image comes with all the software preinstalled. 
Alternatively, you can follow the instructions at the <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/general/setup/torch-neuronx.html#setup-torch-neuronx" rel="nofollow">Neuron Setup Guide</a>.</p></li> <li><p>100GB disk size is fine for this example, but larger models such as 70B may take over 500GB</p></li></ol> <h2 class="relative group"><a id="set-up-the-jupyter-notebook" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#set-up-the-jupyter-notebook"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Set up the Jupyter Notebook</span></h2> <p data-svelte-h="svelte-1aux5lf">(You could also just copy and paste most of this code into an SSH prompt)</p> <p data-svelte-h="svelte-lxbycy">The following steps set up Jupyter Notebook and launch this tutorial:</p> <ol data-svelte-h="svelte-u6ygv7"><li>Clone the <a href="https://github.com/huggingface/optimum-neuron" rel="nofollow">Hugging Face Optimum Neuron</a> repo to your instance using</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer 
focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->git clone https:<span class="hljs-regexp">//gi</span>thub.com<span class="hljs-regexp">/huggingface/</span>optimum-neuron.git<!-- HTML_TAG_END --></pre></div> <ol start="2" data-svelte-h="svelte-jnp9rc"><li>Navigate to the <code>text-generation</code> notebooks folder</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" 
transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->cd optimum-neuron<span class="hljs-regexp">/notebooks/</span>text-generation<!-- HTML_TAG_END --></pre></div> <ol start="3" data-svelte-h="svelte-bjvu09"><li>Follow the instructions in <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/general/setup/notebook/setup-jupyter-notebook-steps-troubleshooting.html" rel="nofollow">Jupyter Notebook QuickStart</a> to run Jupyter Notebook on your instance.</li> <li>Locate this tutorial in your Jupyter Notebook session (<code>CodeLlama-7B-Compilation.ipynb</code>) and launch it. 
Follow the rest of the instructions in this tutorial.</li></ol> <h2 class="relative group"><a id="inference-from-a-pre-compiled-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference-from-a-pre-compiled-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference from a pre-compiled model</span></h2> <p data-svelte-h="svelte-mgsrv8">The model needs to be exported(compiled) to a Neuron specific format. Some configuration changes (such as batch size and number of Neuron cores used) will require a re-compile. We have pre-compiled a model for you to use as a test using the Hugging Face Optimum Neuron library.</p> <p data-svelte-h="svelte-v19a99">In this case, the model is pre-compiled for 2 Neuron cores, so it will run on an inf2.xlarge. 
You can read more about the model at <a href="https://huggingface.co/aws-neuron/CodeLlama-7b-hf-neuron-8xlarge" rel="nofollow">aws-neuron/CodeLlama-7b-hf-neuron-8xlarge</a></p> <p data-svelte-h="svelte-1ve0bic">Instructions on how to compile a model are in the next section.</p> <p data-svelte-h="svelte-v6j9nq"><strong>Keep in mind that this is a model for code generation, so the example prompt is code, and the response will probably be code</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> optimum.neuron <span class="hljs-keyword">import</span> pipeline | |
| p = pipeline(<span class="hljs-string">"text-generation"</span>, <span class="hljs-string">"aws-neuron/CodeLlama-7b-hf-neuron-8xlarge"</span>) | |
| p( | |
| <span class="hljs-string">"import socket\n\ndef ping_exponential_backoff(host: str):"</span>, | |
| do_sample=<span class="hljs-literal">True</span>, | |
| top_k=<span class="hljs-number">10</span>, | |
| temperature=<span class="hljs-number">0.1</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| num_return_sequences=<span class="hljs-number">1</span>, | |
| max_length=<span class="hljs-number">200</span>, | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-17zwaw5">If you are going to compile in the same session, you need to release the Neuron cores by deleting the pipeline object. Otherwise, you may get load errors.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">del</span> p<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-c9hbnf">Example output:</p> <p data-svelte-h="svelte-lxrv77">[{‘generated_text’: ‘import socket\n\ndef ping_exponential_backoff(host: str):\n """\n Ping a host with exponential backoff.\n\n :param host: Host to ping\n :return: True if host is reachable, False otherwise\n """\n for i in range(1, 10):\n try:\n socket.create_connection((host, 80), 1).close()\n return True\n except OSError:\n time.sleep(2 ** i)\n return False\n\n\ndef 
ping_exponential_backoff_with_timeout(host: str, timeout: int):\n """\n Ping a host with exponential backoff and timeout.\n\n :param host: Host to ping\n :param timeout: Timeout in seconds\n :return: True if host is reachable, False otherwise\n """\n for’}]</p> <h2 class="relative group"><a id="troubleshooting" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#troubleshooting"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Troubleshooting</span></h2> <p data-svelte-h="svelte-1l9cyje">If you see the error</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path 
d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->FileNotFoundError: Could <span class="hljs-keyword">not</span> find <span class="hljs-keyword">a</span> matching NEFF <span class="hljs-keyword">for</span> your HLO <span class="hljs-keyword">in</span> this <span class="hljs-built_in">directory</span>. Ensure that <span class="hljs-keyword">the</span> model you are trying <span class="hljs-built_in">to</span> <span class="hljs-built_in">load</span> is <span class="hljs-keyword">the</span> same type <span class="hljs-keyword">and</span> has <span class="hljs-keyword">the</span> same parameters <span class="hljs-keyword">as</span> <span class="hljs-keyword">the</span> <span class="hljs-literal">one</span> you saved <span class="hljs-keyword">or</span> call <span class="hljs-string">"save"</span> <span class="hljs-keyword">on</span> <span class="hljs-title">this</span> <span class="hljs-title">model</span> <span class="hljs-title">to</span> <span class="hljs-title">reserialize</span> <span class="hljs-title">it</span>.<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-p2bqco">then you may be using a different version of the Neuron SDK than we used in this example.</p> <p data-svelte-h="svelte-161gbp6">You can fix that by compiling your own following the instructions below!</p> <h2 class="relative group"><a id="compiling-a-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 
with-hover:group-hover:opacity-100 with-hover:right-full" href="#compiling-a-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Compiling a model</span></h2> <p data-svelte-h="svelte-147ghya">Before you run the code below consider what machine you want to run the final result on. 
You will need to change the num_cores to match the number of cores on your target machine (at a maximum — you could run a model compiled for 2 cores on an inf2.24xlarge, but you will only use 2 of the 12 available cores).</p> <p data-svelte-h="svelte-1p7bbeb">Using this method, the machine you are compiling on needs to also have at least that number of cores.</p> <p data-svelte-h="svelte-1deiptj">-inf2.xlarge, inf2.8xlarge: num_cores=2</p> <p data-svelte-h="svelte-1uq0i30">-inf2.24xlarge: num_cores=12</p> <p data-svelte-h="svelte-1ssp7rd">-inf2.48xlarge: num_cores=24</p> <p data-svelte-h="svelte-3lye3s"><strong>The speed of this process depends on whether this is the first time you are running it (and the model needs to be downloaded to the local system) and whether the process is able to automatically download pre-compiled files from the online cache <a href="https://huggingface.co/aws-neuron/optimum-neuron-cache" rel="nofollow">aws-neuron/optimum-neuron-cache</a></strong></p> <p data-svelte-h="svelte-gc733z">The optimum.neuron library will download the original model directly from Hugging Face. In this example, that is <a href="https://huggingface.co/codellama/CodeLlama-7b-hf" rel="nofollow">codellama/CodeLlama-7b-hf</a>. 
This is different from the code above where we pointed to a model in the aws-neuron group.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> optimum.neuron <span class="hljs-keyword">import</span> NeuronModelForCausalLM | |
| <span class="hljs-comment"># num_cores should be changed based on the instance. inf2.24xlarge has 6 neuron processors (they have two cores each) so 12 total</span> | |
| compiler_args = {<span class="hljs-string">"num_cores"</span>: <span class="hljs-number">2</span>, <span class="hljs-string">"auto_cast_type"</span>: <span class="hljs-string">"fp16"</span>} | |
| input_shapes = {<span class="hljs-string">"batch_size"</span>: <span class="hljs-number">1</span>, <span class="hljs-string">"sequence_length"</span>: <span class="hljs-number">2048</span>} | |
| model = NeuronModelForCausalLM.from_pretrained( | |
| <span class="hljs-string">"codellama/CodeLlama-7b-hf"</span>, export=<span class="hljs-literal">True</span>, **compiler_args, **input_shapes | |
| )<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="save-to-disk" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#save-to-disk"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Save to disk</span></h2> <p data-svelte-h="svelte-1a0pp9v">If you want to save your compiled model out to a local directory:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black 
text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model.save_pretrained(<span class="hljs-string">"CodeLlama-7b-hf-neuron-8xlarge"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-er2dar">Once you have the model saved locally, you can just give the local directory name in the pipeline code above instead of the path on Hugging Face. You should delete the model object before running the pipeline code above.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span 
class="hljs-keyword">del</span> model<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-jowga9">If you want to save your model back up to Hugging Face:</p> <ol data-svelte-h="svelte-ar4b6m"><li>Create a WRITE token to use for command-line access: <a href="https://huggingface.co/settings/tokens" rel="nofollow">https://huggingface.co/settings/tokens</a></li> <li>While you are there, create a “New Model” repository for your model to reside in. Our example uses jburtoft/CodeLlama-7b-hf-neuron-8xlarge.</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub.hf_api <span class="hljs-keyword">import</span> HfFolder | |
| HfFolder.save_token(<span class="hljs-string">"MY_HUGGINGFACE_TOKEN_HERE"</span>)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> HfApi, login | |
| api = HfApi() | |
| login() | |
| api.upload_folder( | |
| folder_path=<span class="hljs-string">"CodeLlama-7b-hf-neuron-8xlarge"</span>, | |
| repo_id=<span class="hljs-string">"jburtoft/CodeLlama-7b-hf-neuron-8xlarge"</span>, | |
| repo_type=<span class="hljs-string">"model"</span>, | |
| multi_commits=<span class="hljs-literal">True</span>, | |
| multi_commits_verbose=<span class="hljs-literal">True</span>, | |
| )<!-- HTML_TAG_END --></pre></div> <p></p> | |
| <script> | |
| { | |
// Client bootstrap for this page: publishes the asset/base paths, then loads
// the runtime entry and the app entry and starts the app on this script's
// parent element.
//
// The assignment below has no declaration keyword, so in a classic script it
// creates (or overwrites) a global carrying the path configuration.
// NOTE(review): presumably read by the imported runtime; the consumer is not
// visible in this file.
| __sveltekit_19pvy0q = { | |
| assets: "/docs/optimum.neuron/pr_1097/en", | |
| base: "/docs/optimum.neuron/pr_1097/en", | |
| env: {} | |
| }; | |
// Mount target: the element that contains this <script> tag.
| const element = document.currentScript.parentElement; | |
// Two null entries, handed to kit.start() unchanged as `data`.
// NOTE(review): looks like one slot per entry in `node_ids`; confirm against
// the SvelteKit version in use.
| const data = [null,null]; | |
// Both dynamic imports are started together; the callback runs only once
// both modules have resolved. There is no rejection handler, so a failed
// import surfaces as an unhandled promise rejection.
| Promise.all([ | |
| import("/docs/optimum.neuron/pr_1097/en/_app/immutable/entry/start.82e2ff17.js"), | |
| import("/docs/optimum.neuron/pr_1097/en/_app/immutable/entry/app.9b0c0103.js") | |
| ]).then(([kit, app]) => { | |
// Starts the app with the mount element and the initial state object below.
| kit.start(app, element, { | |
| node_ids: [0, 17], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 38.2 kB
- Xet hash:
- c9f752457d62c9b5010ba5a7ef534aa3ce6e94633eddeaffdeec4856c9ca46da
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.